Skip to main content
Agentic retrieval uses an LLM to understand complex queries, decompose them into sub-queries, and intelligently aggregate results. While it has higher latency (2-5 seconds), it excels at nuanced questions that simple keyword or vector search can’t handle well.

How Agentic Retrieval Works

User Query: "What context would help me prepare for my meeting with Alice about the Q3 budget?"

                            ┌───────────────┐
                            │   LLM Agent   │
                            └───────┬───────┘

         ┌──────────────────────────┼──────────────────────────┐
         ▼                          ▼                          ▼
┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
│  Sub-query 1:   │     │  Sub-query 2:   │     │  Sub-query 3:   │
│  "Alice profile │     │  "Q3 budget     │     │  "Previous      │
│   preferences"  │     │   discussions"  │     │   meetings"     │
└────────┬────────┘     └────────┬────────┘     └────────┬────────┘
         │                       │                       │
         ▼                       ▼                       ▼
    [Results 1]             [Results 2]             [Results 3]
         │                       │                       │
         └───────────────────────┼───────────────────────┘

                        ┌───────────────┐
                        │   Aggregate   │
                        │   & Rank      │
                        └───────┬───────┘

                        Final Results
The agent:
  1. Analyzes the query to understand intent
  2. Decomposes into multiple sub-queries
  3. Executes each sub-query using hybrid search
  4. Aggregates and re-ranks results by relevance

When to Use Agentic Retrieval

| Use Agentic | Use Hybrid Instead |
| --- | --- |
| Complex, multi-part questions | Simple keyword lookups |
| "Prepare me context for…" | "Find messages about X" |
| Queries requiring reasoning | Direct topic searches |
| High-stakes retrieval | Real-time responses |
| Research and analysis | Chat applications |

Good Candidates for Agentic

# Complex context gathering
"What context would help me understand the evolution of our pricing strategy?"

# Multi-faceted questions
"What are all the factors we discussed that affect the product launch timeline?"

# Relationship queries
"What connections exist between our customer feedback and the recent product changes?"

# Preparatory context
"Help me prepare for a conversation with the engineering team about technical debt"

Better Suited for Hybrid

# Simple lookups
"What did Alice say about the budget?"

# Direct searches
"Find discussions about Kubernetes"

# Recent context
"What did we discuss in yesterday's meeting?"

Basic Usage

import requests
from datetime import datetime

BASE_URL = "https://api.evermind.ai"
headers = {"Content-Type": "application/json"}

def agentic_search(user_id: str, query: str, top_k: int = 10) -> dict:
    """Perform agentic retrieval for a single user query.

    Args:
        user_id: ID of the user whose memories are searched.
        query: Natural-language query; detailed, intent-rich queries work
            best with agentic retrieval.
        top_k: Maximum number of memories to return.

    Returns:
        The decoded JSON response from the search endpoint.

    Raises:
        requests.HTTPError: If the server returns an error status.
        requests.Timeout: If the request exceeds the 60s timeout.
    """
    search_params = {
        "user_id": user_id,
        "query": query,
        "retrieve_method": "agentic",  # Enable agentic retrieval
        "top_k": top_k,
        "memory_types": ["episodic_memory", "profile"]
    }

    response = requests.get(
        f"{BASE_URL}/api/v0/memories/search",
        json=search_params,
        headers=headers,
        timeout=60  # Longer timeout for agentic (extra LLM calls server-side)
    )
    # Surface HTTP errors instead of silently parsing an error body.
    response.raise_for_status()
    return response.json()

# Example usage
result = agentic_search(
    user_id="user_alice",
    query="What context would help me prepare for discussing the product roadmap with stakeholders?"
)

# The API nests its payload under "result" -> "memories"; default to empty
# containers so a malformed or error response prints "Found 0" instead of raising.
memories = result.get("result", {}).get("memories", [])
print(f"Found {len(memories)} relevant memories")
for mem in memories:
    # Truncate each memory to its first 100 characters for a compact preview.
    print(f"- {mem.get('memory_content', '')[:100]}...")

Complex Query Examples

Example 1: Meeting Preparation

def prepare_meeting_context(user_id: str, meeting_topic: str, attendees: list) -> dict:
    """Gather comprehensive context for a meeting.

    Args:
        user_id: ID of the user whose memories are searched.
        meeting_topic: Subject of the meeting, embedded in the query text.
        attendees: Attendee names/roles, joined into the query text.

    Returns:
        The decoded JSON response from the search endpoint.

    Raises:
        requests.HTTPError: If the server returns an error status.
        requests.Timeout: If the request exceeds the 60s timeout.
    """

    # Build query incorporating topic and attendees
    attendee_names = ", ".join(attendees)
    query = f"""What context would help me prepare for a meeting about {meeting_topic}?
    Attendees include: {attendee_names}.
    I need:
    - Previous discussions on this topic
    - Relevant decisions and outcomes
    - Any concerns or blockers mentioned
    - Attendee preferences and working styles"""

    search_params = {
        "user_id": user_id,
        "query": query,
        "retrieve_method": "agentic",
        "top_k": 15,  # Broader net than the default — meeting prep wants more context
        "memory_types": ["episodic_memory", "profile"]
    }

    response = requests.get(
        f"{BASE_URL}/api/v0/memories/search",
        json=search_params,
        headers=headers,
        timeout=60  # Longer timeout for agentic retrieval
    )
    # Surface HTTP errors instead of silently parsing an error body.
    response.raise_for_status()
    return response.json()

# Usage
# Gather prior discussions, decisions, and attendee context ahead of the meeting.
context = prepare_meeting_context(
    user_id="user_alice",
    meeting_topic="Q3 product roadmap",
    attendees=["Bob (Engineering)", "Carol (Product)", "Dave (Sales)"]
)

Example 2: Decision History

def trace_decision_history(user_id: str, decision_topic: str) -> dict:
    """Trace the evolution of decisions on a topic.

    Args:
        user_id: ID of the user whose memories are searched.
        decision_topic: Topic whose decision history should be reconstructed.

    Returns:
        The decoded JSON response from the search endpoint.

    Raises:
        requests.HTTPError: If the server returns an error status.
        requests.Timeout: If the request exceeds the 60s timeout.
    """

    query = f"""Trace the history of decisions and discussions about {decision_topic}.
    I want to understand:
    - What options were considered
    - What factors influenced the decisions
    - Who was involved in the discussions
    - What the outcomes were
    - Any changes or reversals over time"""

    search_params = {
        "user_id": user_id,
        "query": query,
        "retrieve_method": "agentic",
        "top_k": 20,  # History spans many memories, so request a larger set
        "memory_types": ["episodic_memory"]
    }

    response = requests.get(
        f"{BASE_URL}/api/v0/memories/search",
        json=search_params,
        headers=headers,
        timeout=60  # Longer timeout for agentic retrieval
    )
    # Surface HTTP errors instead of silently parsing an error body.
    response.raise_for_status()
    return response.json()

# Usage
# Reconstruct how the cloud-provider decision evolved over time.
history = trace_decision_history(
    user_id="user_alice",
    decision_topic="choosing our cloud provider"
)

Example 3: Relationship Analysis

def analyze_topic_relationships(user_id: str, topics: list) -> dict:
    """Find connections between multiple topics.

    Args:
        user_id: ID of the user whose memories are searched.
        topics: Topics to cross-reference, joined into the query text.

    Returns:
        The decoded JSON response from the search endpoint.

    Raises:
        requests.HTTPError: If the server returns an error status.
        requests.Timeout: If the request exceeds the 60s timeout.
    """

    topics_str = ", ".join(topics)
    query = f"""Find connections and relationships between these topics: {topics_str}.
    Look for:
    - How these topics have been discussed together
    - Dependencies or conflicts between them
    - People involved in multiple topics
    - Timeline overlaps"""

    search_params = {
        "user_id": user_id,
        "query": query,
        "retrieve_method": "agentic",
        "top_k": 15,
        "memory_types": ["episodic_memory"]
    }

    response = requests.get(
        f"{BASE_URL}/api/v0/memories/search",
        json=search_params,
        headers=headers,
        timeout=60  # Longer timeout for agentic retrieval
    )
    # Surface HTTP errors instead of silently parsing an error body.
    response.raise_for_status()
    return response.json()

# Usage
# Cross-reference three threads of discussion for dependencies and overlaps.
relationships = analyze_topic_relationships(
    user_id="user_alice",
    topics=["customer feedback", "product features", "technical debt"]
)

Cost and Latency Considerations

Agentic retrieval has higher resource usage:
| Metric | Hybrid | Agentic |
| --- | --- | --- |
| Latency | 200-600ms | 2-5s |
| LLM Calls | 0 | 1-3 |
| Search Operations | 1 | 3-5 |

Optimizing Agentic Queries

# 1. Use appropriate top_k - don't over-request
search_params = {
    "top_k": 10,  # Enough context, not excessive
    ...
}

# 2. Filter memory types when possible
search_params = {
    "memory_types": ["episodic_memory"],  # Only what's needed
    ...
}

# 3. Use group_ids for scoped searches
search_params = {
    "group_ids": ["specific_conversation"],  # Narrow the search space
    ...
}

Fallback Strategy

Implement a tiered retrieval strategy:
import asyncio

async def tiered_retrieval(user_id: str, query: str, complexity: str = "auto") -> dict:
    """Use appropriate retrieval based on query complexity.

    Args:
        user_id: ID of the user whose memories are searched.
        query: Natural-language query.
        complexity: "simple", "complex", or "auto" to classify the query
            with estimate_complexity().

    Returns:
        The decoded JSON response from the search endpoint.

    Raises:
        requests.Timeout: If even the hybrid fallback times out.
        requests.HTTPError: If the server returns an error status.
    """

    if complexity == "auto":
        complexity = estimate_complexity(query)

    if complexity == "simple":
        # Fast path for simple queries
        method, timeout = "hybrid", 10
    else:
        # Complex queries get agentic
        method, timeout = "agentic", 60

    search_params = {
        "user_id": user_id,
        "query": query,
        "retrieve_method": method,
        "top_k": 10,
        "memory_types": ["episodic_memory", "profile"]
    }

    def _do_search(params: dict, secs: int) -> dict:
        # One blocking request; raises requests.Timeout / requests.HTTPError.
        response = requests.get(
            f"{BASE_URL}/api/v0/memories/search",
            json=params,
            headers=headers,
            timeout=secs
        )
        response.raise_for_status()
        return response.json()

    try:
        # requests is synchronous — run it in a worker thread so this
        # coroutine does not block the event loop for up to 60 seconds.
        return await asyncio.to_thread(_do_search, search_params, timeout)

    except requests.Timeout:
        if method == "agentic":
            # Fallback to hybrid on timeout
            search_params["retrieve_method"] = "hybrid"
            return await asyncio.to_thread(_do_search, search_params, 10)
        raise


def estimate_complexity(query: str) -> str:
    """Classify a query as "simple" or "complex" for retrieval routing.

    A query counts as complex when it contains a known complexity keyword,
    asks more than one question, or conjoins requests with " and ".
    """
    complexity_keywords = (
        "prepare", "context", "help me understand", "trace",
        "relationship", "connection", "evolution", "history of",
        "factors", "all the", "comprehensive",
    )

    lowered = query.lower()

    # Keyword hit → the user is asking for synthesis, not a lookup.
    has_keyword = any(kw in lowered for kw in complexity_keywords)
    # Multiple question marks or a conjoined request → multi-part query.
    is_multi_part = query.count("?") > 1 or " and " in lowered

    return "complex" if has_keyword or is_multi_part else "simple"

Async Client with Longer Timeouts

For production use, handle agentic retrieval’s longer latency:
import aiohttp
import asyncio

class AgenticEverMemOSClient:
    """Async client tuned for agentic retrieval's longer latencies."""

    def __init__(self, base_url: str = "https://api.evermind.ai"):
        self.base_url = base_url
        self.headers = {"Content-Type": "application/json"}

    async def _search(self, params: dict, total_timeout: int) -> dict:
        """Shared request path: one session per call, bounded by total_timeout."""
        limit = aiohttp.ClientTimeout(total=total_timeout)
        async with aiohttp.ClientSession(timeout=limit) as session:
            async with session.get(
                f"{self.base_url}/api/v0/memories/search",
                json=params,
                headers=self.headers
            ) as response:
                response.raise_for_status()
                return await response.json()

    async def agentic_search(
        self,
        user_id: str,
        query: str,
        top_k: int = 10,
        timeout: int = 60
    ) -> dict:
        """Perform agentic search with appropriate timeout."""
        return await self._search(
            {
                "user_id": user_id,
                "query": query,
                "retrieve_method": "agentic",
                "top_k": top_k,
                "memory_types": ["episodic_memory", "profile"]
            },
            timeout,
        )

    async def search_with_fallback(
        self,
        user_id: str,
        query: str,
        prefer_agentic: bool = True
    ) -> dict:
        """Search with automatic fallback to hybrid if agentic times out."""
        if prefer_agentic:
            try:
                return await self.agentic_search(user_id, query, timeout=60)
            except asyncio.TimeoutError:
                pass  # Agentic took too long — degrade to the fast path below

        # Hybrid fallback: cheaper method, short timeout.
        return await self._search(
            {
                "user_id": user_id,
                "query": query,
                "retrieve_method": "hybrid",
                "top_k": 10,
                "memory_types": ["episodic_memory", "profile"]
            },
            10,
        )


# Usage
async def main():
    """Demo: search with agentic preferred, falling back to hybrid on timeout."""
    client = AgenticEverMemOSClient()

    result = await client.search_with_fallback(
        user_id="user_alice",
        query="What context do I need to understand our pricing strategy evolution?",
        prefer_agentic=True
    )

    # Count memories defensively: missing keys yield an empty list.
    print(f"Found {len(result.get('result', {}).get('memories', []))} memories")

asyncio.run(main())

Best Practices

Write detailed queries that explain what context you need:
# Good: Detailed, explains intent
query = """What context would help me prepare for discussing
technical debt with the engineering team? I need to understand
past discussions, proposed solutions, and any blockers mentioned."""

# Bad: Too vague
query = "technical debt"
Always use appropriate timeouts and fallbacks:
# Set longer timeout for agentic
response = requests.get(url, timeout=60)

# Implement fallback
# NOTE: requests raises requests.Timeout (a RequestException subclass),
# which is NOT a subclass of the builtin TimeoutError — catching
# TimeoutError here would let the timeout propagate. Catch the
# requests exception explicitly.
try:
    result = agentic_search(...)
except requests.Timeout:
    result = hybrid_search(...)  # Fallback
Cache results for repeated complex queries:
from functools import lru_cache
import hashlib

def cache_key(user_id: str, query: str) -> str:
    """Derive a stable 32-char hex digest identifying a (user, query) pair."""
    digest = hashlib.md5(f"{user_id}:{query}".encode())
    return digest.hexdigest()

# Cache expensive agentic results
# Cache expensive agentic results.
# NOTE(review): lru_cache keys on ALL positional arguments, so the
# precomputed cache_key parameter is redundant — (user_id, query) alone
# already determines the cache entry. Worse, passing differing cache_key
# values for the same (user_id, query) would split the cache.
@lru_cache(maxsize=100)
def cached_agentic_search(cache_key: str, user_id: str, query: str):
    # Delegate to the uncached network call; up to 100 distinct
    # argument tuples are memoized.
    return agentic_search(user_id, query)
Reserve agentic for high-value queries where accuracy matters:
# Use agentic for:
- User explicitly asks for comprehensive context
- Preparing for important meetings/decisions
- Research and analysis tasks

# Use hybrid for:
- Real-time chat responses
- Simple lookups
- Frequently repeated queries

Next Steps