Documentation Index: fetch the complete documentation index at https://docs.evermind.ai/llms.txt
Use this file to discover all available pages before exploring further.
Agentic retrieval uses an LLM to understand complex queries, decompose them into sub-queries, and intelligently aggregate results. While it has higher latency (2-5 seconds), it excels at nuanced questions that simple keyword or vector search can’t handle well.
How Agentic Retrieval Works
The agent:
Analyzes the query to understand intent
Decomposes into multiple sub-queries
Executes each sub-query using hybrid search
Aggregates and re-ranks results by relevance
When to Use Agentic Retrieval
| Use Agentic | Use Hybrid Instead |
| --- | --- |
| Complex, multi-part questions | Simple keyword lookups |
| "Prepare me context for…" | "Find messages about X" |
| Queries requiring reasoning | Direct topic searches |
| High-stakes retrieval | Real-time responses |
| Research and analysis | Chat applications |
Good Candidates for Agentic
# Complex context gathering
"What context would help me understand the evolution of our pricing strategy?"
# Multi-faceted questions
"What are all the factors we discussed that affect the product launch timeline?"
# Relationship queries
"What connections exist between our customer feedback and the recent product changes?"
# Preparatory context
"Help me prepare for a conversation with the engineering team about technical debt"
Better Suited for Hybrid
# Simple lookups
"What did Alice say about the budget?"
# Direct searches
"Find discussions about Kubernetes"
# Recent context
"What did we discuss in yesterday's meeting?"
Basic Usage
import requests
from datetime import datetime
BASE_URL = "https://api.evermind.ai"
headers = { "Content-Type" : "application/json" }
def agentic_search(user_id: str, query: str, top_k: int = 10) -> dict:
    """Send one agentic search request and return the decoded JSON body.

    Args:
        user_id: Sent as ``user_id`` in the request payload.
        query: Sent as ``query`` in the request payload.
        top_k: Sent as ``top_k`` in the request payload.

    Returns:
        The response body parsed as JSON.
    """
    endpoint = f"{BASE_URL}/api/v0/memories/search"
    payload = dict(
        user_id=user_id,
        query=query,
        retrieve_method="agentic",  # selects agentic retrieval
        top_k=top_k,
        memory_types=["episodic_memory", "profile"],
    )
    # Agentic retrieval is slower than hybrid search, hence the long timeout.
    reply = requests.get(endpoint, json=payload, headers=headers, timeout=60)
    return reply.json()
# Example usage: run one agentic search and print a short preview of each hit.
result = agentic_search(
    user_id="user_alice",
    query="What context would help me prepare for discussing the product roadmap with stakeholders?",
)

# Both levels are optional in the response, so default to empty containers.
payload = result.get("result", {})
memories = payload.get("memories", [])
print(f"Found {len(memories)} relevant memories")
for mem in memories:
    preview = mem.get("memory_content", "")[:100]  # first 100 characters only
    print(f"- {preview}...")
Complex Query Examples
Example 1: Meeting Preparation
def prepare_meeting_context(user_id: str, meeting_topic: str, attendees: list) -> dict:
    """Run an agentic search for context ahead of a meeting.

    Args:
        user_id: Sent as ``user_id`` in the request payload.
        meeting_topic: Topic text interpolated into the query.
        attendees: Strings joined with ", " and interpolated into the query.

    Returns:
        The response body parsed as JSON.
    """
    who = ", ".join(attendees)
    # The query is a multi-line prompt: the question, the attendee list,
    # then the kinds of context being asked for.
    prompt_lines = (
        f"What context would help me prepare for a meeting about {meeting_topic}?",
        f"Attendees include: {who}.",
        "I need:",
        "- Previous discussions on this topic",
        "- Relevant decisions and outcomes",
        "- Any concerns or blockers mentioned",
        "- Attendee preferences and working styles",
    )
    payload = dict(
        user_id=user_id,
        query="\n".join(prompt_lines),
        retrieve_method="agentic",
        top_k=15,
        memory_types=["episodic_memory", "profile"],
    )
    endpoint = f"{BASE_URL}/api/v0/memories/search"
    reply = requests.get(endpoint, json=payload, headers=headers, timeout=60)
    return reply.json()
# Usage: gather context for a roadmap meeting with three attendees.
meeting_attendees = ["Bob (Engineering)", "Carol (Product)", "Dave (Sales)"]
context = prepare_meeting_context(
    "user_alice",
    meeting_topic="Q3 product roadmap",
    attendees=meeting_attendees,
)
Example 2: Decision History
def trace_decision_history(user_id: str, decision_topic: str) -> dict:
    """Run an agentic search asking how decisions on a topic evolved.

    Args:
        user_id: Sent as ``user_id`` in the request payload.
        decision_topic: Topic text interpolated into the query.

    Returns:
        The response body parsed as JSON.
    """
    # Multi-line prompt: the request itself, then the aspects to cover.
    prompt_lines = (
        f"Trace the history of decisions and discussions about {decision_topic}.",
        "I want to understand:",
        "- What options were considered",
        "- What factors influenced the decisions",
        "- Who was involved in the discussions",
        "- What the outcomes were",
        "- Any changes or reversals over time",
    )
    payload = dict(
        user_id=user_id,
        query="\n".join(prompt_lines),
        retrieve_method="agentic",
        top_k=20,
        memory_types=["episodic_memory"],
    )
    endpoint = f"{BASE_URL}/api/v0/memories/search"
    reply = requests.get(endpoint, json=payload, headers=headers, timeout=60)
    return reply.json()
# Usage: trace how the cloud-provider decision was reached.
history = trace_decision_history(
    "user_alice",
    decision_topic="choosing our cloud provider",
)
Example 3: Relationship Analysis
def analyze_topic_relationships(user_id: str, topics: list) -> dict:
    """Run an agentic search for connections between several topics.

    Args:
        user_id: Sent as ``user_id`` in the request payload.
        topics: Strings joined with ", " and interpolated into the query.

    Returns:
        The response body parsed as JSON.
    """
    joined_topics = ", ".join(topics)
    # Multi-line prompt: the request itself, then the kinds of links to find.
    prompt_lines = (
        f"Find connections and relationships between these topics: {joined_topics}.",
        "Look for:",
        "- How these topics have been discussed together",
        "- Dependencies or conflicts between them",
        "- People involved in multiple topics",
        "- Timeline overlaps",
    )
    payload = dict(
        user_id=user_id,
        query="\n".join(prompt_lines),
        retrieve_method="agentic",
        top_k=15,
        memory_types=["episodic_memory"],
    )
    endpoint = f"{BASE_URL}/api/v0/memories/search"
    reply = requests.get(endpoint, json=payload, headers=headers, timeout=60)
    return reply.json()
# Usage: look for links between three recurring topics.
topics_of_interest = ["customer feedback", "product features", "technical debt"]
relationships = analyze_topic_relationships(
    "user_alice",
    topics=topics_of_interest,
)
Cost and Latency Considerations
Agentic retrieval has higher resource usage:
| Metric | Hybrid | Agentic |
| --- | --- | --- |
| Latency | 200-600ms | 2-5s |
| LLM Calls | 0 | 1-3 |
| Search Operations | 1 | 3-5 |
Optimizing Agentic Queries
# Illustrative fragments, not runnable code: each `...` stands in for the
# other request fields (e.g. user_id, query, retrieve_method) shown in the
# full examples elsewhere on this page.
# 1. Use appropriate top_k - don't over-request
search_params = {
"top_k" : 10 , # Enough context, not excessive
...
}
# 2. Filter memory types when possible
search_params = {
"memory_types" : [ "episodic_memory" ], # Only what's needed
...
}
# 3. Use group_ids for scoped searches
search_params = {
"group_ids" : [ "specific_conversation" ], # Narrow the search space
...
}
Fallback Strategy
Implement a tiered retrieval strategy:
import asyncio
async def tiered_retrieval(user_id: str, query: str, complexity: str = "auto") -> dict:
    """Search with hybrid or agentic retrieval depending on query complexity.

    The HTTP call is made with the blocking ``requests`` library, so it is
    run in the event loop's default executor rather than directly inside the
    coroutine; otherwise the loop would stall for the whole request.

    Args:
        user_id: Sent as ``user_id`` in the request payload.
        query: Sent as ``query`` in the request payload.
        complexity: ``"simple"`` selects hybrid retrieval with a 10 s timeout;
            ``"auto"`` delegates the choice to ``estimate_complexity``; any
            other value selects agentic retrieval with a 60 s timeout.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.Timeout: If a hybrid request times out, either as the first
            choice or as the fallback after an agentic timeout.
    """
    if complexity == "auto":
        complexity = estimate_complexity(query)

    # Simple queries take the fast hybrid path; everything else goes agentic.
    if complexity == "simple":
        method, timeout = "hybrid", 10
    else:
        method, timeout = "agentic", 60

    search_params = {
        "user_id": user_id,
        "query": query,
        "retrieve_method": method,
        "top_k": 10,
        "memory_types": ["episodic_memory", "profile"],
    }

    def _search(retrieve_method: str, request_timeout: int) -> dict:
        """Issue one blocking search request using the given method."""
        response = requests.get(
            f"{BASE_URL}/api/v0/memories/search",
            json={**search_params, "retrieve_method": retrieve_method},
            headers=headers,
            timeout=request_timeout,
        )
        return response.json()

    # Off-load the blocking call so other coroutines keep running meanwhile.
    loop = asyncio.get_running_loop()
    try:
        return await loop.run_in_executor(None, _search, method, timeout)
    except requests.Timeout:
        if method == "agentic":
            # Fallback to hybrid on timeout
            return await loop.run_in_executor(None, _search, "hybrid", 10)
        raise
def estimate_complexity(query: str) -> str:
    """Classify a query as ``"complex"`` or ``"simple"`` with keyword heuristics.

    A query is complex when its lowercased text contains any indicator
    phrase, when it contains more than one ``?``, or when it contains the
    word "and" surrounded by spaces. Everything else is simple.

    Args:
        query: The raw query text.

    Returns:
        ``"complex"`` or ``"simple"``.
    """
    lowered = query.lower()

    # Phrases that suggest the query needs reasoning or broad context.
    indicator_phrases = (
        "prepare", "context", "help me understand", "trace",
        "relationship", "connection", "evolution", "history of",
        "factors", "all the", "comprehensive",
    )
    for phrase in indicator_phrases:
        if phrase in lowered:
            return "complex"

    # Several question marks or a conjunction hint at a multi-part query.
    is_multi_part = query.count("?") > 1 or " and " in lowered
    return "complex" if is_multi_part else "simple"
Async Client with Longer Timeouts
For production use, handle agentic retrieval’s longer latency:
import aiohttp
import asyncio
class AgenticEverOSClient:
    """Async client for the memory search endpoint, tuned for agentic retrieval.

    Every request opens its own ``aiohttp.ClientSession`` configured with a
    total timeout, sends the search parameters as a JSON body, and raises on
    HTTP error statuses via ``raise_for_status()``.
    """

    def __init__(self, base_url: str = "https://api.evermind.ai"):
        """Store the API base URL and the default JSON request headers."""
        self.base_url = base_url
        self.headers = {"Content-Type": "application/json"}

    async def _search(
        self,
        user_id: str,
        query: str,
        retrieve_method: str,
        top_k: int,
        timeout: int,
    ) -> dict:
        """Send one search request with the given method and total timeout."""
        search_params = {
            "user_id": user_id,
            "query": query,
            "retrieve_method": retrieve_method,
            "top_k": top_k,
            "memory_types": ["episodic_memory", "profile"],
        }
        client_timeout = aiohttp.ClientTimeout(total=timeout)
        async with aiohttp.ClientSession(timeout=client_timeout) as session:
            async with session.get(
                f"{self.base_url}/api/v0/memories/search",
                json=search_params,
                headers=self.headers,
            ) as response:
                response.raise_for_status()
                return await response.json()

    async def agentic_search(
        self,
        user_id: str,
        query: str,
        top_k: int = 10,
        timeout: int = 60
    ) -> dict:
        """Perform an agentic search.

        Args:
            user_id: Sent as ``user_id`` in the request payload.
            query: Sent as ``query`` in the request payload.
            top_k: Sent as ``top_k`` in the request payload.
            timeout: Total request timeout in seconds.

        Returns:
            The response body parsed as JSON.
        """
        return await self._search(user_id, query, "agentic", top_k, timeout)

    async def search_with_fallback(
        self,
        user_id: str,
        query: str,
        prefer_agentic: bool = True
    ) -> dict:
        """Search, falling back to hybrid retrieval if agentic times out.

        When ``prefer_agentic`` is true, an agentic search with a 60 s
        timeout is tried first. Only ``asyncio.TimeoutError`` triggers the
        fallback; any other error propagates. The hybrid request uses
        ``top_k=10`` and a 10 s timeout.

        Returns:
            The response body parsed as JSON.
        """
        if prefer_agentic:
            try:
                return await self.agentic_search(user_id, query, timeout=60)
            except asyncio.TimeoutError:
                pass  # Deliberate: fall through to the hybrid request below.

        # Hybrid fallback (also the direct path when agentic is not preferred).
        return await self._search(user_id, query, "hybrid", top_k=10, timeout=10)
# Usage
async def main():
    """Run one example search with fallback and print the number of memories."""
    client = AgenticEverOSClient()
    result = await client.search_with_fallback(
        "user_alice",
        "What context do I need to understand our pricing strategy evolution?",
        prefer_agentic=True,
    )
    # Both levels are optional in the response, so default to empty containers.
    found = result.get("result", {}).get("memories", [])
    print(f"Found {len(found)} memories")


asyncio.run(main())
Best Practices
Always use appropriate timeouts and fallbacks: # Set longer timeout for agentic
response = requests.get(url, timeout=60)

# Implement fallback
try:
    result = agentic_search(...)
except (requests.Timeout, TimeoutError):
    # requests signals timeouts with requests.Timeout, which is not a
    # subclass of the builtin TimeoutError, so both are listed here.
    result = hybrid_search(...)  # Fallback
Cache results for repeated complex queries: from functools import lru_cache
import hashlib
def cache_key(user_id: str, query: str) -> str:
    """Return a hex MD5 digest identifying a ``(user_id, query)`` pair.

    The user id is length-prefixed before hashing so the boundary between the
    two fields is unambiguous. With a plain ``user_id:query`` join, the pairs
    ``("a:b", "c")`` and ``("a", "b:c")`` would produce the same key.

    Args:
        user_id: First component of the key.
        query: Second component of the key.

    Returns:
        A 32-character hexadecimal digest.
    """
    # MD5 serves only as a compact fingerprint here, not as a security measure.
    material = f"{len(user_id)}:{user_id}:{query}"
    return hashlib.md5(material.encode()).hexdigest()
# Cache expensive agentic results
@lru_cache(maxsize=100)
def cached_agentic_search(cache_key: str, user_id: str, query: str):
    """Memoised wrapper around ``agentic_search`` holding up to 100 entries.

    ``cache_key`` is not read in the body; it only takes part in the
    ``lru_cache`` lookup together with ``user_id`` and ``query``.
    """
    fresh_result = agentic_search(user_id, query)
    return fresh_result
Reserve agentic for high-value queries where accuracy matters: # Use agentic for:
- User explicitly asks for comprehensive context
- Preparing for important meetings / decisions
- Research and analysis tasks
# Use hybrid for:
- Real-time chat responses
- Simple lookups
- Frequently repeated queries
Next Steps
Concepts Guide Compare all retrieval methods
Python Integration Production patterns with timeout handling