Skip to main content
Build an AI assistant that remembers user preferences, past conversations, and upcoming commitments. This guide shows you how to integrate EverMemOS into a personal assistant workflow.

Architecture Overview

Personal Assistant Architecture

The assistant:
  1. Stores every conversation turn in EverMemOS
  2. Retrieves relevant context before generating responses
  3. Uses the context to personalize LLM responses

Setup: Configure Assistant Scene

For 1:1 assistants, use the assistant scene which enables all memory types including Foresight and EventLog.
import requests
from datetime import datetime

BASE_URL = "https://api.evermind.ai"
headers = {"Content-Type": "application/json"}

# Configure the conversation as assistant scene
def setup_assistant_scene(user_id: str, assistant_id: str):
    """Register conversation metadata for a 1:1 assistant conversation.

    Posts the group ID, the ``assistant`` scene, and both participants to the
    conversation-meta endpoint.

    Args:
        user_id: ID of the human participant; also used to derive the group ID.
        assistant_id: ID of the assistant participant.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        requests.Timeout: If the server does not respond within 10 seconds.
    """
    meta = {
        "group_id": f"assistant_{user_id}",
        "scene": "assistant",
        "user_details": [
            {
                "user_id": user_id,
                "user_name": "User",
                "role": "user"
            },
            {
                "user_id": assistant_id,
                "user_name": "Assistant",
                "role": "assistant"
            }
        ]
    }

    # requests has no default timeout; without one an unresponsive server
    # would block this call forever.
    response = requests.post(
        f"{BASE_URL}/api/v0/memories/conversation-meta",
        json=meta,
        headers=headers,
        timeout=10
    )
    return response.json()

# Initialize: registers the assistant scene for "user_alice".
# This sends the HTTP POST immediately when the line runs.
setup_assistant_scene("user_alice", "assistant_001")

Store Conversation Messages

Store each conversation turn as it happens. EverMemOS processes stored messages asynchronously in the background.
def store_message(user_id: str, message_id: str, content: str, is_assistant: bool = False,
                  assistant_id: str = "assistant_001"):
    """Store a single message in EverMemOS.

    Args:
        user_id: ID of the human participant; also used to derive the group ID.
        message_id: Unique ID for this message.
        content: Text of the message.
        is_assistant: True when the message was written by the assistant.
        assistant_id: Sender ID used for assistant messages.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        requests.Timeout: If the server does not respond within 10 seconds.
    """
    # Local import: the surrounding file only imports `datetime` itself.
    from datetime import timezone

    sender = assistant_id if is_assistant else user_id
    sender_name = "Assistant" if is_assistant else "User"

    # The "Z" suffix means UTC, so the timestamp must be taken in UTC.
    # Appending "Z" to naive local time mislabels it by the local UTC offset.
    create_time = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    message = {
        "group_id": f"assistant_{user_id}",
        "group_name": "Personal Assistant",
        "message_id": message_id,
        "create_time": create_time,
        "sender": sender,
        "sender_name": sender_name,
        "content": content
    }

    # requests has no default timeout; without one an unresponsive server
    # would block this call forever.
    response = requests.post(
        f"{BASE_URL}/api/v0/memories",
        json=message,
        headers=headers,
        timeout=10
    )
    return response.json()

Retrieve Relevant Context

Before generating a response, retrieve relevant memories to provide context to your LLM.
from datetime import datetime

def get_memory_context(user_id: str, user_message: str,
                       memory_types=("profile", "episodic_memory"),
                       top_k: int = 5) -> str:
    """Retrieve memories relevant to ``user_message`` and format them for an LLM.

    Args:
        user_id: ID of the user whose memories are searched.
        user_message: Text used as the search query.
        memory_types: Memory types to request. The default matches the
            original hard-coded list; pass e.g.
            ``("profile", "episodic_memory", "foresight")`` to also request
            foresight memories, which this function knows how to label.
        top_k: Maximum number of memories to request.

    Returns:
        One ``[Label] content`` line per labelled memory, joined with
        newlines, or an empty string when nothing usable came back.

    Raises:
        requests.Timeout: If the server does not respond within 10 seconds.
    """
    search_params = {
        "user_id": user_id,
        "query": user_message,
        "retrieve_method": "hybrid",
        "top_k": top_k,
        "memory_types": list(memory_types)
    }

    # requests has no default timeout; without one an unresponsive server
    # would block this call forever.
    response = requests.get(
        f"{BASE_URL}/api/v0/memories/search",
        json=search_params,
        headers=headers,
        timeout=10
    )

    result = response.json()
    # `or` guards against an explicit null for "result" / "memories",
    # which .get(..., default) alone would pass through as None.
    memories = (result.get("result") or {}).get("memories") or []

    labels = {
        "profile": "User Profile",
        "episodic_memory": "Past Conversation",
        "foresight": "Upcoming/Reminder",
    }

    context_parts = []
    for mem in memories:
        label = labels.get(mem.get("memory_type", "unknown"))
        if label is None:
            # Memory types without a label are left out of the context.
            continue
        context_parts.append(f"[{label}] {mem.get('memory_content', '')}")

    return "\n".join(context_parts)

Complete Assistant Loop

Here’s a complete implementation that ties everything together:
import requests
from datetime import datetime
import uuid

BASE_URL = "https://api.evermind.ai"
headers = {"Content-Type": "application/json"}

class PersonalAssistant:
    """Personal assistant that stores and recalls conversation turns via EverMemOS.

    Each ``chat`` call stores the user message, retrieves matching memories,
    generates a reply, and stores that reply.
    """

    # Seconds to wait for each EverMemOS request. requests has no default
    # timeout, so without this an unresponsive server would block forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, user_id: str):
        """Bind the assistant to ``user_id`` and register the assistant scene.

        Construction performs an HTTP POST via ``_setup_scene``.
        """
        self.user_id = user_id
        self.assistant_id = "assistant_001"
        self.group_id = f"assistant_{user_id}"
        self._setup_scene()

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as an ISO 8601 string ending in ``Z``."""
        # Local import: the surrounding file only imports `datetime` itself.
        from datetime import timezone

        # "Z" means UTC, so the clock must be read in UTC; appending "Z" to
        # naive local time mislabels it by the local UTC offset.
        return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    def _setup_scene(self):
        """Configure assistant scene for this user."""
        meta = {
            "group_id": self.group_id,
            "scene": "assistant",
            "user_details": [
                {"user_id": self.user_id, "user_name": "User", "role": "user"},
                {"user_id": self.assistant_id, "user_name": "Assistant", "role": "assistant"}
            ]
        }
        requests.post(
            f"{BASE_URL}/api/v0/memories/conversation-meta",
            json=meta,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )

    def _store_message(self, content: str, is_assistant: bool = False):
        """Store a message in EverMemOS.

        Args:
            content: Text of the message.
            is_assistant: True when the message was written by the assistant.
        """
        message = {
            "group_id": self.group_id,
            "group_name": "Personal Assistant",
            "message_id": str(uuid.uuid4()),
            "create_time": self._utc_timestamp(),
            "sender": self.assistant_id if is_assistant else self.user_id,
            "sender_name": "Assistant" if is_assistant else "User",
            "content": content
        }
        requests.post(
            f"{BASE_URL}/api/v0/memories",
            json=message,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )

    def _get_context(self, query: str,
                     memory_types=("profile", "episodic_memory"),
                     top_k: int = 5) -> str:
        """Retrieve relevant memory context.

        Args:
            query: Text used as the search query.
            memory_types: Memory types to request; the default matches the
                original hard-coded list.
            top_k: Maximum number of memories to request.

        Returns:
            One ``[Memory Type] content`` line per memory, joined with
            newlines, or ``"No relevant memories found."`` when the search
            returns nothing.
        """
        search_params = {
            "user_id": self.user_id,
            "query": query,
            "retrieve_method": "hybrid",
            "top_k": top_k,
            "memory_types": list(memory_types)
        }

        response = requests.get(
            f"{BASE_URL}/api/v0/memories/search",
            json=search_params,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        payload = response.json()
        # `or` guards against an explicit null for "result" / "memories",
        # which .get(..., default) alone would pass through as None.
        memories = (payload.get("result") or {}).get("memories") or []

        if not memories:
            return "No relevant memories found."

        parts = []
        for mem in memories:
            # "episodic_memory" -> "Episodic Memory"
            mem_type = mem.get("memory_type", "").replace("_", " ").title()
            content = mem.get("memory_content", "")
            parts.append(f"[{mem_type}] {content}")

        return "\n".join(parts)

    def _generate_response(self, user_message: str, context: str) -> str:
        """Generate response using your LLM of choice.

        Args:
            user_message: The user's latest message.
            context: Memory context produced by ``_get_context``.

        Returns:
            The assistant reply. This demo returns a placeholder string that
            echoes the first 100 characters of ``context``.
        """
        # Replace this with your actual LLM call (OpenAI, Anthropic, etc.)
        # `prompt` is built for that call; the demo placeholder does not use it.
        prompt = f"""You are a helpful personal assistant. Use the following context about the user to personalize your response.

MEMORY CONTEXT:
{context}

USER MESSAGE:
{user_message}

Respond naturally, incorporating relevant context when appropriate. Don't explicitly mention that you're using memory unless asked."""

        # Example: OpenAI call (replace with your LLM)
        # response = openai.chat.completions.create(
        #     model="gpt-4",
        #     messages=[{"role": "user", "content": prompt}]
        # )
        # return response.choices[0].message.content

        # Placeholder for demo
        return f"[LLM would respond here with context: {context[:100]}...]"

    def chat(self, user_message: str) -> str:
        """Main chat method - store, retrieve, generate, store.

        Args:
            user_message: The user's latest message.

        Returns:
            The generated assistant reply.
        """
        # 1. Store user message
        self._store_message(user_message, is_assistant=False)

        # 2. Retrieve relevant context
        context = self._get_context(user_message)

        # 3. Generate response with context
        response = self._generate_response(user_message, context)

        # 4. Store assistant response
        self._store_message(response, is_assistant=True)

        return response

# Usage
assistant = PersonalAssistant("user_alice")

# Simulate a conversation: the first turn states a preference, and the
# second response will use memory of that preference.
for turn in (
    "I prefer meetings in the morning, before 10am.",
    "What time works best for our call tomorrow?",
):
    print(assistant.chat(turn))

Example: Using Preferences

After a few conversations, your assistant can leverage stored preferences:
# Earlier conversation (already stored)
# User: "I'm vegetarian and allergic to nuts"
# User: "I love Italian food"

# Later conversation
user_message = "Can you suggest a restaurant for dinner?"

context = assistant._get_context(user_message)
# Illustrative context (one "[Memory Type] content" line per memory;
# the exact wording depends on what was stored):
# [Profile] User is vegetarian
# [Profile] User has nut allergy
# [Profile] User enjoys Italian cuisine

# Illustrative LLM reply: "How about that new Italian place downtown?
# They have great vegetarian options and I checked - they're
# nut-free friendly!"

Example: Using Foresight for Reminders

Foresight memories surface time-relevant information:
# Earlier conversation
# User: "Remind me to call mom on Sunday at 5pm"

# On Sunday at 4:30pm, user asks:
user_message = "What do I have coming up today?"

context = assistant._get_context(user_message)
# NOTE(review): _get_context requests only the "profile" and
# "episodic_memory" memory types - confirm that foresight memories are
# returned for this search before relying on the output below.
# Illustrative context:
# [Foresight] Call mom at 5pm today

# Illustrative LLM reply: "You have a reminder to call your mom at 5pm -
# that's in about 30 minutes!"

Best Practices

Use UUIDs or timestamp-based IDs to ensure uniqueness. Duplicate message IDs will be ignored.
# Timestamp-based ID: the user ID plus the current time in milliseconds.
# Two messages created for the same user within one millisecond get the same ID.
message_id = f"{user_id}_{int(datetime.now().timestamp() * 1000)}"
# or: a random UUID, which does not depend on the clock
message_id = str(uuid.uuid4())
Limit retrieved memories to avoid overwhelming your LLM context window.
# Good: Limit to most relevant
"top_k": 5

# Better: Truncate if needed
context = context[:2000]  # Limit to ~500 tokens
Store messages asynchronously to avoid blocking your main thread.
import asyncio
import aiohttp

async def store_message_async(message):
    """POST one message payload to the memories endpoint using aiohttp.

    Args:
        message: JSON-serializable message payload, sent as the request body.
    """
    endpoint = f"{BASE_URL}/api/v0/memories"
    # A new client session is opened for this request and closed afterwards.
    async with aiohttp.ClientSession() as http:
        await http.post(endpoint, json=message)

# Fire and forget (must run inside a running event loop).
# The event loop keeps only weak references to tasks, so an unreferenced
# task can be garbage-collected before it finishes. Hold a strong reference
# until the task completes, then drop it.
background_tasks = set()
task = asyncio.create_task(store_message_async(message))
background_tasks.add(task)
task.add_done_callback(background_tasks.discard)

Next Steps