🤖
Agent Party

Memory

Select frameworks to compare

Pick one or more frameworks from the bar above

Memory

OpenAI

from openai import OpenAI

# NOTE(review): model id copied verbatim from the snippet — confirm it exists.
LLM_MODEL = "gpt-5.4"
client = OpenAI()

# turn 1 — a fresh request with no prior context
response = client.responses.create(
    model=LLM_MODEL,
    input="What is the capital of France?",
)
print(response.output_text)
# "The capital of France is Paris."

# turn 2 — previous_response_id continues the conversation server-side,
# so the model sees turn 1 without the client resending any messages
response = client.responses.create(
    model=LLM_MODEL,
    previous_response_id=response.id,
    input=[{"role": "user", "content": "What is its population?"}],
)
print(response.output_text)
# "The population of Paris is approximately 2.1 million..."

Anthropic

import anthropic

LLM_MODEL = "claude-opus-4-6"
client = anthropic.Anthropic()

# The Anthropic SDK keeps no conversation state — persist it ourselves,
# keyed by thread id.
store: dict[str, list] = {}

def chat(thread_id: str, message: str) -> str:
    """Send one user message on *thread_id* and return the assistant's reply."""
    # On a known thread this returns the stored list object, so the appends
    # below mutate it in place; the final assignment registers new threads.
    thread = store.get(thread_id, [])
    thread.append({"role": "user", "content": message})
    reply = client.messages.create(
        model=LLM_MODEL, max_tokens=1024, messages=thread,
    )
    # Store the full content-block list so tool/use blocks survive round-trips.
    thread.append({"role": "assistant", "content": reply.content})
    store[thread_id] = thread
    return reply.content[0].text

# turn 1
print(chat("chat_1", "What is the capital of France?"))
# "The capital of France is Paris."

# turn 2 — same thread, history is restored from the store
print(chat("chat_1", "What is its population?"))
# "The population of Paris is approximately 2.1 million..."

Gemini

from google import genai

LLM_MODEL = "gemini-pro-latest"
client = genai.Client()

# chat session manages history — just send the next message
# (the Chat object accumulates the turns and resends them on each call)
chat = client.chats.create(model=LLM_MODEL)

# turn 1
response = chat.send_message("What is the capital of France?")
print(response.text)
# "The capital of France is Paris."

# turn 2 — chat session remembers the conversation
response = chat.send_message("What is its population?")
print(response.text)
# "The population of Paris is approximately 2.1 million..."

Pydantic AI

from pydantic_ai import Agent

LLM_MODEL = "openai:gpt-5.4"
agent = Agent(LLM_MODEL)

# Pydantic AI has no built-in memory — store messages yourself
store: dict[str, list] = {}

def chat(thread_id: str, message: str) -> str:
    """Run one turn on *thread_id*, persisting the full transcript.

    Fix: the original stored ``result.new_messages()``, which contains only
    the messages produced by *this* run — from the third turn onward every
    earlier exchange would be silently dropped. ``result.all_messages()``
    returns the supplied history plus the new messages, so the thread
    accumulates correctly across any number of turns.
    """
    history = store.get(thread_id, [])
    result = agent.run_sync(message, message_history=history)
    # all_messages() == message_history + new_messages(): keep the whole thread
    store[thread_id] = result.all_messages()
    return result.output

# turn 1
print(chat("chat_1", "What is the capital of France?"))
# "The capital of France is Paris."

# turn 2 — same thread, history is restored from the store
print(chat("chat_1", "What is its population?"))
# "The population of Paris is approximately 2.1 million..."

LangGraph

from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver

LLM_MODEL = "gpt-5.4"
model = ChatOpenAI(model=LLM_MODEL)

# checkpointer persists state — same thread_id restores history
# NOTE: MemorySaver is in-process only; state is lost when the process exits
checkpointer = MemorySaver()
agent = create_agent(model, tools=[], checkpointer=checkpointer)

# thread_id selects which checkpointed conversation to resume
config = {"configurable": {"thread_id": "chat_1"}}

# turn 1
result = agent.invoke(
    {"messages": [("user", "What is the capital of France?")]},
    config=config,
)
# the last message in the returned state is the assistant's reply
print(result["messages"][-1].content)
# "The capital of France is Paris."

# turn 2 — same thread_id, checkpointer restores history automatically
result = agent.invoke(
    {"messages": [("user", "What is its population?")]},
    config=config,
)
print(result["messages"][-1].content)
# "The population of Paris is approximately 2.1 million..."

AI SDK

import { generateText, type ModelMessage } from "ai";
import { openai } from "@ai-sdk/openai";

const LLM_MODEL = "gpt-5.4";

// AI SDK has no built-in memory — store messages yourself
const store: Record<string, ModelMessage[]> = {};

async function chat(threadId: string, message: string): Promise<string> {
  // Known threads hand back the stored array, so the pushes below mutate it
  // in place; the trailing assignment registers brand-new threads.
  const thread = store[threadId] ?? [];
  thread.push({ role: "user", content: message });
  const { text } = await generateText({
    model: openai(LLM_MODEL),
    messages: thread,
  });
  thread.push({ role: "assistant", content: text });
  store[threadId] = thread;
  return text;
}

// turn 1
console.log(await chat("chat_1", "What is the capital of France?"));
// "The capital of France is Paris."

// turn 2 — same thread, history is restored from the store
console.log(await chat("chat_1", "What is its population?"));
// "The population of Paris is approximately 2.1 million..."

Mastra

import { Agent } from "@mastra/core/agent";
import { Memory } from "@mastra/memory";

const LLM_MODEL = "openai/gpt-5.4";

// Memory manages conversation state — same thread restores history
const agent = new Agent({
  name: "assistant",
  instructions: "You are a helpful assistant.",
  model: LLM_MODEL,
  memory: new Memory(),
});

// thread scopes one conversation; resource scopes the owning user
const memoryConfig = { memory: { thread: "chat_1", resource: "user_1" } };

// turn 1
const result1 = await agent.generate(
  "What is the capital of France?",
  memoryConfig,
);
console.log(result1.text);
// "The capital of France is Paris."

// turn 2 — same thread, memory restores history automatically
const result2 = await agent.generate(
  "What is its population?",
  memoryConfig,
);
console.log(result2.text);
// "The population of Paris is approximately 2.1 million..."

History Compaction

OpenAI

from openai import OpenAI

LLM_MODEL = "gpt-5.4"
client = OpenAI()

# Sliding window: keep only the last N messages in each request.
# Cheapest strategy — zero extra latency, but total context loss
# beyond the window. The full transcript stays in `messages`.
WINDOW = 10
messages: list = []

def chat(message: str) -> str:
    """Record *message*, query the model on the trailing window, return the reply."""
    messages.append({"role": "user", "content": message})
    recent = messages[-WINDOW:]  # only this slice goes over the wire
    response = client.responses.create(model=LLM_MODEL, input=recent)
    answer = response.output_text
    messages.append({"role": "assistant", "content": answer})
    return answer

print(chat("What is the capital of France?"))
print(chat("What is its population?"))

Anthropic

import anthropic

LLM_MODEL = "claude-opus-4-6"
client = anthropic.Anthropic()

# Sliding window: keep only the last N messages in each request.
# Cheapest strategy — zero extra latency, but total context loss
# beyond the window. The full transcript stays in `messages`.
WINDOW = 10
messages: list = []

def chat(message: str) -> str:
    """Record *message*, query the model on the trailing window, return the reply."""
    messages.append({"role": "user", "content": message})
    recent = messages[-WINDOW:]  # even WINDOW keeps user/assistant pairs intact
    reply = client.messages.create(
        model=LLM_MODEL, max_tokens=1024, messages=recent,
    )
    messages.append({"role": "assistant", "content": reply.content})
    return reply.content[0].text

print(chat("What is the capital of France?"))
print(chat("What is its population?"))

Gemini

from google import genai
from google.genai import types

LLM_MODEL = "gemini-pro-latest"
client = genai.Client()

# Sliding window: keep only the last N messages in each request.
# Cheapest strategy — zero extra latency, but total context loss beyond
# the window. chat.send_message() can't be used here because it manages
# history internally; generate_content takes the window explicitly.
WINDOW = 10
history: list[types.Content] = []

def chat(message: str) -> str:
    """Record *message*, query the model on the trailing window, return the reply."""
    user_turn = types.Content(role="user", parts=[types.Part(text=message)])
    history.append(user_turn)
    response = client.models.generate_content(
        model=LLM_MODEL, contents=history[-WINDOW:],
    )
    answer = response.text
    history.append(types.Content(role="model", parts=[types.Part(text=answer)]))
    return answer

print(chat("What is the capital of France?"))
print(chat("What is its population?"))

Pydantic AI

from pydantic_ai import Agent
from pydantic_ai.messages import ModelMessage

LLM_MODEL = "openai:gpt-5.4"

# sliding window: keep last N messages, discard everything older
# processor runs before each model call, replaces history for that call only
WINDOW = 10

def sliding_window(messages: list[ModelMessage]) -> list[ModelMessage]:
    """Trim the history each model call sees to the trailing WINDOW messages."""
    return messages[-WINDOW:]

agent = Agent(LLM_MODEL, history_processors=[sliding_window])

history: list[ModelMessage] = []

# Fix: the original reassigned ``history = result.new_messages()``, which
# contains only the current run's messages — every earlier turn would be
# dropped from the third turn onward. ``all_messages()`` returns the supplied
# history plus the new messages; the processor above still trims what is
# actually sent to the model.
result = agent.run_sync("What is the capital of France?", message_history=history)
history = result.all_messages()
print(result.output)

result = agent.run_sync("What is its population?", message_history=history)
history = result.all_messages()
print(result.output)

LangGraph

from langchain.agents import create_agent
from langchain.agents.middleware import before_model
from langchain_openai import ChatOpenAI
from langchain_core.messages import RemoveMessage
from langgraph.graph.message import REMOVE_ALL_MESSAGES
from langgraph.checkpoint.memory import MemorySaver

LLM_MODEL = "gpt-5.4"
model = ChatOpenAI(model=LLM_MODEL)

# sliding window: keep last N messages via @before_model middleware
# runs before every model call — trims checkpointed state in-place
WINDOW = 10

@before_model
def sliding_window(state, runtime):
    # Trim the message state to the trailing WINDOW before each model call.
    # Returning a dict updates graph state; returning None leaves it untouched.
    messages = state["messages"]
    if len(messages) <= WINDOW:
        return None  # short enough — no trimming needed
    # remove all, then re-add only the recent window
    # (REMOVE_ALL_MESSAGES is a sentinel id that clears the channel first)
    # NOTE(review): trimming can orphan tool-call/tool-result pairs at the
    # window edge — acceptable for a demo, verify for production use.
    return {
        "messages": [
            RemoveMessage(id=REMOVE_ALL_MESSAGES),
            *messages[-WINDOW:],
        ]
    }

agent = create_agent(
    model, tools=[], middleware=[sliding_window],
    checkpointer=MemorySaver(),
)

# thread_id selects which checkpointed conversation to resume
config = {"configurable": {"thread_id": "chat_1"}}

result = agent.invoke(
    {"messages": [("user", "What is the capital of France?")]},
    config=config,
)
print(result["messages"][-1].content)

result = agent.invoke(
    {"messages": [("user", "What is its population?")]},
    config=config,
)
print(result["messages"][-1].content)

AI SDK

import { generateText, type ModelMessage } from "ai";
import { openai } from "@ai-sdk/openai";

const LLM_MODEL = "gpt-5.4";

// Sliding window: keep only the last N messages in each request.
// Cheapest strategy — zero extra latency, but total context loss
// beyond the window. The full transcript stays in `messages`.
const WINDOW = 10;
const messages: ModelMessage[] = [];

async function chat(message: string): Promise<string> {
  messages.push({ role: "user", content: message });
  // only the trailing slice goes over the wire
  const { text } = await generateText({
    model: openai(LLM_MODEL),
    messages: messages.slice(-WINDOW),
  });
  messages.push({ role: "assistant", content: text });
  return text;
}

console.log(await chat("What is the capital of France?"));
console.log(await chat("What is its population?"));

Mastra

import { Agent } from "@mastra/core/agent";
import { Memory } from "@mastra/memory";

const LLM_MODEL = "openai/gpt-5.4";

// sliding window: keep last N messages, discard everything older
// built into Memory — one config option, no custom code needed
const agent = new Agent({
  name: "assistant",
  instructions: "You are a helpful assistant.",
  model: LLM_MODEL,
  memory: new Memory({
    // lastMessages bounds how many stored messages are replayed per call
    options: { lastMessages: 10 },
  }),
});

// thread scopes one conversation; resource scopes the owning user
const memoryConfig = { memory: { thread: "chat_1", resource: "user_1" } };

const result1 = await agent.generate(
  "What is the capital of France?",
  memoryConfig,
);
console.log(result1.text);

const result2 = await agent.generate(
  "What is its population?",
  memoryConfig,
);
console.log(result2.text);