Select frameworks to compare
Pick one or more frameworks from the bar above
Memory
OpenAI
from openai import OpenAI

LLM_MODEL = "gpt-5.4"

client = OpenAI()

# Turn 1: a plain, stateless request.
first = client.responses.create(
    model=LLM_MODEL,
    input="What is the capital of France?",
)
print(first.output_text)
# "The capital of France is Paris."

# Turn 2: chaining via previous_response_id lets the server restore the
# earlier turn — no client-side history bookkeeping required.
response = client.responses.create(
    model=LLM_MODEL,
    previous_response_id=first.id,
    input=[{"role": "user", "content": "What is its population?"}],
)
print(response.output_text)
# "The population of Paris is approximately 2.1 million..."
Anthropic
import anthropic

LLM_MODEL = "claude-opus-4-6"

client = anthropic.Anthropic()

# Anthropic has no built-in memory — store messages yourself
store: dict[str, list] = {}


def chat(thread_id: str, message: str) -> str:
    """Run one user turn on `thread_id`, persisting the transcript in `store`."""
    thread = store.get(thread_id, [])
    thread.append({"role": "user", "content": message})
    reply = client.messages.create(
        model=LLM_MODEL,
        max_tokens=1024,
        messages=thread,
    )
    # Keep the assistant's content blocks so the next call sees the full turn.
    thread.append({"role": "assistant", "content": reply.content})
    store[thread_id] = thread
    return reply.content[0].text


# turn 1
print(chat("chat_1", "What is the capital of France?"))
# "The capital of France is Paris."
# turn 2 — same thread, history is restored from the store
print(chat("chat_1", "What is its population?"))
# "The population of Paris is approximately 2.1 million..."
Gemini
from google import genai

LLM_MODEL = "gemini-pro-latest"

client = genai.Client()

# A chat session keeps its own history — each send only needs the new message.
session = client.chats.create(model=LLM_MODEL)

# turn 1
answer = session.send_message("What is the capital of France?")
print(answer.text)
# "The capital of France is Paris."

# turn 2 — chat session remembers the conversation
answer = session.send_message("What is its population?")
print(answer.text)
# "The population of Paris is approximately 2.1 million..."
Pydantic AI
from pydantic_ai import Agent

LLM_MODEL = "openai:gpt-5.4"

agent = Agent(LLM_MODEL)

# Pydantic AI has no built-in memory — store messages yourself
store: dict[str, list] = {}


def chat(thread_id: str, message: str) -> str:
    """Run one turn on `thread_id`, keeping the full transcript in `store`.

    Args:
        thread_id: Key identifying the conversation in the store.
        message: The user's next message.

    Returns:
        The model's text output for this turn.
    """
    history = store.get(thread_id, [])
    result = agent.run_sync(message, message_history=history)
    # BUG FIX: result.new_messages() returns only this run's messages, which
    # would silently drop every earlier turn from the store. all_messages()
    # is the prior history plus this run — the full transcript to persist.
    store[thread_id] = result.all_messages()
    return result.output


# turn 1
print(chat("chat_1", "What is the capital of France?"))
# "The capital of France is Paris."
# turn 2 — same thread, history is restored from the store
print(chat("chat_1", "What is its population?"))
# "The population of Paris is approximately 2.1 million..."
LangGraph
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver

LLM_MODEL = "gpt-5.4"

model = ChatOpenAI(model=LLM_MODEL)

# The checkpointer persists graph state; reusing a thread_id restores history.
agent = create_agent(model, tools=[], checkpointer=MemorySaver())
config = {"configurable": {"thread_id": "chat_1"}}


def ask(text: str) -> str:
    """Invoke the agent on the shared thread and return the latest reply."""
    state = agent.invoke({"messages": [("user", text)]}, config=config)
    return state["messages"][-1].content


# turn 1
print(ask("What is the capital of France?"))
# "The capital of France is Paris."
# turn 2 — same thread_id, checkpointer restores history automatically
print(ask("What is its population?"))
# "The population of Paris is approximately 2.1 million..."
AI SDK
import { generateText, type ModelMessage } from "ai";
import { openai } from "@ai-sdk/openai";

const LLM_MODEL = "gpt-5.4";

// AI SDK has no built-in memory — store messages yourself
const store: Record<string, ModelMessage[]> = {};

async function chat(threadId: string, message: string): Promise<string> {
  // Restore (or start) this thread's transcript, then add the new user turn.
  const thread = store[threadId] ?? [];
  thread.push({ role: "user", content: message });
  const { text } = await generateText({
    model: openai(LLM_MODEL),
    messages: thread,
  });
  // Record the assistant turn and write the transcript back.
  thread.push({ role: "assistant", content: text });
  store[threadId] = thread;
  return text;
}

// turn 1
console.log(await chat("chat_1", "What is the capital of France?"));
// "The capital of France is Paris."
// turn 2 — same thread, history is restored from the store
console.log(await chat("chat_1", "What is its population?"));
// "The population of Paris is approximately 2.1 million..."
Mastra
import { Agent } from "@mastra/core/agent";
import { Memory } from "@mastra/memory";

const LLM_MODEL = "openai/gpt-5.4";

// Memory manages conversation state — the same thread restores its history.
const agent = new Agent({
  name: "assistant",
  instructions: "You are a helpful assistant.",
  model: LLM_MODEL,
  memory: new Memory(),
});

// One thread per conversation, scoped to a resource (e.g. a user).
const memoryConfig = { memory: { thread: "chat_1", resource: "user_1" } };

// turn 1
const turn1 = await agent.generate(
  "What is the capital of France?",
  memoryConfig,
);
console.log(turn1.text);
// "The capital of France is Paris."

// turn 2 — same thread, memory restores history automatically
const turn2 = await agent.generate(
  "What is its population?",
  memoryConfig,
);
console.log(turn2.text);
// "The population of Paris is approximately 2.1 million..."

History Compaction
OpenAI
from openai import OpenAI

LLM_MODEL = "gpt-5.4"

client = OpenAI()

# Sliding window: only the most recent WINDOW messages are ever sent.
# Cheapest compaction strategy — zero extra latency — but everything older
# than the window is lost from the model's view entirely.
WINDOW = 10
messages: list = []


def chat(message: str) -> str:
    """Append one user turn and query the model with the trimmed window."""
    messages.append({"role": "user", "content": message})
    recent = messages[-WINDOW:]
    response = client.responses.create(model=LLM_MODEL, input=recent)
    messages.append({"role": "assistant", "content": response.output_text})
    return response.output_text


print(chat("What is the capital of France?"))
print(chat("What is its population?"))
Anthropic
import anthropic

LLM_MODEL = "claude-opus-4-6"

client = anthropic.Anthropic()

# Sliding window: only the most recent WINDOW messages are ever sent.
# Cheapest compaction strategy — zero extra latency — but everything older
# than the window is lost from the model's view entirely.
WINDOW = 10
messages: list = []


def chat(message: str) -> str:
    """Append one user turn and query the model with the trimmed window."""
    messages.append({"role": "user", "content": message})
    recent = messages[-WINDOW:]
    reply = client.messages.create(
        model=LLM_MODEL,
        max_tokens=1024,
        messages=recent,
    )
    messages.append({"role": "assistant", "content": reply.content})
    return reply.content[0].text


print(chat("What is the capital of France?"))
print(chat("What is its population?"))
Gemini
from google import genai
from google.genai import types

LLM_MODEL = "gemini-pro-latest"

client = genai.Client()

# Sliding window: only the most recent WINDOW messages are ever sent.
# Cheapest compaction strategy — zero extra latency — but everything older
# than the window is lost from the model's view entirely.
# Can't use chat.send_message() here — it manages history internally.
WINDOW = 10
history: list[types.Content] = []


def _turn(role: str, text: str) -> types.Content:
    """Build a single-part Content entry for the given role."""
    return types.Content(role=role, parts=[types.Part(text=text)])


def chat(message: str) -> str:
    """Append one user turn and generate with only the trimmed window."""
    history.append(_turn("user", message))
    recent = history[-WINDOW:]
    response = client.models.generate_content(
        model=LLM_MODEL,
        contents=recent,
    )
    history.append(_turn("model", response.text))
    return response.text


print(chat("What is the capital of France?"))
print(chat("What is its population?"))
Pydantic AI
from pydantic_ai import Agent
from pydantic_ai.messages import ModelMessage

LLM_MODEL = "openai:gpt-5.4"

# sliding window: keep last N messages, discard everything older
# processor runs before each model call, replaces history for that call
WINDOW = 10


def sliding_window(messages: list[ModelMessage]) -> list[ModelMessage]:
    """Trim the history sent to the model to the most recent WINDOW items."""
    return messages[-WINDOW:]


agent = Agent(LLM_MODEL, history_processors=[sliding_window])

history: list[ModelMessage] = []
result = agent.run_sync("What is the capital of France?", message_history=history)
# BUG FIX: result.new_messages() holds only this run's messages, so each turn
# would overwrite — and lose — everything before it. all_messages() is the
# prior history plus this run; the processor trims it at model-call time.
history = result.all_messages()
print(result.output)

result = agent.run_sync("What is its population?", message_history=history)
history = result.all_messages()
print(result.output)
LangGraph
from langchain.agents import create_agent
from langchain.agents.middleware import before_model
from langchain_openai import ChatOpenAI
from langchain_core.messages import RemoveMessage
from langgraph.graph.message import REMOVE_ALL_MESSAGES
from langgraph.checkpoint.memory import MemorySaver

LLM_MODEL = "gpt-5.4"

model = ChatOpenAI(model=LLM_MODEL)

# Sliding window via @before_model middleware: runs before every model call
# and trims the checkpointed state down to the last WINDOW messages.
WINDOW = 10


@before_model
def sliding_window(state, runtime):
    """Keep only the most recent WINDOW messages in checkpointed state."""
    msgs = state["messages"]
    if len(msgs) <= WINDOW:
        return None  # already within budget — leave state untouched
    # Wipe the stored history, then write back only the recent tail.
    trimmed = [RemoveMessage(id=REMOVE_ALL_MESSAGES)]
    trimmed.extend(msgs[-WINDOW:])
    return {"messages": trimmed}


agent = create_agent(
    model,
    tools=[],
    middleware=[sliding_window],
    checkpointer=MemorySaver(),
)
config = {"configurable": {"thread_id": "chat_1"}}

result = agent.invoke(
    {"messages": [("user", "What is the capital of France?")]},
    config=config,
)
print(result["messages"][-1].content)

result = agent.invoke(
    {"messages": [("user", "What is its population?")]},
    config=config,
)
print(result["messages"][-1].content)
AI SDK
import { generateText, type ModelMessage } from "ai";
import { openai } from "@ai-sdk/openai";

const LLM_MODEL = "gpt-5.4";

// Sliding window: only the most recent WINDOW messages are ever sent.
// Cheapest compaction strategy — zero extra latency — but everything older
// than the window is lost from the model's view entirely.
const WINDOW = 10;
const messages: ModelMessage[] = [];

async function chat(message: string): Promise<string> {
  messages.push({ role: "user", content: message });
  // Full transcript stays in `messages`; only the tail goes to the model.
  const recent = messages.slice(-WINDOW);
  const { text } = await generateText({
    model: openai(LLM_MODEL),
    messages: recent,
  });
  messages.push({ role: "assistant", content: text });
  return text;
}

console.log(await chat("What is the capital of France?"));
console.log(await chat("What is its population?"));
Mastra
import { Agent } from "@mastra/core/agent";
import { Memory } from "@mastra/memory";

const LLM_MODEL = "openai/gpt-5.4";

// Sliding window: keep the last N messages, discard everything older.
// Built into Memory — a single config option, no custom code needed.
const agent = new Agent({
  name: "assistant",
  instructions: "You are a helpful assistant.",
  model: LLM_MODEL,
  memory: new Memory({
    options: { lastMessages: 10 },
  }),
});

// One thread per conversation, scoped to a resource (e.g. a user).
const memoryConfig = { memory: { thread: "chat_1", resource: "user_1" } };

const turn1 = await agent.generate(
  "What is the capital of France?",
  memoryConfig,
);
console.log(turn1.text);

const turn2 = await agent.generate(
  "What is its population?",
  memoryConfig,
);
console.log(turn2.text);