Select frameworks to compare
Pick one or more frameworks from the bar above
Setup
OpenAI
from pathlib import Path
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# ChromaDB is a vector database that stores documents and their embeddings.
# PersistentClient saves the index to disk so agent scripts can query it later.
client = chromadb.PersistentClient(path="./acme_index")

# tell ChromaDB which embedding model to use for indexing and queries.
# without this, it silently downloads an ~80MB local model on first run.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)

# a collection groups related documents — the embedding function above
# converts text to vectors automatically during add and query operations
collection = client.get_or_create_collection(
    "acme_docs", embedding_function=openai_ef,
)

# load documents from docs.txt — each non-empty line becomes one document.
# in production you'd load real files, DB rows, or API responses instead.
# strip before the "#" check so indented comment lines are skipped too
# (checking startswith on the raw line misses comments with leading spaces).
lines = [line for line in Path("docs.txt").read_text().splitlines()
         if line.strip() and not line.lstrip().startswith("#")]

# upsert inserts new docs or updates existing ones (safe to re-run)
collection.upsert(
    documents=lines,
    ids=[f"doc_{i}" for i in range(len(lines))],
)
print(f"Indexed {len(lines)} documents")
Anthropic
from pathlib import Path
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# ChromaDB is a vector database that stores documents and their embeddings.
# PersistentClient saves the index to disk so agent scripts can query it later.
client = chromadb.PersistentClient(path="./acme_index")

# tell ChromaDB which embedding model to use for indexing and queries.
# without this, it silently downloads an ~80MB local model on first run.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)

# a collection groups related documents — the embedding function above
# converts text to vectors automatically during add and query operations
collection = client.get_or_create_collection(
    "acme_docs", embedding_function=openai_ef,
)

# load documents from docs.txt — each non-empty line becomes one document.
# in production you'd load real files, DB rows, or API responses instead.
# strip before the "#" check so indented comment lines are skipped too
# (checking startswith on the raw line misses comments with leading spaces).
lines = [line for line in Path("docs.txt").read_text().splitlines()
         if line.strip() and not line.lstrip().startswith("#")]

# upsert inserts new docs or updates existing ones (safe to re-run)
collection.upsert(
    documents=lines,
    ids=[f"doc_{i}" for i in range(len(lines))],
)
print(f"Indexed {len(lines)} documents")
Gemini
from pathlib import Path
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# ChromaDB is a vector database that stores documents and their embeddings.
# PersistentClient saves the index to disk so agent scripts can query it later.
client = chromadb.PersistentClient(path="./acme_index")

# tell ChromaDB which embedding model to use for indexing and queries.
# without this, it silently downloads an ~80MB local model on first run.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)

# a collection groups related documents — the embedding function above
# converts text to vectors automatically during add and query operations
collection = client.get_or_create_collection(
    "acme_docs", embedding_function=openai_ef,
)

# load documents from docs.txt — each non-empty line becomes one document.
# in production you'd load real files, DB rows, or API responses instead.
# strip before the "#" check so indented comment lines are skipped too
# (checking startswith on the raw line misses comments with leading spaces).
lines = [line for line in Path("docs.txt").read_text().splitlines()
         if line.strip() and not line.lstrip().startswith("#")]

# upsert inserts new docs or updates existing ones (safe to re-run)
collection.upsert(
    documents=lines,
    ids=[f"doc_{i}" for i in range(len(lines))],
)
print(f"Indexed {len(lines)} documents")
Pydantic AI
from pathlib import Path
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# ChromaDB is a vector database that stores documents and their embeddings.
# PersistentClient saves the index to disk so agent scripts can query it later.
client = chromadb.PersistentClient(path="./acme_index")

# tell ChromaDB which embedding model to use for indexing and queries.
# without this, it silently downloads an ~80MB local model on first run.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)

# a collection groups related documents — the embedding function above
# converts text to vectors automatically during add and query operations
collection = client.get_or_create_collection(
    "acme_docs", embedding_function=openai_ef,
)

# load documents from docs.txt — each non-empty line becomes one document.
# in production you'd load real files, DB rows, or API responses instead.
# strip before the "#" check so indented comment lines are skipped too
# (checking startswith on the raw line misses comments with leading spaces).
lines = [line for line in Path("docs.txt").read_text().splitlines()
         if line.strip() and not line.lstrip().startswith("#")]

# upsert inserts new docs or updates existing ones (safe to re-run)
collection.upsert(
    documents=lines,
    ids=[f"doc_{i}" for i in range(len(lines))],
)
print(f"Indexed {len(lines)} documents")
LangGraph
from pathlib import Path
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# ChromaDB is a vector database that stores documents and their embeddings.
# PersistentClient saves the index to disk so agent scripts can query it later.
client = chromadb.PersistentClient(path="./acme_index")

# tell ChromaDB which embedding model to use for indexing and queries.
# without this, it silently downloads an ~80MB local model on first run.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)

# a collection groups related documents — the embedding function above
# converts text to vectors automatically during add and query operations
collection = client.get_or_create_collection(
    "acme_docs", embedding_function=openai_ef,
)

# load documents from docs.txt — each non-empty line becomes one document.
# in production you'd load real files, DB rows, or API responses instead.
# strip before the "#" check so indented comment lines are skipped too
# (checking startswith on the raw line misses comments with leading spaces).
lines = [line for line in Path("docs.txt").read_text().splitlines()
         if line.strip() and not line.lstrip().startswith("#")]

# upsert inserts new docs or updates existing ones (safe to re-run)
collection.upsert(
    documents=lines,
    ids=[f"doc_{i}" for i in range(len(lines))],
)
print(f"Indexed {len(lines)} documents")
AI SDK
import { LocalIndex } from "vectra";
import { readFileSync } from "fs";
import OpenAI from "openai";

// Vectra is a local vector database that stores documents with their embeddings.
// Unlike ChromaDB (Python), Vectra requires you to compute embeddings yourself —
// we use OpenAI's embedding model for that.
const openai = new OpenAI();

// LocalIndex persists to disk so agent scripts can query it later
const index = new LocalIndex("./acme_index");
if (!(await index.isIndexCreated())) await index.createIndex();

// load documents from docs.txt — each non-empty line becomes one document.
// in production you'd load real files, DB rows, or API responses instead.
// split on /\r?\n/ so CRLF files don't leave a stray "\r" on every document,
// and trim before the "#" check so indented comment lines are skipped too.
const lines = readFileSync("docs.txt", "utf-8")
  .split(/\r?\n/)
  .filter((l) => l.trim() && !l.trimStart().startsWith("#"));

// embed each document and store it in the index
for (const [i, doc] of lines.entries()) {
  const res = await openai.embeddings.create({
    model: "text-embedding-3-small",
    input: doc,
  });
  await index.insertItem({
    id: `doc_${i}`,
    metadata: { text: doc },
    vector: res.data[0].embedding,
  });
}
console.log(`Indexed ${lines.length} documents`);
Mastra
import { LocalIndex } from "vectra";
import { readFileSync } from "fs";
import OpenAI from "openai";

// Vectra is a local vector database that stores documents with their embeddings.
// Unlike ChromaDB (Python), Vectra requires you to compute embeddings yourself —
// we use OpenAI's embedding model for that.
const openai = new OpenAI();

// LocalIndex persists to disk so agent scripts can query it later
const index = new LocalIndex("./acme_index");
if (!(await index.isIndexCreated())) await index.createIndex();

// load documents from docs.txt — each non-empty line becomes one document.
// in production you'd load real files, DB rows, or API responses instead.
// split on /\r?\n/ so CRLF files don't leave a stray "\r" on every document,
// and trim before the "#" check so indented comment lines are skipped too.
const lines = readFileSync("docs.txt", "utf-8")
  .split(/\r?\n/)
  .filter((l) => l.trim() && !l.trimStart().startsWith("#"));

// embed each document and store it in the index
for (const [i, doc] of lines.entries()) {
  const res = await openai.embeddings.create({
    model: "text-embedding-3-small",
    input: doc,
  });
  await index.insertItem({
    id: `doc_${i}`,
    metadata: { text: doc },
    vector: res.data[0].embedding,
  });
}
console.log(`Indexed ${lines.length} documents`);

Similarity Search
OpenAI
import json
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from openai import OpenAI

LLM_MODEL = "gpt-5.4"
client = OpenAI()

# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
# queries must be embedded with the same model that built the index
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)

def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    print(f"-> call: search_docs({query!r})")
    results = collection.query(query_texts=[query], n_results=3)
    # results are batched per query text — [0] selects our single query's docs
    docs = results["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

tools = [{
    "type": "function",
    "name": "search_docs",
    "description": "Search the knowledge base for relevant documents.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string"},
        },
        "required": ["query"],
    },
}]

# named input_items (not `input`) to avoid shadowing the builtin
input_items = [{"role": "user", "content": "What's included in the Pro plan?"}]

# step 1: LLM calls the search tool
response = client.responses.create(
    model=LLM_MODEL, input=input_items, tools=tools,
)

# handle all tool calls — the LLM may emit more than one
input_items += response.output
for item in response.output:
    if item.type == "function_call":
        result = search_docs(**json.loads(item.arguments))
        input_items.append({
            "type": "function_call_output",
            "call_id": item.call_id,
            "output": result,
        })

# step 2: send results back, LLM generates answer
response = client.responses.create(
    model=LLM_MODEL, input=input_items, tools=tools,
)
print(response.output_text)
# -> call: search_docs('Pro plan features')
# -> result: 3 docs found
# "The Pro plan costs $49/month and includes priority support,
# API access, 100GB storage, and up to 10 team members."
Anthropic
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
import anthropic

LLM_MODEL = "claude-opus-4-6"
client = anthropic.Anthropic()

# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
# queries must be embedded with the same model that built the index
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)

def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    print(f"-> call: search_docs({query!r})")
    # query_texts is embedded automatically; n_results=3 keeps the top 3 hits
    results = collection.query(query_texts=[query], n_results=3)
    # results are batched per query text — [0] selects our single query's docs
    docs = results["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

# Anthropic tool definition: the JSON Schema goes under "input_schema"
tools = [{
    "name": "search_docs",
    "description": "Search the knowledge base for relevant documents.",
    "input_schema": {
        "type": "object",
        "properties": {
            "query": {"type": "string"},
        },
        "required": ["query"],
    },
}]
messages = [{"role": "user", "content": "What's included in the Pro plan?"}]

# step 1: LLM calls the search tool
response = client.messages.create(
    model=LLM_MODEL, max_tokens=1024, tools=tools, messages=messages,
)
# NOTE(review): assumes the model emitted a tool_use block — next() raises
# StopIteration if it answered directly; acceptable for a demo script
tool_block = next(b for b in response.content if b.type == "tool_use")
result = search_docs(**tool_block.input)

# step 2: send results back, LLM generates answer.
# the assistant turn (including its tool_use block) is echoed back verbatim,
# then the tool result goes in a user turn referencing the tool_use id.
messages.append({"role": "assistant", "content": response.content})
messages.append({"role": "user", "content": [{
    "type": "tool_result",
    "tool_use_id": tool_block.id,
    "content": result,
}]})
response = client.messages.create(
    model=LLM_MODEL, max_tokens=1024, tools=tools, messages=messages,
)
print(response.content[0].text)
# -> call: search_docs('Pro plan features')
# -> result: 3 docs found
# "The Pro plan costs $49/month and includes priority support,
# API access, 100GB storage, and up to 10 team members."
Gemini
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from google import genai
from google.genai import types

LLM_MODEL = "gemini-pro-latest"
client = genai.Client()

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    print(f"-> call: search_docs({query!r})")
    hits = collection.query(query_texts=[query], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

# handing the plain Python function to the SDK enables automatic function
# calling: the SDK runs search_docs itself and feeds the result back
response = client.models.generate_content(
    model=LLM_MODEL,
    config=types.GenerateContentConfig(tools=[search_docs]),
    contents="What's included in the Pro plan?",
)
print(response.text)
# -> call: search_docs('Pro plan features')
# -> result: 3 docs found
# "The Pro plan costs $49/month and includes priority support,
# API access, 100GB storage, and up to 10 team members."
Pydantic AI
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from pydantic_ai import Agent

LLM_MODEL = "openai:gpt-5.4"

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

agent = Agent(LLM_MODEL)

# tool_plain registers a tool that needs no run context; the docstring
# becomes the tool description the LLM sees
@agent.tool_plain
def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    print(f"-> call: search_docs({query!r})")
    hits = collection.query(query_texts=[query], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

result = agent.run_sync("What's included in the Pro plan?")
print(result.output)
# -> call: search_docs('Pro plan features')
# -> result: 3 docs found
# "The Pro plan costs $49/month and includes priority support,
# API access, 100GB storage, and up to 10 team members."
LangGraph
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from langchain.tools import tool
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI

LLM_MODEL = "gpt-5.4"
model = ChatOpenAI(model=LLM_MODEL)

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

# @tool turns the function into a LangChain tool; the docstring becomes
# the description the LLM sees
@tool
def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    print(f"-> call: search_docs({query!r})")
    hits = collection.query(query_texts=[query], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

agent = create_agent(model, [search_docs])

result = agent.invoke({
    "messages": [("user", "What's included in the Pro plan?")]
})
print(result["messages"][-1].content)
# -> call: search_docs('Pro plan features')
# -> result: 3 docs found
# "The Pro plan costs $49/month and includes priority support,
# API access, 100GB storage, and up to 10 team members."
AI SDK
import { ToolLoopAgent, tool } from "ai";
import { openai } from "@ai-sdk/openai";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";

const LLM_MODEL = "gpt-5.4";

// reopen the index built in the Setup scenario
const index = new LocalIndex("./acme_index");
const embedder = new OpenAI();

// embed a query with the same model that was used at indexing time
const embed = async (text: string) => {
  const response = await embedder.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return response.data[0].embedding;
};

const searchDocs = tool({
  description: "Search the knowledge base for relevant documents",
  inputSchema: z.object({ query: z.string() }),
  execute: async ({ query }) => {
    console.log(`-> call: searchDocs(${JSON.stringify(query)})`);
    const hits = await index.queryItems(await embed(query), 3);
    const docs = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${docs.length} docs found`);
    return docs.join("\n\n");
  },
});

const agent = new ToolLoopAgent({
  model: openai(LLM_MODEL),
  tools: { searchDocs },
});

const result = await agent.generate({
  prompt: "What's included in the Pro plan?",
});
console.log(result.text);
// -> call: searchDocs("Pro plan features")
// -> result: 3 docs found
// "The Pro plan costs $49/month and includes priority support,
// API access, 100GB storage, and up to 10 team members."
Mastra
import { Agent } from "@mastra/core/agent";
import { createTool } from "@mastra/core/tools";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";

const LLM_MODEL = "openai/gpt-5.4";

// reopen the index built in the Setup scenario
const index = new LocalIndex("./acme_index");
const embedder = new OpenAI();

// embed a query with the same model that was used at indexing time
const embed = async (text: string) => {
  const response = await embedder.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return response.data[0].embedding;
};

const searchDocs = createTool({
  id: "search-docs",
  description: "Search the knowledge base for relevant documents",
  inputSchema: z.object({ query: z.string() }),
  execute: async ({ query }) => {
    console.log(`-> call: searchDocs(${JSON.stringify(query)})`);
    const hits = await index.queryItems(await embed(query), 3);
    const docs = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${docs.length} docs found`);
    return docs.join("\n\n");
  },
});

const agent = new Agent({
  name: "rag-agent",
  instructions: "You are a helpful assistant.",
  model: LLM_MODEL,
  tools: { searchDocs },
});

const result = await agent.generate("What's included in the Pro plan?");
console.log(result.text);
// -> call: searchDocs("Pro plan features")
// -> result: 3 docs found
// "The Pro plan costs $49/month and includes priority support,
// API access, 100GB storage, and up to 10 team members."

Keyword Search
OpenAI
import json
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from openai import OpenAI

LLM_MODEL = "gpt-5.4"
client = OpenAI()

# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)

# the tool schema shapes retrieval: list[str] keywords instead of a free-text query
# the description tells the LLM to decompose questions into specific search terms
def search_by_keywords(keywords: list[str]) -> str:
    """Search the knowledge base using specific keywords.
    Use precise terms, not full questions."""
    print(f"-> call: search_by_keywords({keywords})")
    query = " ".join(keywords)
    results = collection.query(query_texts=[query], n_results=3)
    docs = results["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

tools = [{
    "type": "function",
    "name": "search_by_keywords",
    "description": "Search the knowledge base using specific keywords. Use precise terms, not full questions.",
    "parameters": {
        "type": "object",
        "properties": {
            "keywords": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Specific search keywords",
            },
        },
        "required": ["keywords"],
    },
}]

# named input_items (not `input`) to avoid shadowing the builtin
input_items = [{
    "role": "user",
    "content": "Can I get a refund if I cancel my annual Pro plan after a month?",
}]

# step 1: LLM decomposes question into keywords and calls the tool
response = client.responses.create(
    model=LLM_MODEL, input=input_items, tools=tools,
)

# handle all tool calls — the LLM may emit more than one
input_items += response.output
for item in response.output:
    if item.type == "function_call":
        result = search_by_keywords(**json.loads(item.arguments))
        input_items.append({
            "type": "function_call_output",
            "call_id": item.call_id,
            "output": result,
        })

# step 2: send results back, LLM generates answer
response = client.responses.create(
    model=LLM_MODEL, input=input_items, tools=tools,
)
print(response.output_text)
# -> call: search_by_keywords(['refund', 'cancel', 'annual', 'Pro'])
# -> result: 3 docs found
# "Yes — annual plans can be refunded within 30 days of purchase."
Anthropic
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
import anthropic

LLM_MODEL = "claude-opus-4-6"
client = anthropic.Anthropic()

# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)

# the tool schema shapes retrieval: list[str] keywords instead of a free-text query
# the description tells the LLM to decompose questions into specific search terms
def search_by_keywords(keywords: list[str]) -> str:
    """Search the knowledge base using specific keywords.
    Use precise terms, not full questions."""
    print(f"-> call: search_by_keywords({keywords})")
    query = " ".join(keywords)
    results = collection.query(query_texts=[query], n_results=3)
    docs = results["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

tools = [{
    "name": "search_by_keywords",
    "description": "Search the knowledge base using specific keywords. Use precise terms, not full questions.",
    "input_schema": {
        "type": "object",
        "properties": {
            "keywords": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Specific search keywords",
            },
        },
        "required": ["keywords"],
    },
}]
messages = [{
    "role": "user",
    "content": "Can I get a refund if I cancel my annual Pro plan after a month?",
}]

# step 1: LLM decomposes question into keywords and calls the tool
response = client.messages.create(
    model=LLM_MODEL, max_tokens=1024, tools=tools, messages=messages,
)

# handle all tool_use blocks — the LLM may emit more than one
messages.append({"role": "assistant", "content": response.content})
tool_results = []
for block in response.content:
    if block.type == "tool_use":
        result = search_by_keywords(**block.input)
        tool_results.append({
            "type": "tool_result",
            "tool_use_id": block.id,
            "content": result,
        })

# step 2: send results back, LLM generates answer.
# only do the second round-trip if the model actually called a tool —
# a user turn with an empty content list is rejected by the API.
if tool_results:
    messages.append({"role": "user", "content": tool_results})
    response = client.messages.create(
        model=LLM_MODEL, max_tokens=1024, tools=tools, messages=messages,
    )
print(response.content[0].text)
# -> call: search_by_keywords(['refund', 'cancel', 'annual', 'Pro'])
# -> result: 3 docs found
# "Yes — annual plans can be refunded within 30 days of purchase."
Gemini
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from google import genai
from google.genai import types

LLM_MODEL = "gemini-pro-latest"
client = genai.Client()

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

# a list[str] parameter nudges the LLM to decompose the question into
# precise search terms rather than pasting it in verbatim
def search_by_keywords(keywords: list[str]) -> str:
    """Search the knowledge base using specific keywords.
    Use precise terms, not full questions."""
    print(f"-> call: search_by_keywords({keywords})")
    combined = " ".join(keywords)
    hits = collection.query(query_texts=[combined], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

# handing the plain Python function to the SDK enables automatic function
# calling: the SDK runs the tool itself and feeds the result back
response = client.models.generate_content(
    model=LLM_MODEL,
    config=types.GenerateContentConfig(tools=[search_by_keywords]),
    contents="Can I get a refund if I cancel my annual Pro plan after a month?",
)
print(response.text)
# -> call: search_by_keywords(['refund', 'cancel', 'annual', 'Pro'])
# -> result: 3 docs found
# "Yes — annual plans can be refunded within 30 days of purchase."
Pydantic AI
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from pydantic_ai import Agent

LLM_MODEL = "openai:gpt-5.4"

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

agent = Agent(LLM_MODEL)

# a list[str] schema nudges the LLM to decompose the question into
# precise search terms; the docstring is the description the LLM sees
@agent.tool_plain
def search_by_keywords(keywords: list[str]) -> str:
    """Search the knowledge base using specific keywords.
    Use precise terms, not full questions."""
    print(f"-> call: search_by_keywords({keywords})")
    combined = " ".join(keywords)
    hits = collection.query(query_texts=[combined], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

result = agent.run_sync(
    "Can I get a refund if I cancel my annual Pro plan after a month?",
)
print(result.output)
# -> call: search_by_keywords(['refund', 'cancel', 'annual', 'Pro'])
# -> result: 3 docs found
# "Yes — annual plans can be refunded within 30 days of purchase."
LangGraph
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from langchain.tools import tool
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI

LLM_MODEL = "gpt-5.4"
model = ChatOpenAI(model=LLM_MODEL)

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

# a list[str] schema nudges the LLM to decompose the question into
# precise search terms; the docstring is the description the LLM sees
@tool
def search_by_keywords(keywords: list[str]) -> str:
    """Search the knowledge base using specific keywords.
    Use precise terms, not full questions."""
    print(f"-> call: search_by_keywords({keywords})")
    combined = " ".join(keywords)
    hits = collection.query(query_texts=[combined], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

agent = create_agent(model, [search_by_keywords])

result = agent.invoke({
    "messages": [(
        "user",
        "Can I get a refund if I cancel my annual Pro plan after a month?",
    )]
})
print(result["messages"][-1].content)
# -> call: search_by_keywords(['refund', 'cancel', 'annual', 'Pro'])
# -> result: 3 docs found
# "Yes — annual plans can be refunded within 30 days of purchase."
AI SDK
import { ToolLoopAgent, tool } from "ai";
import { openai } from "@ai-sdk/openai";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";

const LLM_MODEL = "gpt-5.4";

// reopen the index built in the Setup scenario
const index = new LocalIndex("./acme_index");
const embedder = new OpenAI();

// embed a query with the same model that was used at indexing time
const embed = async (text: string) => {
  const response = await embedder.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return response.data[0].embedding;
};

// a string[] schema nudges the LLM to decompose the question into
// precise search terms rather than pasting it in verbatim
const searchByKeywords = tool({
  description:
    "Search the knowledge base using specific keywords. Use precise terms, not full questions.",
  inputSchema: z.object({
    keywords: z.array(z.string()).describe("Specific search keywords"),
  }),
  execute: async ({ keywords }) => {
    console.log(`-> call: searchByKeywords(${JSON.stringify(keywords)})`);
    const hits = await index.queryItems(await embed(keywords.join(" ")), 3);
    const docs = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${docs.length} docs found`);
    return docs.join("\n\n");
  },
});

const agent = new ToolLoopAgent({
  model: openai(LLM_MODEL),
  tools: { searchByKeywords },
});

const result = await agent.generate({
  prompt: "Can I get a refund if I cancel my annual Pro plan after a month?",
});
console.log(result.text);
// -> call: searchByKeywords(["refund", "cancel", "annual", "Pro"])
// -> result: 3 docs found
// "Yes — annual plans can be refunded within 30 days of purchase."
Mastra
import { Agent } from "@mastra/core/agent";
import { createTool } from "@mastra/core/tools";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";

const LLM_MODEL = "openai/gpt-5.4";

// reopen the index built in the Setup scenario
const index = new LocalIndex("./acme_index");
const embedder = new OpenAI();

// embed a query with the same model that was used at indexing time
const embed = async (text: string) => {
  const response = await embedder.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return response.data[0].embedding;
};

// a string[] schema nudges the LLM to decompose the question into
// precise search terms rather than pasting it in verbatim
const searchByKeywords = createTool({
  id: "search-by-keywords",
  description:
    "Search the knowledge base using specific keywords. Use precise terms, not full questions.",
  inputSchema: z.object({
    keywords: z.array(z.string()).describe("Specific search keywords"),
  }),
  execute: async ({ keywords }) => {
    console.log(`-> call: searchByKeywords(${JSON.stringify(keywords)})`);
    const hits = await index.queryItems(await embed(keywords.join(" ")), 3);
    const docs = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${docs.length} docs found`);
    return docs.join("\n\n");
  },
});

const agent = new Agent({
  name: "rag-agent",
  instructions: "You are a helpful assistant.",
  model: LLM_MODEL,
  tools: { searchByKeywords },
});

const result = await agent.generate(
  "Can I get a refund if I cancel my annual Pro plan after a month?",
);
console.log(result.text);
// -> call: searchByKeywords(["refund", "cancel", "annual", "Pro"])
// -> result: 3 docs found
// "Yes — annual plans can be refunded within 30 days of purchase."

Agentic RAG
OpenAI
import json
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from openai import OpenAI
LLM_MODEL = "gpt-5.4"
client = OpenAI()
# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
api_key=os.environ["OPENAI_API_KEY"],
model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)
def search_docs(query: str) -> str:
    """Semantic search over the indexed docs.

    Returns the three best-matching documents joined by blank lines.
    """
    print(f"-> call: search_docs({query!r})")
    hits = collection.query(query_texts=[query], n_results=3)
    top_docs = hits["documents"][0]
    print(f"-> result: {len(top_docs)} docs found")
    return "\n\n".join(top_docs)
# JSON-schema tool definition sent with each API call — this is what the
# model sees when deciding whether (and with what arguments) to call
# search_docs; the name must match the Python function we dispatch to
tools = [{
    "type": "function",
    "name": "search_docs",
    "description": "Search the knowledge base for relevant documents.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string"},
        },
        "required": ["query"],
    },
}]
# conversation history ("input" in the Responses API sense) — named
# `conversation` so we don't shadow the `input` builtin
conversation = [
    {"role": "developer", "content": (
        "You are a support agent. Search the knowledge base to answer questions. "
        "If results don't fully answer the question, search again with different terms."
    )},
    {"role": "user", "content": (
        "I'm choosing between Pro and Enterprise. I need SSO and at least "
        "99.9% uptime. Which plan should I pick and what's the price difference?"
    )},
]
# agentic loop: LLM searches multiple times until it has enough info
while True:
    response = client.responses.create(
        model=LLM_MODEL, input=conversation, tools=tools,
    )
    tool_calls = [item for item in response.output if item.type == "function_call"]
    if not tool_calls:
        break  # no more searches requested — the model has its answer
    # echo the model's turn back, then append one output item per call
    conversation += response.output
    for tc in tool_calls:
        result = search_docs(**json.loads(tc.arguments))
        conversation.append({
            "type": "function_call_output",
            "call_id": tc.call_id,
            "output": result,
        })
print(response.output_text)
# -> call: search_docs('SSO uptime Pro Enterprise')
# -> result: 3 docs found
# -> call: search_docs('Pro Enterprise pricing comparison')
# -> result: 3 docs found
# "Based on your requirements ... Enterprise plan at $199/month."
Anthropic
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
import anthropic
LLM_MODEL = "claude-opus-4-6"
client = anthropic.Anthropic()
# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
api_key=os.environ["OPENAI_API_KEY"],
model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)
def search_docs(query: str) -> str:
    """Look up the three most relevant indexed documents for *query*."""
    print(f"-> call: search_docs({query!r})")
    response = collection.query(query_texts=[query], n_results=3)
    matched = response["documents"][0]
    print(f"-> result: {len(matched)} docs found")
    return "\n\n".join(matched)
# Anthropic tool definition: same JSON-schema idea as OpenAI's, but the
# schema lives under "input_schema" and there is no "type" wrapper
tools = [{
    "name": "search_docs",
    "description": "Search the knowledge base for relevant documents.",
    "input_schema": {
        "type": "object",
        "properties": {
            "query": {"type": "string"},
        },
        "required": ["query"],
    },
}]
# in the Messages API the system prompt is a top-level parameter,
# not a message inside the conversation list
system = (
    "You are a support agent. Search the knowledge base to answer questions. "
    "If results don't fully answer the question, search again with different terms."
)
messages = [{"role": "user", "content": (
"I'm choosing between Pro and Enterprise. I need SSO and at least "
"99.9% uptime. Which plan should I pick and what's the price difference?"
)}]
# agentic loop: LLM searches multiple times until it has enough info
while True:
response = client.messages.create(
model=LLM_MODEL, max_tokens=1024,
system=system, tools=tools, messages=messages,
)
if response.stop_reason != "tool_use":
break
messages.append({"role": "assistant", "content": response.content})
tool_results = []
for block in response.content:
if block.type == "tool_use":
result = search_docs(**block.input)
tool_results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": result,
})
messages.append({"role": "user", "content": tool_results})
print(response.content[0].text)
# -> call: search_docs('SSO uptime Pro Enterprise')
# -> result: 3 docs found
# -> call: search_docs('Pro Enterprise pricing comparison')
# -> result: 3 docs found
# "Based on your requirements ... Enterprise plan at $199/month."
Gemini
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from google import genai
from google.genai import types
LLM_MODEL = "gemini-pro-latest"
client = genai.Client()
# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
api_key=os.environ["OPENAI_API_KEY"],
model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)
def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    # NOTE: the docstring above doubles as the tool description the model sees
    print(f"-> call: search_docs({query!r})")
    query_result = collection.query(query_texts=[query], n_results=3)
    top_matches = query_result["documents"][0]
    print(f"-> result: {len(top_matches)} docs found")
    return "\n\n".join(top_matches)
# automatic function calling: SDK runs the multi-step ReAct loop
SYSTEM_PROMPT = (
    "You are a support agent. Search the knowledge base to answer questions. "
    "If results don't fully answer the question, search again with different terms."
)
QUESTION = (
    "I'm choosing between Pro and Enterprise. I need SSO and at least "
    "99.9% uptime. Which plan should I pick and what's the price difference?"
)
# passing the bare Python function as a tool triggers automatic dispatch
config = types.GenerateContentConfig(
    tools=[search_docs],
    system_instruction=SYSTEM_PROMPT,
)
response = client.models.generate_content(
    model=LLM_MODEL, config=config, contents=QUESTION,
)
print(response.text)
# -> call: search_docs('SSO uptime Pro Enterprise')
# -> result: 3 docs found
# -> call: search_docs('Pro Enterprise pricing comparison')
# -> result: 3 docs found
# "Based on your requirements ... Enterprise plan at $199/month."
Pydantic AI
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from pydantic_ai import Agent
LLM_MODEL = "openai:gpt-5.4"
# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
api_key=os.environ["OPENAI_API_KEY"],
model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)
# build the agent first; tools are attached afterwards via decorator
SUPPORT_INSTRUCTIONS = (
    "You are a support agent. Search the knowledge base to answer questions. "
    "If results don't fully answer the question, search again with different terms."
)
agent = Agent(LLM_MODEL, instructions=SUPPORT_INSTRUCTIONS)
@agent.tool_plain
def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    # tool_plain exposes the docstring above as the tool description
    print(f"-> call: search_docs({query!r})")
    found = collection.query(query_texts=[query], n_results=3)
    best = found["documents"][0]
    print(f"-> result: {len(best)} docs found")
    return "\n\n".join(best)
# run_sync handles the multi-step loop automatically
question = (
    "I'm choosing between Pro and Enterprise. I need SSO and at least "
    "99.9% uptime. Which plan should I pick and what's the price difference?"
)
result = agent.run_sync(question)
print(result.output)
# -> call: search_docs('SSO uptime Pro Enterprise')
# -> result: 3 docs found
# -> call: search_docs('Pro Enterprise pricing comparison')
# -> result: 3 docs found
# "Based on your requirements ... Enterprise plan at $199/month."
LangGraph
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from langchain.tools import tool
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage
LLM_MODEL = "gpt-5.4"
model = ChatOpenAI(model=LLM_MODEL)
# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
api_key=os.environ["OPENAI_API_KEY"],
model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)
@tool
def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    # @tool publishes the docstring above as the tool description
    print(f"-> call: search_docs({query!r})")
    result_set = collection.query(query_texts=[query], n_results=3)
    documents = result_set["documents"][0]
    print(f"-> result: {len(documents)} docs found")
    return "\n\n".join(documents)
# invoke handles the multi-step loop automatically
agent = create_agent(model, [search_docs])
system_prompt = (
    "You are a support agent. Search the knowledge base to answer questions. "
    "If results don't fully answer the question, search again with different terms."
)
user_question = (
    "I'm choosing between Pro and Enterprise. I need SSO and at least "
    "99.9% uptime. Which plan should I pick and what's the price difference?"
)
result = agent.invoke({
    "messages": [SystemMessage(content=system_prompt), ("user", user_question)],
})
print(result["messages"][-1].content)
# -> call: search_docs('SSO uptime Pro Enterprise')
# -> result: 3 docs found
# -> call: search_docs('Pro Enterprise pricing comparison')
# -> result: 3 docs found
# "Based on your requirements ... Enterprise plan at $199/month."
AI SDK
import { ToolLoopAgent, tool } from "ai";
import { openai } from "@ai-sdk/openai";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";
const LLM_MODEL = "gpt-5.4";

// connect to indexed documents — see Setup scenario
const index = new LocalIndex("./acme_index");
const embeddingClient = new OpenAI();

// embed text with the same model the index was built with
async function embed(text: string) {
  const response = await embeddingClient.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return response.data[0].embedding;
}

// a single free-text-query retrieval tool over the local vector index
const searchDocs = tool({
  description: "Search the knowledge base for relevant documents",
  inputSchema: z.object({ query: z.string() }),
  execute: async ({ query }) => {
    console.log(`-> call: searchDocs(${JSON.stringify(query)})`);
    const vector = await embed(query);
    const hits = await index.queryItems(vector, 3);
    const texts = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${texts.length} docs found`);
    return texts.join("\n\n");
  },
});

// ToolLoopAgent handles the multi-step loop automatically
const agent = new ToolLoopAgent({
  model: openai(LLM_MODEL),
  system: `\
You are a support agent. Search the knowledge base to answer questions.
If results don't fully answer the question, search again with different terms.
`,
  tools: { searchDocs },
});
const result = await agent.generate({
  prompt: `\
I'm choosing between Pro and Enterprise. I need SSO and at least
99.9% uptime. Which plan should I pick and what's the price difference?
`,
});
console.log(result.text);
// -> call: searchDocs("SSO uptime Pro Enterprise")
// -> result: 3 docs found
// -> call: searchDocs("Pro Enterprise pricing comparison")
// -> result: 3 docs found
// "Based on your requirements ... Enterprise plan at $199/month."
Mastra
import { Agent } from "@mastra/core/agent";
import { createTool } from "@mastra/core/tools";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";
const LLM_MODEL = "openai/gpt-5.4";

// connect to indexed documents — see Setup scenario
const index = new LocalIndex("./acme_index");
const embeddingClient = new OpenAI();

// embed text with the same model the index was built with
async function embed(text: string) {
  const response = await embeddingClient.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  const [first] = response.data;
  return first.embedding;
}

// free-text retrieval tool over the local vector index
const searchDocs = createTool({
  id: "search-docs",
  description: "Search the knowledge base for relevant documents",
  inputSchema: z.object({ query: z.string() }),
  execute: async ({ query }) => {
    console.log(`-> call: searchDocs(${JSON.stringify(query)})`);
    const hits = await index.queryItems(await embed(query), 3);
    const texts = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${texts.length} docs found`);
    return texts.join("\n\n");
  },
});

const agent = new Agent({
  name: "rag-agent",
  instructions: `\
You are a support agent. Search the knowledge base to answer questions.
If results don't fully answer the question, search again with different terms.
`,
  model: LLM_MODEL,
  tools: { searchDocs },
});

// maxSteps controls the ReAct loop iterations (default: 1)
// without maxSteps > 1, Mastra won't loop back after a tool call
const result = await agent.generate(
  `\
I'm choosing between Pro and Enterprise. I need SSO and at least
99.9% uptime. Which plan should I pick and what's the price difference?
`,
  { maxSteps: 5 },
);
console.log(result.text);
// -> call: searchDocs("SSO uptime Pro Enterprise")
// -> result: 3 docs found
// -> call: searchDocs("Pro Enterprise pricing comparison")
// -> result: 3 docs found
// "Based on your requirements ... Enterprise plan at $199/month."