Select frameworks to compare
Pick one or more frameworks from the bar above
Setup
OpenAI
from pathlib import Path
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# ChromaDB is a vector database that stores documents and their embeddings.
# PersistentClient saves the index to disk so agent scripts can query it later.
client = chromadb.PersistentClient(path="./acme_index")

# tell ChromaDB which embedding model to use for indexing and queries.
# without this, it silently downloads an ~80MB local model on first run.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)

# a collection groups related documents — the embedding function above
# converts text to vectors automatically during add and query operations
collection = client.get_or_create_collection(
    "acme_docs", embedding_function=openai_ef,
)

# load documents from docs.txt — each non-empty line becomes one document.
# in production you'd load real files, DB rows, or API responses instead.
# strip before the "#" check so indented comment lines are skipped too
# (checking startswith on the raw line misses comments with leading spaces).
lines = [line for line in Path("docs.txt").read_text().splitlines()
         if line.strip() and not line.lstrip().startswith("#")]

# upsert inserts new docs or updates existing ones (safe to re-run)
collection.upsert(
    documents=lines,
    ids=[f"doc_{i}" for i in range(len(lines))],
)
print(f"Indexed {len(lines)} documents")
Anthropic
from pathlib import Path
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# ChromaDB is a vector database that stores documents and their embeddings.
# PersistentClient saves the index to disk so agent scripts can query it later.
client = chromadb.PersistentClient(path="./acme_index")

# tell ChromaDB which embedding model to use for indexing and queries.
# without this, it silently downloads an ~80MB local model on first run.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)

# a collection groups related documents — the embedding function above
# converts text to vectors automatically during add and query operations
collection = client.get_or_create_collection(
    "acme_docs", embedding_function=openai_ef,
)

# load documents from docs.txt — each non-empty line becomes one document.
# in production you'd load real files, DB rows, or API responses instead.
# strip before the "#" check so indented comment lines are skipped too
# (checking startswith on the raw line misses comments with leading spaces).
lines = [line for line in Path("docs.txt").read_text().splitlines()
         if line.strip() and not line.lstrip().startswith("#")]

# upsert inserts new docs or updates existing ones (safe to re-run)
collection.upsert(
    documents=lines,
    ids=[f"doc_{i}" for i in range(len(lines))],
)
print(f"Indexed {len(lines)} documents")
Gemini
from pathlib import Path
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# ChromaDB is a vector database that stores documents and their embeddings.
# PersistentClient saves the index to disk so agent scripts can query it later.
client = chromadb.PersistentClient(path="./acme_index")

# tell ChromaDB which embedding model to use for indexing and queries.
# without this, it silently downloads an ~80MB local model on first run.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)

# a collection groups related documents — the embedding function above
# converts text to vectors automatically during add and query operations
collection = client.get_or_create_collection(
    "acme_docs", embedding_function=openai_ef,
)

# load documents from docs.txt — each non-empty line becomes one document.
# in production you'd load real files, DB rows, or API responses instead.
# strip before the "#" check so indented comment lines are skipped too
# (checking startswith on the raw line misses comments with leading spaces).
lines = [line for line in Path("docs.txt").read_text().splitlines()
         if line.strip() and not line.lstrip().startswith("#")]

# upsert inserts new docs or updates existing ones (safe to re-run)
collection.upsert(
    documents=lines,
    ids=[f"doc_{i}" for i in range(len(lines))],
)
print(f"Indexed {len(lines)} documents")
Pydantic AI
from pathlib import Path
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# ChromaDB is a vector database that stores documents and their embeddings.
# PersistentClient saves the index to disk so agent scripts can query it later.
client = chromadb.PersistentClient(path="./acme_index")

# tell ChromaDB which embedding model to use for indexing and queries.
# without this, it silently downloads an ~80MB local model on first run.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)

# a collection groups related documents — the embedding function above
# converts text to vectors automatically during add and query operations
collection = client.get_or_create_collection(
    "acme_docs", embedding_function=openai_ef,
)

# load documents from docs.txt — each non-empty line becomes one document.
# in production you'd load real files, DB rows, or API responses instead.
# strip before the "#" check so indented comment lines are skipped too
# (checking startswith on the raw line misses comments with leading spaces).
lines = [line for line in Path("docs.txt").read_text().splitlines()
         if line.strip() and not line.lstrip().startswith("#")]

# upsert inserts new docs or updates existing ones (safe to re-run)
collection.upsert(
    documents=lines,
    ids=[f"doc_{i}" for i in range(len(lines))],
)
print(f"Indexed {len(lines)} documents")
LangGraph
from pathlib import Path
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# ChromaDB is a vector database that stores documents and their embeddings.
# PersistentClient saves the index to disk so agent scripts can query it later.
client = chromadb.PersistentClient(path="./acme_index")

# tell ChromaDB which embedding model to use for indexing and queries.
# without this, it silently downloads an ~80MB local model on first run.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)

# a collection groups related documents — the embedding function above
# converts text to vectors automatically during add and query operations
collection = client.get_or_create_collection(
    "acme_docs", embedding_function=openai_ef,
)

# load documents from docs.txt — each non-empty line becomes one document.
# in production you'd load real files, DB rows, or API responses instead.
# strip before the "#" check so indented comment lines are skipped too
# (checking startswith on the raw line misses comments with leading spaces).
lines = [line for line in Path("docs.txt").read_text().splitlines()
         if line.strip() and not line.lstrip().startswith("#")]

# upsert inserts new docs or updates existing ones (safe to re-run)
collection.upsert(
    documents=lines,
    ids=[f"doc_{i}" for i in range(len(lines))],
)
print(f"Indexed {len(lines)} documents")
AI SDK
import { LocalIndex } from "vectra";
import { readFileSync } from "fs";
import OpenAI from "openai";

// Vectra is a local vector database that stores documents with their embeddings.
// Unlike ChromaDB (Python), Vectra requires you to compute embeddings yourself —
// we use OpenAI's embedding model for that.
const openai = new OpenAI();

// LocalIndex persists to disk so agent scripts can query it later
const index = new LocalIndex("./acme_index");
if (!(await index.isIndexCreated())) await index.createIndex();

// load documents from docs.txt — each non-empty line becomes one document.
// in production you'd load real files, DB rows, or API responses instead.
// split on /\r?\n/ so CRLF files don't leave a stray "\r" on every document,
// and trim before the "#" check so indented comment lines are skipped too.
const lines = readFileSync("docs.txt", "utf-8")
  .split(/\r?\n/)
  .filter((l) => l.trim() && !l.trimStart().startsWith("#"));

// embed each document and store it in the index
for (const [i, doc] of lines.entries()) {
  const res = await openai.embeddings.create({
    model: "text-embedding-3-small",
    input: doc,
  });
  await index.insertItem({
    id: `doc_${i}`,
    metadata: { text: doc },
    vector: res.data[0].embedding,
  });
}
console.log(`Indexed ${lines.length} documents`);
Mastra
import { LocalIndex } from "vectra";
import { readFileSync } from "fs";
import OpenAI from "openai";

// Vectra is a local vector database that stores documents with their embeddings.
// Unlike ChromaDB (Python), Vectra requires you to compute embeddings yourself —
// we use OpenAI's embedding model for that.
const openai = new OpenAI();

// LocalIndex persists to disk so agent scripts can query it later
const index = new LocalIndex("./acme_index");
if (!(await index.isIndexCreated())) await index.createIndex();

// load documents from docs.txt — each non-empty line becomes one document.
// in production you'd load real files, DB rows, or API responses instead.
// split on /\r?\n/ so CRLF files don't leave a stray "\r" on every document,
// and trim before the "#" check so indented comment lines are skipped too.
const lines = readFileSync("docs.txt", "utf-8")
  .split(/\r?\n/)
  .filter((l) => l.trim() && !l.trimStart().startsWith("#"));

// embed each document and store it in the index
for (const [i, doc] of lines.entries()) {
  const res = await openai.embeddings.create({
    model: "text-embedding-3-small",
    input: doc,
  });
  await index.insertItem({
    id: `doc_${i}`,
    metadata: { text: doc },
    vector: res.data[0].embedding,
  });
}
console.log(`Indexed ${lines.length} documents`);

Similarity Search
OpenAI
import json
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from openai import OpenAI

LLM_MODEL = "gpt-5.4"
client = OpenAI()

# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
# queries must be embedded with the same model that built the index
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)

def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    print(f"-> call: search_docs({query!r})")
    results = collection.query(query_texts=[query], n_results=3)
    # results are batched per query text — [0] selects our single query's docs
    docs = results["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

tools = [{
    "type": "function",
    "name": "search_docs",
    "description": "Search the knowledge base for relevant documents.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string"},
        },
        "required": ["query"],
    },
}]

# named input_items (not `input`) to avoid shadowing the builtin
input_items = [{"role": "user", "content": "What's included in the Pro plan?"}]

# step 1: LLM calls the search tool
response = client.responses.create(
    model=LLM_MODEL, input=input_items, tools=tools,
)

# handle all tool calls — the LLM may emit more than one
input_items += response.output
for item in response.output:
    if item.type == "function_call":
        result = search_docs(**json.loads(item.arguments))
        input_items.append({
            "type": "function_call_output",
            "call_id": item.call_id,
            "output": result,
        })

# step 2: send results back, LLM generates answer
response = client.responses.create(
    model=LLM_MODEL, input=input_items, tools=tools,
)
print(response.output_text)
# -> call: search_docs('Pro plan features')
# -> result: 3 docs found
# "The Pro plan costs $49/month and includes priority support,
# API access, 100GB storage, and up to 10 team members."
Anthropic
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
import anthropic

LLM_MODEL = "claude-opus-4-6"
client = anthropic.Anthropic()

# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
# queries must be embedded with the same model that built the index
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)

def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    print(f"-> call: search_docs({query!r})")
    # query_texts is embedded automatically; n_results=3 keeps the top 3 hits
    results = collection.query(query_texts=[query], n_results=3)
    # results are batched per query text — [0] selects our single query's docs
    docs = results["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

# Anthropic tool definition: the JSON Schema goes under "input_schema"
tools = [{
    "name": "search_docs",
    "description": "Search the knowledge base for relevant documents.",
    "input_schema": {
        "type": "object",
        "properties": {
            "query": {"type": "string"},
        },
        "required": ["query"],
    },
}]
messages = [{"role": "user", "content": "What's included in the Pro plan?"}]

# step 1: LLM calls the search tool
response = client.messages.create(
    model=LLM_MODEL, max_tokens=1024, tools=tools, messages=messages,
)
# NOTE(review): assumes the model emitted a tool_use block — next() raises
# StopIteration if it answered directly; acceptable for a demo script
tool_block = next(b for b in response.content if b.type == "tool_use")
result = search_docs(**tool_block.input)

# step 2: send results back, LLM generates answer.
# the assistant turn (including its tool_use block) is echoed back verbatim,
# then the tool result goes in a user turn referencing the tool_use id.
messages.append({"role": "assistant", "content": response.content})
messages.append({"role": "user", "content": [{
    "type": "tool_result",
    "tool_use_id": tool_block.id,
    "content": result,
}]})
response = client.messages.create(
    model=LLM_MODEL, max_tokens=1024, tools=tools, messages=messages,
)
print(response.content[0].text)
# -> call: search_docs('Pro plan features')
# -> result: 3 docs found
# "The Pro plan costs $49/month and includes priority support,
# API access, 100GB storage, and up to 10 team members."
Gemini
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from google import genai
from google.genai import types

LLM_MODEL = "gemini-pro-latest"
client = genai.Client()

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    print(f"-> call: search_docs({query!r})")
    hits = collection.query(query_texts=[query], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

# handing the plain Python function to the SDK enables automatic function
# calling: the SDK runs search_docs itself and feeds the result back
response = client.models.generate_content(
    model=LLM_MODEL,
    config=types.GenerateContentConfig(tools=[search_docs]),
    contents="What's included in the Pro plan?",
)
print(response.text)
# -> call: search_docs('Pro plan features')
# -> result: 3 docs found
# "The Pro plan costs $49/month and includes priority support,
# API access, 100GB storage, and up to 10 team members."
Pydantic AI
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from pydantic_ai import Agent

LLM_MODEL = "openai:gpt-5.4"

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

agent = Agent(LLM_MODEL)

# tool_plain registers a tool that needs no run context; the docstring
# becomes the tool description the LLM sees
@agent.tool_plain
def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    print(f"-> call: search_docs({query!r})")
    hits = collection.query(query_texts=[query], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

result = agent.run_sync("What's included in the Pro plan?")
print(result.output)
# -> call: search_docs('Pro plan features')
# -> result: 3 docs found
# "The Pro plan costs $49/month and includes priority support,
# API access, 100GB storage, and up to 10 team members."
LangGraph
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from langchain.tools import tool
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI

LLM_MODEL = "gpt-5.4"
model = ChatOpenAI(model=LLM_MODEL)

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

# @tool turns the function into a LangChain tool; the docstring becomes
# the description the LLM sees
@tool
def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    print(f"-> call: search_docs({query!r})")
    hits = collection.query(query_texts=[query], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

agent = create_agent(model, [search_docs])

result = agent.invoke({
    "messages": [("user", "What's included in the Pro plan?")]
})
print(result["messages"][-1].content)
# -> call: search_docs('Pro plan features')
# -> result: 3 docs found
# "The Pro plan costs $49/month and includes priority support,
# API access, 100GB storage, and up to 10 team members."
AI SDK
import { ToolLoopAgent, tool } from "ai";
import { openai } from "@ai-sdk/openai";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";

const LLM_MODEL = "gpt-5.4";

// reopen the index built in the Setup scenario
const index = new LocalIndex("./acme_index");
const embedder = new OpenAI();

// embed a query with the same model that was used at indexing time
const embed = async (text: string) => {
  const response = await embedder.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return response.data[0].embedding;
};

const searchDocs = tool({
  description: "Search the knowledge base for relevant documents",
  inputSchema: z.object({ query: z.string() }),
  execute: async ({ query }) => {
    console.log(`-> call: searchDocs(${JSON.stringify(query)})`);
    const hits = await index.queryItems(await embed(query), 3);
    const docs = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${docs.length} docs found`);
    return docs.join("\n\n");
  },
});

const agent = new ToolLoopAgent({
  model: openai(LLM_MODEL),
  tools: { searchDocs },
});

const result = await agent.generate({
  prompt: "What's included in the Pro plan?",
});
console.log(result.text);
// -> call: searchDocs("Pro plan features")
// -> result: 3 docs found
// "The Pro plan costs $49/month and includes priority support,
// API access, 100GB storage, and up to 10 team members."
Mastra
import { Agent } from "@mastra/core/agent";
import { createTool } from "@mastra/core/tools";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";

const LLM_MODEL = "openai/gpt-5.4";

// reopen the index built in the Setup scenario
const index = new LocalIndex("./acme_index");
const embedder = new OpenAI();

// embed a query with the same model that was used at indexing time
const embed = async (text: string) => {
  const response = await embedder.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return response.data[0].embedding;
};

const searchDocs = createTool({
  id: "search-docs",
  description: "Search the knowledge base for relevant documents",
  inputSchema: z.object({ query: z.string() }),
  execute: async ({ query }) => {
    console.log(`-> call: searchDocs(${JSON.stringify(query)})`);
    const hits = await index.queryItems(await embed(query), 3);
    const docs = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${docs.length} docs found`);
    return docs.join("\n\n");
  },
});

const agent = new Agent({
  name: "rag-agent",
  instructions: "You are a helpful assistant.",
  model: LLM_MODEL,
  tools: { searchDocs },
});

const result = await agent.generate("What's included in the Pro plan?");
console.log(result.text);
// -> call: searchDocs("Pro plan features")
// -> result: 3 docs found
// "The Pro plan costs $49/month and includes priority support,
// API access, 100GB storage, and up to 10 team members."

Keyword Search
OpenAI
import json
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from openai import OpenAI

LLM_MODEL = "gpt-5.4"
client = OpenAI()

# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)

# the tool schema shapes retrieval: list[str] keywords instead of a free-text query
# the description tells the LLM to decompose questions into specific search terms
def search_by_keywords(keywords: list[str]) -> str:
    """Search the knowledge base using specific keywords.
    Use precise terms, not full questions."""
    print(f"-> call: search_by_keywords({keywords})")
    query = " ".join(keywords)
    results = collection.query(query_texts=[query], n_results=3)
    docs = results["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

tools = [{
    "type": "function",
    "name": "search_by_keywords",
    "description": "Search the knowledge base using specific keywords. Use precise terms, not full questions.",
    "parameters": {
        "type": "object",
        "properties": {
            "keywords": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Specific search keywords",
            },
        },
        "required": ["keywords"],
    },
}]

# named input_items (not `input`) to avoid shadowing the builtin
input_items = [{
    "role": "user",
    "content": "Can I get a refund if I cancel my annual Pro plan after a month?",
}]

# step 1: LLM decomposes question into keywords and calls the tool
response = client.responses.create(
    model=LLM_MODEL, input=input_items, tools=tools,
)

# handle all tool calls — the LLM may emit more than one
input_items += response.output
for item in response.output:
    if item.type == "function_call":
        result = search_by_keywords(**json.loads(item.arguments))
        input_items.append({
            "type": "function_call_output",
            "call_id": item.call_id,
            "output": result,
        })

# step 2: send results back, LLM generates answer
response = client.responses.create(
    model=LLM_MODEL, input=input_items, tools=tools,
)
print(response.output_text)
# -> call: search_by_keywords(['refund', 'cancel', 'annual', 'Pro'])
# -> result: 3 docs found
# "Yes — annual plans can be refunded within 30 days of purchase."
Anthropic
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
import anthropic

LLM_MODEL = "claude-opus-4-6"
client = anthropic.Anthropic()

# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)

# the tool schema shapes retrieval: list[str] keywords instead of a free-text query
# the description tells the LLM to decompose questions into specific search terms
def search_by_keywords(keywords: list[str]) -> str:
    """Search the knowledge base using specific keywords.
    Use precise terms, not full questions."""
    print(f"-> call: search_by_keywords({keywords})")
    query = " ".join(keywords)
    results = collection.query(query_texts=[query], n_results=3)
    docs = results["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

tools = [{
    "name": "search_by_keywords",
    "description": "Search the knowledge base using specific keywords. Use precise terms, not full questions.",
    "input_schema": {
        "type": "object",
        "properties": {
            "keywords": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Specific search keywords",
            },
        },
        "required": ["keywords"],
    },
}]
messages = [{
    "role": "user",
    "content": "Can I get a refund if I cancel my annual Pro plan after a month?",
}]

# step 1: LLM decomposes question into keywords and calls the tool
response = client.messages.create(
    model=LLM_MODEL, max_tokens=1024, tools=tools, messages=messages,
)

# handle all tool_use blocks — the LLM may emit more than one
messages.append({"role": "assistant", "content": response.content})
tool_results = []
for block in response.content:
    if block.type == "tool_use":
        result = search_by_keywords(**block.input)
        tool_results.append({
            "type": "tool_result",
            "tool_use_id": block.id,
            "content": result,
        })

# step 2: send results back, LLM generates answer.
# only do the second round-trip if the model actually called a tool —
# a user turn with an empty content list is rejected by the API.
if tool_results:
    messages.append({"role": "user", "content": tool_results})
    response = client.messages.create(
        model=LLM_MODEL, max_tokens=1024, tools=tools, messages=messages,
    )
print(response.content[0].text)
# -> call: search_by_keywords(['refund', 'cancel', 'annual', 'Pro'])
# -> result: 3 docs found
# "Yes — annual plans can be refunded within 30 days of purchase."
Gemini
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from google import genai
from google.genai import types

LLM_MODEL = "gemini-pro-latest"
client = genai.Client()

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

# a list[str] parameter nudges the LLM to decompose the question into
# precise search terms rather than pasting it in verbatim
def search_by_keywords(keywords: list[str]) -> str:
    """Search the knowledge base using specific keywords.
    Use precise terms, not full questions."""
    print(f"-> call: search_by_keywords({keywords})")
    combined = " ".join(keywords)
    hits = collection.query(query_texts=[combined], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

# handing the plain Python function to the SDK enables automatic function
# calling: the SDK runs the tool itself and feeds the result back
response = client.models.generate_content(
    model=LLM_MODEL,
    config=types.GenerateContentConfig(tools=[search_by_keywords]),
    contents="Can I get a refund if I cancel my annual Pro plan after a month?",
)
print(response.text)
# -> call: search_by_keywords(['refund', 'cancel', 'annual', 'Pro'])
# -> result: 3 docs found
# "Yes — annual plans can be refunded within 30 days of purchase."
Pydantic AI
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from pydantic_ai import Agent

LLM_MODEL = "openai:gpt-5.4"

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

agent = Agent(LLM_MODEL)

# a list[str] schema nudges the LLM to decompose the question into
# precise search terms; the docstring is the description the LLM sees
@agent.tool_plain
def search_by_keywords(keywords: list[str]) -> str:
    """Search the knowledge base using specific keywords.
    Use precise terms, not full questions."""
    print(f"-> call: search_by_keywords({keywords})")
    combined = " ".join(keywords)
    hits = collection.query(query_texts=[combined], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

result = agent.run_sync(
    "Can I get a refund if I cancel my annual Pro plan after a month?",
)
print(result.output)
# -> call: search_by_keywords(['refund', 'cancel', 'annual', 'Pro'])
# -> result: 3 docs found
# "Yes — annual plans can be refunded within 30 days of purchase."
LangGraph
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from langchain.tools import tool
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI

LLM_MODEL = "gpt-5.4"
model = ChatOpenAI(model=LLM_MODEL)

# reopen the index built in the Setup scenario
vector_db = chromadb.PersistentClient(path="./acme_index")
embed_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
collection = vector_db.get_collection("acme_docs", embedding_function=embed_fn)

# a list[str] schema nudges the LLM to decompose the question into
# precise search terms; the docstring is the description the LLM sees
@tool
def search_by_keywords(keywords: list[str]) -> str:
    """Search the knowledge base using specific keywords.
    Use precise terms, not full questions."""
    print(f"-> call: search_by_keywords({keywords})")
    combined = " ".join(keywords)
    hits = collection.query(query_texts=[combined], n_results=3)
    docs = hits["documents"][0]
    print(f"-> result: {len(docs)} docs found")
    return "\n\n".join(docs)

agent = create_agent(model, [search_by_keywords])

result = agent.invoke({
    "messages": [(
        "user",
        "Can I get a refund if I cancel my annual Pro plan after a month?",
    )]
})
print(result["messages"][-1].content)
# -> call: search_by_keywords(['refund', 'cancel', 'annual', 'Pro'])
# -> result: 3 docs found
# "Yes — annual plans can be refunded within 30 days of purchase."
AI SDK
import { ToolLoopAgent, tool } from "ai";
import { openai } from "@ai-sdk/openai";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";

const LLM_MODEL = "gpt-5.4";

// reopen the index built in the Setup scenario
const index = new LocalIndex("./acme_index");
const embedder = new OpenAI();

// embed a query with the same model that was used at indexing time
const embed = async (text: string) => {
  const response = await embedder.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return response.data[0].embedding;
};

// a string[] schema nudges the LLM to decompose the question into
// precise search terms rather than pasting it in verbatim
const searchByKeywords = tool({
  description:
    "Search the knowledge base using specific keywords. Use precise terms, not full questions.",
  inputSchema: z.object({
    keywords: z.array(z.string()).describe("Specific search keywords"),
  }),
  execute: async ({ keywords }) => {
    console.log(`-> call: searchByKeywords(${JSON.stringify(keywords)})`);
    const hits = await index.queryItems(await embed(keywords.join(" ")), 3);
    const docs = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${docs.length} docs found`);
    return docs.join("\n\n");
  },
});

const agent = new ToolLoopAgent({
  model: openai(LLM_MODEL),
  tools: { searchByKeywords },
});

const result = await agent.generate({
  prompt: "Can I get a refund if I cancel my annual Pro plan after a month?",
});
console.log(result.text);
// -> call: searchByKeywords(["refund", "cancel", "annual", "Pro"])
// -> result: 3 docs found
// "Yes — annual plans can be refunded within 30 days of purchase."
Mastra
import { Agent } from "@mastra/core/agent";
import { createTool } from "@mastra/core/tools";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";

const LLM_MODEL = "openai/gpt-5.4";

// reopen the index built in the Setup scenario
const index = new LocalIndex("./acme_index");
const embedder = new OpenAI();

// embed a query with the same model that was used at indexing time
const embed = async (text: string) => {
  const response = await embedder.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return response.data[0].embedding;
};

// a string[] schema nudges the LLM to decompose the question into
// precise search terms rather than pasting it in verbatim
const searchByKeywords = createTool({
  id: "search-by-keywords",
  description:
    "Search the knowledge base using specific keywords. Use precise terms, not full questions.",
  inputSchema: z.object({
    keywords: z.array(z.string()).describe("Specific search keywords"),
  }),
  execute: async ({ keywords }) => {
    console.log(`-> call: searchByKeywords(${JSON.stringify(keywords)})`);
    const hits = await index.queryItems(await embed(keywords.join(" ")), 3);
    const docs = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${docs.length} docs found`);
    return docs.join("\n\n");
  },
});

const agent = new Agent({
  name: "rag-agent",
  instructions: "You are a helpful assistant.",
  model: LLM_MODEL,
  tools: { searchByKeywords },
});

const result = await agent.generate(
  "Can I get a refund if I cancel my annual Pro plan after a month?",
);
console.log(result.text);
// -> call: searchByKeywords(["refund", "cancel", "annual", "Pro"])
// -> result: 3 docs found
// "Yes — annual plans can be refunded within 30 days of purchase."

Agentic RAG
OpenAI
import json
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from openai import OpenAI
LLM_MODEL = "gpt-5.4"
client = OpenAI()
# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
api_key=os.environ["OPENAI_API_KEY"],
model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)
def search_docs(query: str) -> str:
    """Semantic search over the indexed docs.

    Returns the three best-matching documents joined by blank lines.
    """
    print(f"-> call: search_docs({query!r})")
    hits = collection.query(query_texts=[query], n_results=3)
    top_docs = hits["documents"][0]
    print(f"-> result: {len(top_docs)} docs found")
    return "\n\n".join(top_docs)
# JSON-schema tool definition sent with each API call — this is what the
# model sees when deciding whether (and with what arguments) to call
# search_docs; the name must match the Python function we dispatch to
tools = [{
    "type": "function",
    "name": "search_docs",
    "description": "Search the knowledge base for relevant documents.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string"},
        },
        "required": ["query"],
    },
}]
# conversation history ("input" in the Responses API sense) — named
# `conversation` so we don't shadow the `input` builtin
conversation = [
    {"role": "developer", "content": (
        "You are a support agent. Search the knowledge base to answer questions. "
        "If results don't fully answer the question, search again with different terms."
    )},
    {"role": "user", "content": (
        "I'm choosing between Pro and Enterprise. I need SSO and at least "
        "99.9% uptime. Which plan should I pick and what's the price difference?"
    )},
]
# agentic loop: LLM searches multiple times until it has enough info
while True:
    response = client.responses.create(
        model=LLM_MODEL, input=conversation, tools=tools,
    )
    tool_calls = [item for item in response.output if item.type == "function_call"]
    if not tool_calls:
        break  # no more searches requested — the model has its answer
    # echo the model's turn back, then append one output item per call
    conversation += response.output
    for tc in tool_calls:
        result = search_docs(**json.loads(tc.arguments))
        conversation.append({
            "type": "function_call_output",
            "call_id": tc.call_id,
            "output": result,
        })
print(response.output_text)
# -> call: search_docs('SSO uptime Pro Enterprise')
# -> result: 3 docs found
# -> call: search_docs('Pro Enterprise pricing comparison')
# -> result: 3 docs found
# "Based on your requirements ... Enterprise plan at $199/month."
Anthropic
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
import anthropic
LLM_MODEL = "claude-opus-4-6"
client = anthropic.Anthropic()
# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
api_key=os.environ["OPENAI_API_KEY"],
model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)
def search_docs(query: str) -> str:
    """Look up the three most relevant indexed documents for *query*."""
    print(f"-> call: search_docs({query!r})")
    response = collection.query(query_texts=[query], n_results=3)
    matched = response["documents"][0]
    print(f"-> result: {len(matched)} docs found")
    return "\n\n".join(matched)
# Anthropic tool definition: same JSON-schema idea as OpenAI's, but the
# schema lives under "input_schema" and there is no "type" wrapper
tools = [{
    "name": "search_docs",
    "description": "Search the knowledge base for relevant documents.",
    "input_schema": {
        "type": "object",
        "properties": {
            "query": {"type": "string"},
        },
        "required": ["query"],
    },
}]
# in the Messages API the system prompt is a top-level parameter,
# not a message inside the conversation list
system = (
    "You are a support agent. Search the knowledge base to answer questions. "
    "If results don't fully answer the question, search again with different terms."
)
messages = [{"role": "user", "content": (
"I'm choosing between Pro and Enterprise. I need SSO and at least "
"99.9% uptime. Which plan should I pick and what's the price difference?"
)}]
# agentic loop: LLM searches multiple times until it has enough info
while True:
response = client.messages.create(
model=LLM_MODEL, max_tokens=1024,
system=system, tools=tools, messages=messages,
)
if response.stop_reason != "tool_use":
break
messages.append({"role": "assistant", "content": response.content})
tool_results = []
for block in response.content:
if block.type == "tool_use":
result = search_docs(**block.input)
tool_results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": result,
})
messages.append({"role": "user", "content": tool_results})
print(response.content[0].text)
# -> call: search_docs('SSO uptime Pro Enterprise')
# -> result: 3 docs found
# -> call: search_docs('Pro Enterprise pricing comparison')
# -> result: 3 docs found
# "Based on your requirements ... Enterprise plan at $199/month."
Gemini
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from google import genai
from google.genai import types
LLM_MODEL = "gemini-pro-latest"
client = genai.Client()
# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
api_key=os.environ["OPENAI_API_KEY"],
model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)
def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    # NOTE: the docstring above doubles as the tool description the model sees
    print(f"-> call: search_docs({query!r})")
    query_result = collection.query(query_texts=[query], n_results=3)
    top_matches = query_result["documents"][0]
    print(f"-> result: {len(top_matches)} docs found")
    return "\n\n".join(top_matches)
# automatic function calling: SDK runs the multi-step ReAct loop
SYSTEM_PROMPT = (
    "You are a support agent. Search the knowledge base to answer questions. "
    "If results don't fully answer the question, search again with different terms."
)
QUESTION = (
    "I'm choosing between Pro and Enterprise. I need SSO and at least "
    "99.9% uptime. Which plan should I pick and what's the price difference?"
)
# passing the bare Python function as a tool triggers automatic dispatch
config = types.GenerateContentConfig(
    tools=[search_docs],
    system_instruction=SYSTEM_PROMPT,
)
response = client.models.generate_content(
    model=LLM_MODEL, config=config, contents=QUESTION,
)
print(response.text)
# -> call: search_docs('SSO uptime Pro Enterprise')
# -> result: 3 docs found
# -> call: search_docs('Pro Enterprise pricing comparison')
# -> result: 3 docs found
# "Based on your requirements ... Enterprise plan at $199/month."
Pydantic AI
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from pydantic_ai import Agent
LLM_MODEL = "openai:gpt-5.4"
# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
api_key=os.environ["OPENAI_API_KEY"],
model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)
# build the agent first; tools are attached afterwards via decorator
SUPPORT_INSTRUCTIONS = (
    "You are a support agent. Search the knowledge base to answer questions. "
    "If results don't fully answer the question, search again with different terms."
)
agent = Agent(LLM_MODEL, instructions=SUPPORT_INSTRUCTIONS)
@agent.tool_plain
def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    # tool_plain exposes the docstring above as the tool description
    print(f"-> call: search_docs({query!r})")
    found = collection.query(query_texts=[query], n_results=3)
    best = found["documents"][0]
    print(f"-> result: {len(best)} docs found")
    return "\n\n".join(best)
# run_sync handles the multi-step loop automatically
question = (
    "I'm choosing between Pro and Enterprise. I need SSO and at least "
    "99.9% uptime. Which plan should I pick and what's the price difference?"
)
result = agent.run_sync(question)
print(result.output)
# -> call: search_docs('SSO uptime Pro Enterprise')
# -> result: 3 docs found
# -> call: search_docs('Pro Enterprise pricing comparison')
# -> result: 3 docs found
# "Based on your requirements ... Enterprise plan at $199/month."
LangGraph
import os
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from langchain.tools import tool
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage
LLM_MODEL = "gpt-5.4"
model = ChatOpenAI(model=LLM_MODEL)
# connect to indexed documents — see Setup scenario
db = chromadb.PersistentClient(path="./acme_index")
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
api_key=os.environ["OPENAI_API_KEY"],
model_name="text-embedding-3-small",
)
collection = db.get_collection("acme_docs", embedding_function=openai_ef)
@tool
def search_docs(query: str) -> str:
    """Search the knowledge base for relevant documents."""
    # @tool publishes the docstring above as the tool description
    print(f"-> call: search_docs({query!r})")
    result_set = collection.query(query_texts=[query], n_results=3)
    documents = result_set["documents"][0]
    print(f"-> result: {len(documents)} docs found")
    return "\n\n".join(documents)
# invoke handles the multi-step loop automatically
agent = create_agent(model, [search_docs])
system_prompt = (
    "You are a support agent. Search the knowledge base to answer questions. "
    "If results don't fully answer the question, search again with different terms."
)
user_question = (
    "I'm choosing between Pro and Enterprise. I need SSO and at least "
    "99.9% uptime. Which plan should I pick and what's the price difference?"
)
result = agent.invoke({
    "messages": [SystemMessage(content=system_prompt), ("user", user_question)],
})
print(result["messages"][-1].content)
# -> call: search_docs('SSO uptime Pro Enterprise')
# -> result: 3 docs found
# -> call: search_docs('Pro Enterprise pricing comparison')
# -> result: 3 docs found
# "Based on your requirements ... Enterprise plan at $199/month."
AI SDK
import { ToolLoopAgent, tool } from "ai";
import { openai } from "@ai-sdk/openai";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";
const LLM_MODEL = "gpt-5.4";

// connect to indexed documents — see Setup scenario
const index = new LocalIndex("./acme_index");
const embeddingClient = new OpenAI();

// embed text with the same model the index was built with
async function embed(text: string) {
  const response = await embeddingClient.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return response.data[0].embedding;
}

// a single free-text-query retrieval tool over the local vector index
const searchDocs = tool({
  description: "Search the knowledge base for relevant documents",
  inputSchema: z.object({ query: z.string() }),
  execute: async ({ query }) => {
    console.log(`-> call: searchDocs(${JSON.stringify(query)})`);
    const vector = await embed(query);
    const hits = await index.queryItems(vector, 3);
    const texts = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${texts.length} docs found`);
    return texts.join("\n\n");
  },
});

// ToolLoopAgent handles the multi-step loop automatically
const agent = new ToolLoopAgent({
  model: openai(LLM_MODEL),
  system: `\
You are a support agent. Search the knowledge base to answer questions.
If results don't fully answer the question, search again with different terms.
`,
  tools: { searchDocs },
});
const result = await agent.generate({
  prompt: `\
I'm choosing between Pro and Enterprise. I need SSO and at least
99.9% uptime. Which plan should I pick and what's the price difference?
`,
});
console.log(result.text);
// -> call: searchDocs("SSO uptime Pro Enterprise")
// -> result: 3 docs found
// -> call: searchDocs("Pro Enterprise pricing comparison")
// -> result: 3 docs found
// "Based on your requirements ... Enterprise plan at $199/month."
Mastra
import { Agent } from "@mastra/core/agent";
import { createTool } from "@mastra/core/tools";
import { LocalIndex } from "vectra";
import OpenAI from "openai";
import { z } from "zod";
const LLM_MODEL = "openai/gpt-5.4";

// connect to indexed documents — see Setup scenario
const index = new LocalIndex("./acme_index");
const embeddingClient = new OpenAI();

// embed text with the same model the index was built with
async function embed(text: string) {
  const response = await embeddingClient.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  const [first] = response.data;
  return first.embedding;
}

// free-text retrieval tool over the local vector index
const searchDocs = createTool({
  id: "search-docs",
  description: "Search the knowledge base for relevant documents",
  inputSchema: z.object({ query: z.string() }),
  execute: async ({ query }) => {
    console.log(`-> call: searchDocs(${JSON.stringify(query)})`);
    const hits = await index.queryItems(await embed(query), 3);
    const texts = hits.map((hit) => hit.item.metadata.text);
    console.log(`-> result: ${texts.length} docs found`);
    return texts.join("\n\n");
  },
});

const agent = new Agent({
  name: "rag-agent",
  instructions: `\
You are a support agent. Search the knowledge base to answer questions.
If results don't fully answer the question, search again with different terms.
`,
  model: LLM_MODEL,
  tools: { searchDocs },
});

// maxSteps controls the ReAct loop iterations (default: 1)
// without maxSteps > 1, Mastra won't loop back after a tool call
const result = await agent.generate(
  `\
I'm choosing between Pro and Enterprise. I need SSO and at least
99.9% uptime. Which plan should I pick and what's the price difference?
`,
  { maxSteps: 5 },
);
console.log(result.text);
// -> call: searchDocs("SSO uptime Pro Enterprise")
// -> result: 3 docs found
// -> call: searchDocs("Pro Enterprise pricing comparison")
// -> result: 3 docs found
// "Based on your requirements ... Enterprise plan at $199/month."