Select frameworks to compare
Pick one or more frameworks from the bar above
Runtime Context
OpenAI
import json
from openai import OpenAI

LLM_MODEL = "gpt-5.4"
client = OpenAI()

# mock database — in production these are real queries
ORDERS = {
    "user_123": [
        {"id": "ORD_99", "item": "Laptop Stand", "status": "shipped"},
        {"id": "ORD_100", "item": "USB Hub", "status": "processing"},
    ],
}

# status is chosen by the LLM — user_id comes from your app (auth, session, etc.)
# only status appears in the tool schema — the LLM can't see or fabricate user_id
user_id = "user_123"

def get_orders(status: str) -> str:
    """Get orders filtered by status."""
    print(f"-> call: get_orders({status}) for {user_id}")
    matches = [o for o in ORDERS[user_id] if o["status"] == status]
    result = ", ".join(f'{o["id"]}: {o["item"]}' for o in matches) or "No orders found."
    print(f"-> result: {result}")
    return result

tools = [{
    "type": "function",
    "name": "get_orders",
    "description": "Get orders filtered by status.",
    "parameters": {
        "type": "object",
        "properties": {
            "status": {"type": "string"},
        },
        "required": ["status"],
    },
}]

# named `conversation` rather than `input` so we don't shadow the builtin
conversation = [{"role": "user", "content": "Do I have any shipped orders?"}]
# step 1: LLM decides to call the tool
response = client.responses.create(
    model=LLM_MODEL, input=conversation, tools=tools,
)
tool_call = next(i for i in response.output if i.type == "function_call")
result = get_orders(**json.loads(tool_call.arguments))
# step 2: send tool result back, LLM generates final response
conversation += response.output
conversation.append({
    "type": "function_call_output",
    "call_id": tool_call.call_id,
    "output": result,
})
response = client.responses.create(
    model=LLM_MODEL, input=conversation, tools=tools,
)
print(response.output_text)
# -> call: get_orders(shipped) for user_123
# -> result: ORD_99: Laptop Stand
# "Your shipped order is ORD_99: Laptop Stand."
Anthropic
import anthropic

LLM_MODEL = "claude-opus-4-6"
client = anthropic.Anthropic()

# mock database — in production these are real queries
ORDERS = {
    "user_123": [
        {"id": "ORD_99", "item": "Laptop Stand", "status": "shipped"},
        {"id": "ORD_100", "item": "USB Hub", "status": "processing"},
    ],
}

# status is chosen by the LLM — user_id comes from your app (auth, session, etc.)
# only status appears in the tool schema — the LLM can't see or fabricate user_id
user_id = "user_123"

def get_orders(status: str) -> str:
    """Get orders filtered by status."""
    print(f"-> call: get_orders({status}) for {user_id}")
    result = ", ".join(
        f'{o["id"]}: {o["item"]}'
        for o in ORDERS[user_id]
        if o["status"] == status
    ) or "No orders found."
    print(f"-> result: {result}")
    return result

tools = [{
    "name": "get_orders",
    "description": "Get orders filtered by status.",
    "input_schema": {
        "type": "object",
        "properties": {"status": {"type": "string"}},
        "required": ["status"],
    },
}]

messages = [{"role": "user", "content": "Do I have any shipped orders?"}]

# step 1: the model decides to call the tool
response = client.messages.create(
    model=LLM_MODEL, max_tokens=1024, tools=tools, messages=messages,
)
tool_use = next(b for b in response.content if b.type == "tool_use")
result = get_orders(**tool_use.input)

# step 2: hand the tool result back and let the model write the final answer
messages.extend([
    {"role": "assistant", "content": response.content},
    {"role": "user", "content": [{
        "type": "tool_result",
        "tool_use_id": tool_use.id,
        "content": result,
    }]},
])
response = client.messages.create(
    model=LLM_MODEL, max_tokens=1024, tools=tools, messages=messages,
)
print(response.content[0].text)
# -> call: get_orders(shipped) for user_123
# -> result: ORD_99: Laptop Stand
# "Your shipped order is ORD_99: Laptop Stand."
Gemini
from google import genai
from google.genai import types

LLM_MODEL = "gemini-pro-latest"
client = genai.Client()

# mock database — in production these are real queries
ORDERS = {
    "user_123": [
        {"id": "ORD_99", "item": "Laptop Stand", "status": "shipped"},
        {"id": "ORD_100", "item": "USB Hub", "status": "processing"},
    ],
}

# status is chosen by the LLM — user_id comes from your app (auth, session, etc.)
# only status appears in the tool schema — the LLM can't see or fabricate user_id
user_id = "user_123"

def get_orders(status: str) -> str:
    """Get orders filtered by status."""
    print(f"-> call: get_orders({status}) for {user_id}")
    result = ", ".join(
        f'{o["id"]}: {o["item"]}'
        for o in ORDERS[user_id]
        if o["status"] == status
    ) or "No orders found."
    print(f"-> result: {result}")
    return result

# automatic function calling: the SDK runs the tool and loops the result back
cfg = types.GenerateContentConfig(tools=[get_orders])
response = client.models.generate_content(
    model=LLM_MODEL,
    config=cfg,
    contents="Do I have any shipped orders?",
)
print(response.text)
# -> call: get_orders(shipped) for user_123
# -> result: ORD_99: Laptop Stand
# "Your shipped order is ORD_99: Laptop Stand."
Pydantic AI
from dataclasses import dataclass
from pydantic_ai import Agent, RunContext

LLM_MODEL = "openai:gpt-5.4"

# mock database — in production these are real queries
ORDERS = {
    "user_123": [
        {"id": "ORD_99", "item": "Laptop Stand", "status": "shipped"},
        {"id": "ORD_100", "item": "USB Hub", "status": "processing"},
    ],
}

# per-run dependencies carried outside the message stream
@dataclass
class Deps:
    user_id: str

agent = Agent(LLM_MODEL, deps_type=Deps)

# status is chosen by the LLM; user_id arrives via RunContext, which the
# model never sees, so it cannot fabricate another user's id
@agent.tool
def get_orders(ctx: RunContext[Deps], status: str) -> str:
    """Get orders filtered by status."""
    print(f"-> call: get_orders({status}) for {ctx.deps.user_id}")
    result = ", ".join(
        f'{o["id"]}: {o["item"]}'
        for o in ORDERS[ctx.deps.user_id]
        if o["status"] == status
    ) or "No orders found."
    print(f"-> result: {result}")
    return result

run = agent.run_sync(
    "Do I have any shipped orders?",
    deps=Deps(user_id="user_123"),
)
print(run.output)
# -> call: get_orders(shipped) for user_123
# -> result: ORD_99: Laptop Stand
# "Your shipped order is ORD_99: Laptop Stand."
LangGraph
from dataclasses import dataclass
from langchain.tools import tool, ToolRuntime
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI

LLM_MODEL = "gpt-5.4"
model = ChatOpenAI(model=LLM_MODEL)

# mock database — in production these are real queries
ORDERS = {
    "user_123": [
        {"id": "ORD_99", "item": "Laptop Stand", "status": "shipped"},
        {"id": "ORD_100", "item": "USB Hub", "status": "processing"},
    ],
}

# per-invocation context carried outside the message stream
@dataclass
class UserContext:
    user_id: str

# status is chosen by the LLM; user_id arrives via ToolRuntime, which the
# model never sees, so it cannot fabricate another user's id
@tool
def get_orders(status: str, runtime: ToolRuntime) -> str:
    """Get orders filtered by status."""
    user_id = runtime.context.user_id
    print(f"-> call: get_orders({status}) for {user_id}")
    result = ", ".join(
        f'{o["id"]}: {o["item"]}'
        for o in ORDERS[user_id]
        if o["status"] == status
    ) or "No orders found."
    print(f"-> result: {result}")
    return result

agent = create_agent(model, [get_orders], context_schema=UserContext)
state = agent.invoke(
    {"messages": [("user", "Do I have any shipped orders?")]},
    context=UserContext(user_id="user_123"),
)
print(state["messages"][-1].content)
# -> call: get_orders(shipped) for user_123
# -> result: ORD_99: Laptop Stand
# "Your shipped order is ORD_99: Laptop Stand."
AI SDK
import { ToolLoopAgent, tool } from "ai";
import { openai } from "@ai-sdk/openai";
import { z } from "zod";

const LLM_MODEL = "gpt-5.4";

// mock database — in production these are real queries
const ORDERS: Record<string, { id: string; item: string; status: string }[]> = {
  user_123: [
    { id: "ORD_99", item: "Laptop Stand", status: "shipped" },
    { id: "ORD_100", item: "USB Hub", status: "processing" },
  ],
};

// status is chosen by the LLM — userId is baked in via closure, so it never
// appears in the tool schema and the model cannot see or fabricate it
const createTools = (userId: string) => ({
  getOrders: tool({
    description: "Get orders filtered by status.",
    inputSchema: z.object({ status: z.string() }),
    execute: async ({ status }) => {
      console.log(`-> call: get_orders(${status}) for ${userId}`);
      const found = ORDERS[userId].filter((order) => order.status === status);
      const result =
        found.map((order) => `${order.id}: ${order.item}`).join(", ") || "No orders found.";
      console.log(`-> result: ${result}`);
      return result;
    },
  }),
});

const tools = createTools("user_123");
const agent = new ToolLoopAgent({
  model: openai(LLM_MODEL),
  tools,
});

const result = await agent.generate({
  prompt: "Do I have any shipped orders?",
});
console.log(result.text);
// -> call: get_orders(shipped) for user_123
// -> result: ORD_99: Laptop Stand
// "Your shipped order is ORD_99: Laptop Stand."
Mastra
import { Agent } from "@mastra/core/agent";
import { createTool } from "@mastra/core/tools";
import { RequestContext } from "@mastra/core/request-context";
import { z } from "zod";

const LLM_MODEL = "openai/gpt-5.4";

// mock database — in production these are real queries
const ORDERS: Record<string, { id: string; item: string; status: string }[]> = {
  user_123: [
    { id: "ORD_99", item: "Laptop Stand", status: "shipped" },
    { id: "ORD_100", item: "USB Hub", status: "processing" },
  ],
};

// status is chosen by the LLM — userId travels in requestContext, which the
// model never sees, so it cannot fabricate another user's id
const getOrders = createTool({
  id: "get-orders",
  description: "Get orders filtered by status.",
  inputSchema: z.object({ status: z.string() }),
  requestContextSchema: z.object({ userId: z.string() }),
  execute: async ({ status }, { requestContext }) => {
    const userId = requestContext.get("userId");
    console.log(`-> call: get_orders(${status}) for ${userId}`);
    const found = ORDERS[userId].filter((order) => order.status === status);
    const result =
      found.map((order) => `${order.id}: ${order.item}`).join(", ") || "No orders found.";
    console.log(`-> result: ${result}`);
    return result;
  },
});

const agent = new Agent({
  name: "order-agent",
  instructions: "You are a helpful assistant.",
  model: LLM_MODEL,
  tools: { getOrders },
});

const requestContext = new RequestContext([["userId", "user_123"]]);
const result = await agent.generate("Do I have any shipped orders?", {
  requestContext,
});
console.log(result.text);
// -> call: get_orders(shipped) for user_123
// -> result: ORD_99: Laptop Stand
// "Your shipped order is ORD_99: Laptop Stand."
// "Your shipped order is ORD_99: Laptop Stand."Dynamic Instructions
OpenAI
from openai import OpenAI
# one shared client — the instructions vary per request below
LLM_MODEL = "gpt-5.4"
client = OpenAI()
# mock database — in production these are real queries
CUSTOMERS = {
    "user_123": {"name": "Acme Corp", "plan": "enterprise"},
    "user_456": {"name": "Jane Smith", "plan": "free"},
}
OVERDUE_INVOICES = {
    "user_123": [{"id": "INV-42", "amount": 1200}],
    "user_456": [],
}

# the system prompt is assembled per request from live data — not a static string
# the LLM sees personalized context without querying the database itself
def build_instructions(user_id: str) -> str:
    """Compose a personalized system prompt from the user's account records."""
    customer = CUSTOMERS[user_id]
    overdue = OVERDUE_INVOICES[user_id]
    parts = [f"Customer: {customer['name']}, plan: {customer['plan']}."]
    if overdue:
        parts.append(f"ALERT: {len(overdue)} overdue invoice(s). Prioritize payment resolution.")
    if customer["plan"] == "enterprise":
        parts.append("This is a premium customer. Offer direct escalation.")
    return "\n".join(parts)
# same agent, same question — behavior changes based on who's asking
for uid in ("user_123", "user_456"):
    response = client.responses.create(
        model=LLM_MODEL,
        instructions=build_instructions(uid),
        input="I need help with my account.",
    )
    print(response.output_text)
# user_123 -> "I see there's an overdue invoice on your account. Let me help
#              resolve that. As a premium customer, I can escalate directly."
# user_456 -> "Sure, I'd be happy to help! What do you need assistance with?"
Anthropic
import anthropic
# one shared client — the system prompt varies per request below
LLM_MODEL = "claude-opus-4-6"
client = anthropic.Anthropic()
# mock database — in production these are real queries
CUSTOMERS = {
    "user_123": {"name": "Acme Corp", "plan": "enterprise"},
    "user_456": {"name": "Jane Smith", "plan": "free"},
}
OVERDUE_INVOICES = {
    "user_123": [{"id": "INV-42", "amount": 1200}],
    "user_456": [],
}

# the system prompt is rebuilt on every request from live data — never static
# the LLM sees personalized context without querying the database itself
def build_instructions(user_id: str) -> str:
    """Compose a per-user system prompt from account records."""
    customer = CUSTOMERS[user_id]
    overdue = OVERDUE_INVOICES[user_id]
    parts = [f"Customer: {customer['name']}, plan: {customer['plan']}."]
    if overdue:
        parts.append(f"ALERT: {len(overdue)} overdue invoice(s). Prioritize payment resolution.")
    if customer["plan"] == "enterprise":
        parts.append("This is a premium customer. Offer direct escalation.")
    return "\n".join(parts)
# same agent, same question — behavior changes based on who's asking
for uid in ("user_123", "user_456"):
    response = client.messages.create(
        model=LLM_MODEL,
        max_tokens=1024,
        system=build_instructions(uid),
        messages=[{"role": "user", "content": "I need help with my account."}],
    )
    print(response.content[0].text)
# user_123 -> "I see there's an overdue invoice on your account. Let me help
#              resolve that. As a premium customer, I can escalate directly."
# user_456 -> "Sure, I'd be happy to help! What do you need assistance with?"
Gemini
from google import genai
from google.genai import types
# one shared client — system_instruction varies per request below
LLM_MODEL = "gemini-pro-latest"
client = genai.Client()
# mock database — in production these are real queries
CUSTOMERS = {
    "user_123": {"name": "Acme Corp", "plan": "enterprise"},
    "user_456": {"name": "Jane Smith", "plan": "free"},
}
OVERDUE_INVOICES = {
    "user_123": [{"id": "INV-42", "amount": 1200}],
    "user_456": [],
}

# the system instruction is derived from live data at request time — not static
# the LLM sees personalized context without querying the database itself
def build_instructions(user_id: str) -> str:
    """Build a personalized system instruction for the given user."""
    customer = CUSTOMERS[user_id]
    overdue = OVERDUE_INVOICES[user_id]
    parts = [f"Customer: {customer['name']}, plan: {customer['plan']}."]
    if overdue:
        parts.append(f"ALERT: {len(overdue)} overdue invoice(s). Prioritize payment resolution.")
    if customer["plan"] == "enterprise":
        parts.append("This is a premium customer. Offer direct escalation.")
    return "\n".join(parts)
# same agent, same question — behavior changes based on who's asking
for uid in ("user_123", "user_456"):
    response = client.models.generate_content(
        model=LLM_MODEL,
        config=types.GenerateContentConfig(
            system_instruction=build_instructions(uid),
        ),
        contents="I need help with my account.",
    )
    print(response.text)
# user_123 -> "I see there's an overdue invoice on your account. Let me help
#              resolve that. As a premium customer, I can escalate directly."
# user_456 -> "Sure, I'd be happy to help! What do you need assistance with?"
Pydantic AI
from dataclasses import dataclass
from pydantic_ai import Agent, RunContext

LLM_MODEL = "openai:gpt-5.4"

# mock database — in production these are real queries
CUSTOMERS = {
    "user_123": {"name": "Acme Corp", "plan": "enterprise"},
    "user_456": {"name": "Jane Smith", "plan": "free"},
}
OVERDUE_INVOICES = {
    "user_123": [{"id": "INV-42", "amount": 1200}],
    "user_456": [],
}

# per-run dependencies carried outside the message stream
@dataclass
class Deps:
    user_id: str

agent = Agent(LLM_MODEL, deps_type=Deps)

# the system prompt is assembled per run from live data — not a static string
# the LLM sees personalized context without querying the database itself
@agent.instructions
def dynamic_instructions(ctx: RunContext[Deps]) -> str:
    """Build a personalized system prompt for the current user."""
    customer = CUSTOMERS[ctx.deps.user_id]
    overdue = OVERDUE_INVOICES[ctx.deps.user_id]
    parts = [f"Customer: {customer['name']}, plan: {customer['plan']}."]
    if overdue:
        parts.append(f"ALERT: {len(overdue)} overdue invoice(s). Prioritize payment resolution.")
    if customer["plan"] == "enterprise":
        parts.append("This is a premium customer. Offer direct escalation.")
    return "\n".join(parts)

# same agent, same question — behavior changes based on who's asking
for uid in ("user_123", "user_456"):
    result = agent.run_sync(
        "I need help with my account.",
        deps=Deps(user_id=uid),
    )
    print(result.output)
# user_123 -> "I see there's an overdue invoice on your account. Let me help
#              resolve that. As a premium customer, I can escalate directly."
# user_456 -> "Sure, I'd be happy to help! What do you need assistance with?"
LangGraph
from dataclasses import dataclass
from langchain.agents import create_agent
from langchain.agents.middleware import dynamic_prompt
from langchain_openai import ChatOpenAI

LLM_MODEL = "gpt-5.4"
model = ChatOpenAI(model=LLM_MODEL)

# mock database — in production these are real queries
CUSTOMERS = {
    "user_123": {"name": "Acme Corp", "plan": "enterprise"},
    "user_456": {"name": "Jane Smith", "plan": "free"},
}
OVERDUE_INVOICES = {
    "user_123": [{"id": "INV-42", "amount": 1200}],
    "user_456": [],
}

# per-invocation context carried outside the message stream
@dataclass
class UserContext:
    user_id: str

# @dynamic_prompt middleware assembles the system message from context at runtime
# the LLM sees personalized context without querying the database itself
@dynamic_prompt
def build_prompt(request):
    """Build the per-user system message from runtime context."""
    uid = request.runtime.context.user_id
    customer = CUSTOMERS[uid]
    overdue = OVERDUE_INVOICES[uid]
    parts = [f"Customer: {customer['name']}, plan: {customer['plan']}."]
    if overdue:
        parts.append(f"ALERT: {len(overdue)} overdue invoice(s). Prioritize payment resolution.")
    if customer["plan"] == "enterprise":
        parts.append("This is a premium customer. Offer direct escalation.")
    return "\n".join(parts)

agent = create_agent(
    model, tools=[], middleware=[build_prompt], context_schema=UserContext,
)

# same agent, same question — behavior changes based on who's asking
for uid in ("user_123", "user_456"):
    result = agent.invoke(
        {"messages": [("user", "I need help with my account.")]},
        context=UserContext(user_id=uid),
    )
    print(result["messages"][-1].content)
# user_123 -> "I see there's an overdue invoice on your account. Let me help
#              resolve that. As a premium customer, I can escalate directly."
# user_456 -> "Sure, I'd be happy to help! What do you need assistance with?"
AI SDK
import { ToolLoopAgent } from "ai";
import { openai } from "@ai-sdk/openai";
import { z } from "zod";

const LLM_MODEL = "gpt-5.4";

// mock database — in production these are real queries
const CUSTOMERS: Record<string, { name: string; plan: string }> = {
  user_123: { name: "Acme Corp", plan: "enterprise" },
  user_456: { name: "Jane Smith", plan: "free" },
};
const OVERDUE_INVOICES: Record<string, { id: string; amount: number }[]> = {
  user_123: [{ id: "INV-42", amount: 1200 }],
  user_456: [],
};

// assemble a personalized system prompt from live account data
function buildInstructions(userId: string): string {
  const customer = CUSTOMERS[userId];
  const overdue = OVERDUE_INVOICES[userId];
  const parts = [`Customer: ${customer.name}, plan: ${customer.plan}.`];
  if (overdue.length > 0) {
    parts.push(`ALERT: ${overdue.length} overdue invoice(s). Prioritize payment resolution.`);
  }
  if (customer.plan === "enterprise") {
    parts.push("This is a premium customer. Offer direct escalation.");
  }
  return parts.join("\n");
}

// callOptionsSchema declares per-request parameters; prepareCall injects the prompt
// the LLM sees personalized context without querying the database itself
const agent = new ToolLoopAgent({
  model: openai(LLM_MODEL),
  callOptionsSchema: z.object({ userId: z.string() }),
  prepareCall: async ({ options, ...settings }) => ({
    ...settings,
    instructions: buildInstructions(options.userId),
  }),
});

// same agent, same question — behavior changes based on who's asking
for (const userId of ["user_123", "user_456"]) {
  const result = await agent.generate({
    prompt: "I need help with my account.",
    options: { userId },
  });
  console.log(result.text);
}
// user_123 -> "I see there's an overdue invoice on your account. Let me help
//              resolve that. As a premium customer, I can escalate directly."
// user_456 -> "Sure, I'd be happy to help! What do you need assistance with?"
Mastra
import { Agent } from "@mastra/core/agent";
import { RequestContext } from "@mastra/core/request-context";

const LLM_MODEL = "openai/gpt-5.4";

// mock database — in production these are real queries
const CUSTOMERS: Record<string, { name: string; plan: string }> = {
  user_123: { name: "Acme Corp", plan: "enterprise" },
  user_456: { name: "Jane Smith", plan: "free" },
};
const OVERDUE_INVOICES: Record<string, { id: string; amount: number }[]> = {
  user_123: [{ id: "INV-42", amount: 1200 }],
  user_456: [],
};

// instructions is a function of requestContext — rebuilt on every request
// the LLM sees personalized context without querying the database itself
const agent = new Agent({
  name: "support-agent",
  model: LLM_MODEL,
  instructions: ({ requestContext }) => {
    const userId = requestContext.get("userId");
    const customer = CUSTOMERS[userId];
    const overdue = OVERDUE_INVOICES[userId];
    const parts = [`Customer: ${customer.name}, plan: ${customer.plan}.`];
    if (overdue.length > 0) {
      parts.push(`ALERT: ${overdue.length} overdue invoice(s). Prioritize payment resolution.`);
    }
    if (customer.plan === "enterprise") {
      parts.push("This is a premium customer. Offer direct escalation.");
    }
    return parts.join("\n");
  },
});

// same agent, same question — behavior changes based on who's asking
for (const userId of ["user_123", "user_456"]) {
  const requestContext = new RequestContext([["userId", userId]]);
  const result = await agent.generate("I need help with my account.", {
    requestContext,
  });
  console.log(result.text);
}
// user_123 -> "I see there's an overdue invoice on your account. Let me help
//              resolve that. As a premium customer, I can escalate directly."
// user_456 -> "Sure, I'd be happy to help! What do you need assistance with?"
// "Sure, I'd be happy to help! What do you need assistance with?"Prompt Caching
OpenAI
from pathlib import Path
from openai import OpenAI

LLM_MODEL = "gpt-5.4"
client = OpenAI()

# prompt caching is automatic — no opt-in, no code changes
# any prefix >= 1024 tokens is cached on first request, reused on subsequent ones
# keep static content (instructions, examples) first for the best hit rate
KNOWLEDGE_BASE = Path("knowledge_base.txt").read_text()  # ~4100 tokens

def ask(question: str) -> int:
    """Send one request and return how many input tokens were served from cache."""
    r = client.responses.create(
        model=LLM_MODEL,
        instructions=KNOWLEDGE_BASE,
        input=question,
    )
    print(r.output_text)
    cached = r.usage.input_tokens_details.cached_tokens
    print(f"Cached tokens: {cached}")
    return cached

# request 1: cold cache — the prompt is processed and stored automatically
cached_1 = ask("I keep getting 429 errors. What should I do?")
# -> Cached tokens: 0 (cache miss — prefix is now stored)
# request 2: warm cache — the identical instruction prefix is served from cache
cached_2 = ask("How do I fix SSO login failures?")
# -> Cached tokens: 3328 (cache hit — lower cost, lower latency)
Anthropic
from pathlib import Path
import anthropic

LLM_MODEL = "claude-opus-4-6"
client = anthropic.Anthropic()

# prompt caching requires explicit opt-in via cache_control breakpoints
# a breakpoint marks a prefix boundary — everything up to it is cached
# 25% surcharge on cache writes, 90% discount on cache reads, 5-min TTL
KNOWLEDGE_BASE = Path("knowledge_base.txt").read_text()  # ~4100 tokens

# cache_control breakpoint on the system block — up to 4 breakpoints allowed
system = [{
    "type": "text",
    "text": KNOWLEDGE_BASE,
    "cache_control": {"type": "ephemeral"},  # 5-min TTL, refreshed on each hit
}]

def ask(question: str) -> None:
    """Send one request and report cache-write vs cache-read token counts."""
    r = client.messages.create(
        model=LLM_MODEL, max_tokens=1024, system=system,
        messages=[{"role": "user", "content": question}],
    )
    print(r.content[0].text)
    print(f"Cache write: {r.usage.cache_creation_input_tokens} tokens")
    print(f"Cache read: {r.usage.cache_read_input_tokens} tokens")

# request 1: cache write — the prefix is stored, surcharge applies
ask("I keep getting 429 errors. What should I do?")
# -> Cache write: 4182 tokens, Cache read: 0 tokens
# request 2: cache hit — the prefix is served at the 90% read discount
ask("How do I fix SSO login failures?")
# -> Cache write: 0 tokens, Cache read: 4182 tokens
Gemini
from pathlib import Path
from google import genai
from google.genai import types

LLM_MODEL = "gemini-pro-latest"
client = genai.Client()

# Gemini caching creates a named server-side resource with an explicit lifecycle:
# create once, reference across many requests, delete when done
# note: cached content must be >= 1024 tokens
KNOWLEDGE_BASE = Path("knowledge_base.txt").read_text()  # ~4100 tokens

# step 1: create a named cache with an explicit TTL
cache = client.caches.create(
    model=LLM_MODEL,
    config=types.CreateCachedContentConfig(
        system_instruction=KNOWLEDGE_BASE,
        ttl="300s",  # 5-minute TTL — extend with client.caches.update()
    ),
)
print(f"Cache created: {cache.name}")

def ask(question: str) -> None:
    """Send one question against the named cache and report cached-token usage."""
    r = client.models.generate_content(
        model=LLM_MODEL,
        contents=question,
        config=types.GenerateContentConfig(cached_content=cache.name),
    )
    print(r.text)
    print(f"Cached tokens: {r.usage_metadata.cached_content_token_count}")

# step 2: reference the cache — no need to resend the prompt
ask("I keep getting 429 errors. What should I do?")
# -> Cached tokens: 4182
# same cache, different question — cached tokens reused
ask("How do I fix SSO login failures?")
# -> Cached tokens: 4182

# step 3: cleanup — delete when done, or let the TTL expire
client.caches.delete(name=cache.name)
Pydantic AI
from pathlib import Path
from pydantic_ai import Agent

LLM_MODEL = "openai:gpt-5.4"

# prompt caching is automatic with OpenAI — no extra settings needed
# any prefix >= 1024 tokens is cached on first request, reused on subsequent ones
# keep static content (instructions, examples) first for the best hit rate
KNOWLEDGE_BASE = Path("knowledge_base.txt").read_text()  # ~4100 tokens
agent = Agent(LLM_MODEL, instructions=KNOWLEDGE_BASE)

def ask(question: str) -> None:
    """Run one turn and print the answer plus token usage."""
    res = agent.run_sync(question)
    print(res.output)
    print(f"Usage: {res.usage()}")

# request 1: cold cache — the prompt is processed and cached automatically
ask("I keep getting 429 errors. What should I do?")
# -> cache_read_tokens = 0 (cache miss — prefix is now stored)
# request 2: warm cache — the identical instruction prefix is served from cache
ask("How do I fix SSO login failures?")
# -> cache_read_tokens = 2816 (cache hit — lower cost, lower latency)
LangGraph
from pathlib import Path
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage

LLM_MODEL = "gpt-5.4"
model = ChatOpenAI(model=LLM_MODEL)

# OpenAI caching is automatic — no cache_control needed
# identical prefixes >= 1024 tokens are cached and reused
# keep static content (instructions, examples) first for the best hit rate
KNOWLEDGE_BASE = Path("knowledge_base.txt").read_text()  # ~4100 tokens
system = SystemMessage(content=KNOWLEDGE_BASE)
agent = create_agent(model, tools=[])

def ask(question: str) -> None:
    """Run one turn with the shared system prefix and report cached tokens."""
    state = agent.invoke({
        "messages": [system, ("user", question)],
    })
    reply = state["messages"][-1]
    print(reply.content)
    usage = reply.response_metadata["token_usage"]
    print(f"Cached tokens: {usage['prompt_tokens_details']['cached_tokens']}")

# request 1: cold cache — the prompt is processed and cached automatically
ask("I keep getting 429 errors. What should I do?")
# -> Cached tokens: 0 (cache miss — prefix is now stored)
# request 2: warm cache — the identical prefix is served from cache
ask("How do I fix SSO login failures?")
# -> Cached tokens: 2816 (cache hit — lower cost, lower latency)
AI SDK
import { readFileSync } from "fs";
import { generateText } from "ai";
import { openai } from "@ai-sdk/openai";

const LLM_MODEL = "gpt-5.4";

// prompt caching is automatic with OpenAI — no providerOptions needed
// identical prefixes >= 1024 tokens are cached and reused
// keep static content (instructions, examples) first for the best hit rate
const KNOWLEDGE_BASE = readFileSync("knowledge_base.txt", "utf-8"); // ~4100 tokens

async function ask(prompt: string): Promise<void> {
  const r = await generateText({
    model: openai(LLM_MODEL),
    system: KNOWLEDGE_BASE,
    prompt,
  });
  console.log(r.text);
  console.log(`Cached tokens: ${r.usage.cachedInputTokens ?? 0}`);
}

// request 1: cold cache — the prompt is processed and cached automatically
await ask("I keep getting 429 errors. What should I do?");
// -> Cached tokens: 0 (cache miss — prefix is now stored)
// request 2: warm cache — the identical prefix is served from cache
await ask("How do I fix SSO login failures?");
// -> Cached tokens: 3200 (cache hit — lower cost, lower latency)
Mastra
import { readFileSync } from "fs";
import { Agent } from "@mastra/core/agent";

const LLM_MODEL = "openai/gpt-5.4";

// prompt caching is automatic with OpenAI — no providerOptions needed
// identical prefixes >= 1024 tokens are cached and reused
// keep static content (instructions, examples) first for the best hit rate
const KNOWLEDGE_BASE = readFileSync("knowledge_base.txt", "utf-8"); // ~4100 tokens

const agent = new Agent({
  name: "support-agent",
  instructions: KNOWLEDGE_BASE,
  model: LLM_MODEL,
});

async function ask(question: string): Promise<void> {
  const r = await agent.generate(question);
  console.log(r.text);
  console.log(`Cached tokens: ${r.usage.cachedInputTokens ?? 0}`);
}

// request 1: cold cache — the prompt is processed and cached automatically
await ask("I keep getting 429 errors. What should I do?");
// -> Cached tokens: 0 (cache miss — prefix is now stored)
// request 2: warm cache — the identical instruction prefix is served from cache
await ask("How do I fix SSO login failures?");
// -> Cached tokens: 3200 (cache hit — lower cost, lower latency)