API 调用实践
2163 字约 7 分钟
2026-05-20
直接调用 API 是最灵活的 AI 集成方式,适合构建定制化应用。本文以 Anthropic API(Claude)为主,兼顾 OpenAI 兼容接口。
1. 环境准备
# 安装 SDK
pip install anthropic # Anthropic
pip install openai # OpenAI
# 设置 API Key(推荐用环境变量,不要硬编码)
export ANTHROPIC_API_KEY="sk-ant-..."
export OPENAI_API_KEY="sk-..."
import anthropic
import os
# SDK 会自动读取 ANTHROPIC_API_KEY 环境变量
client = anthropic.Anthropic()
# 或者显式传入(不推荐在生产代码中这样做)
client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
2. 基础消息 API
2.1 单轮对话
message = client.messages.create(
model="claude-sonnet-4-6",
max_tokens=1024,
messages=[
{"role": "user", "content": "解释一下什么是向量数据库"}
]
)
# 响应结构
print(message.content[0].text) # 回复文本
print(message.model) # 使用的模型
print(message.usage.input_tokens) # 输入 token 数
print(message.usage.output_tokens) # 输出 token 数
print(message.stop_reason) # 停止原因: end_turn, max_tokens, tool_use
2.2 带 System Prompt
message = client.messages.create(
model="claude-sonnet-4-6",
max_tokens=1024,
system="你是一位资深 Python 工程师,回答要简洁直接,提供可运行的代码示例",
messages=[
{"role": "user", "content": "怎么用 asyncio 并发请求多个 API?"}
]
)
2.3 多轮对话(维护历史)
class ConversationManager:
    """Hold a multi-turn conversation with the Messages API.

    Every call to ``chat`` sends the whole accumulated history, so the model
    sees all previous turns.
    """

    def __init__(self, system_prompt: str = "", model: str = "claude-sonnet-4-6"):
        """Create a manager with an empty history.

        Args:
            system_prompt: System prompt sent with every request.
            model: Model identifier passed to ``messages.create``.
        """
        self.client = anthropic.Anthropic()
        self.system = system_prompt
        self.model = model
        self.history = []

    def chat(self, user_message: str) -> str:
        """Send one user turn and return the assistant's text reply.

        On success both the user turn and the reply are appended to
        ``history``. If the request or reply extraction fails, the user turn
        is removed again and the exception is re-raised, so ``history`` never
        ends on an unanswered user message.

        Args:
            user_message: Text of the new user turn.

        Returns:
            The text of the first content block of the response.
        """
        self.history.append({"role": "user", "content": user_message})
        try:
            response = self.client.messages.create(
                model=self.model,
                max_tokens=2048,
                system=self.system,
                messages=self.history,
            )
            assistant_message = response.content[0].text
        except Exception:
            # Undo the append above; otherwise the next chat() would send
            # two consecutive user turns.
            self.history.pop()
            raise
        self.history.append({"role": "assistant", "content": assistant_message})
        return assistant_message

    def clear_history(self):
        """Forget all previous turns."""
        self.history = []

    def get_token_count(self) -> int:
        """Roughly estimate the number of tokens held in the history."""
        # Crude heuristic: about 0.5 tokens per character.
        return sum(len(m["content"]) for m in self.history) // 2
# 使用
conv = ConversationManager(system_prompt="你是一个数据分析助手")
print(conv.chat("我有一份销售数据,想分析季节性趋势"))
print(conv.chat("可以用什么可视化方式?"))
print(conv.chat("给我一个 matplotlib 的代码示例"))
3. 流式输出(Streaming)
对于长回复,流式输出让用户立即看到内容,提升体验:
import sys
def stream_response(prompt: str, system: str = ""):
    """Print a streamed reply as it arrives and return the complete text.

    Args:
        prompt: The user message to send.
        system: System prompt for the request.

    Returns:
        All streamed text chunks concatenated in arrival order.
    """
    request = {
        "model": "claude-sonnet-4-6",
        "max_tokens": 2048,
        "system": system,
        "messages": [{"role": "user", "content": prompt}],
    }
    pieces = []
    with client.messages.stream(**request) as stream:
        for piece in stream.text_stream:
            print(piece, end="", flush=True)
            pieces.append(piece)
        print()  # end the streamed line

        # The final message carries the token usage for the whole exchange.
        usage = stream.get_final_message().usage
        print(f"\n[Token 消耗: 输入={usage.input_tokens}, "
              f"输出={usage.output_tokens}]")
    return "".join(pieces)
# 异步流式(用于 FastAPI、async 应用)
import asyncio
_async_client = None


def _get_async_client():
    """Return the shared ``anthropic.AsyncAnthropic`` client, creating it on first use."""
    global _async_client
    if _async_client is None:
        _async_client = anthropic.AsyncAnthropic()
    return _async_client


async def async_stream(prompt: str):
    """Yield the reply to *prompt* chunk by chunk as it is generated.

    ``async with`` / ``async for`` need the asynchronous client; the
    synchronous ``anthropic.Anthropic`` stream manager does not implement
    the async context-manager protocol.

    Args:
        prompt: The user message to send.

    Yields:
        Text chunks in arrival order.
    """
    async with _get_async_client().messages.stream(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    ) as stream:
        async for text in stream.text_stream:
            yield text
# FastAPI SSE 端点示例
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
app = FastAPI()
@app.post("/chat/stream")
async def chat_stream(request: dict):
async def generate():
async for chunk in async_stream(request["message"]):
yield f"data: {chunk}\n\n"
yield "data: [DONE]\n\n"
return StreamingResponse(generate(), media_type="text/event-stream")
4. 工具调用(Tool Use / Function Calling)
让模型能调用外部函数,是构建 Agent 的基础。
4.1 定义工具
tools = [
{
"name": "get_stock_price",
"description": "获取指定股票的当前价格和基本信息。只用于查询股票数据,不能用于预测涨跌。",
"input_schema": {
"type": "object",
"properties": {
"symbol": {
"type": "string",
"description": "股票代码,如 AAPL、TSLA、000001.SZ"
},
"currency": {
"type": "string",
"enum": ["USD", "CNY", "HKD"],
"description": "返回价格的货币单位,默认 USD"
}
},
"required": ["symbol"]
}
},
{
"name": "execute_python",
"description": "执行 Python 代码并返回结果。用于数学计算、数据处理等。",
"input_schema": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "要执行的 Python 代码"
}
},
"required": ["code"]
}
}
]
4.2 完整工具调用循环
import json
def execute_tool(tool_name: str, tool_input: dict) -> str:
    """Run the named tool and return its result as a string.

    Args:
        tool_name: Tool identifier taken from the model's ``tool_use`` block.
        tool_input: Arguments for the tool, keyed by parameter name.

    Returns:
        A JSON string for ``get_stock_price``; the captured stdout (or a
        success / error message) for ``execute_python``; a "not implemented"
        message for any other tool name.

    Raises:
        KeyError: If ``get_stock_price`` is called without ``"symbol"``.
    """
    if tool_name == "get_stock_price":
        symbol = tool_input["symbol"]
        # Placeholder quote; swap in a call to a real market-data API here.
        return json.dumps({"symbol": symbol, "price": 150.25, "currency": "USD"})

    if tool_name == "execute_python":
        import contextlib
        import io

        # SECURITY: exec() runs model-generated code with the full privileges
        # of this process. Only use this in a sandbox (container, restricted
        # user, no secrets in the environment), never on a production host.
        output = io.StringIO()
        # One fresh dict acts as both globals and locals. With the bare
        # exec(code) form inside a function, top-level names of the snippet
        # land in a separate locals mapping, so functions and lambdas defined
        # by the snippet cannot see them (NameError). It also keeps the
        # snippet away from this module's globals.
        namespace = {"__name__": "__main__"}
        try:
            with contextlib.redirect_stdout(output):
                exec(tool_input["code"], namespace)
            return output.getvalue() or "代码执行成功(无输出)"
        except Exception as e:
            # Deliberately broad: the error text is returned to the model so
            # it can correct its own code.
            return f"执行错误:{str(e)}"

    return f"工具 {tool_name} 未实现"
def run_agent_loop(user_message: str, system: str = "", max_iterations: int = 10):
    """Run a tool-calling loop until the model finishes or the budget runs out.

    Each round sends the conversation so far. When the model asks for tools,
    every requested tool is run through ``execute_tool`` and the results are
    sent back as the next user turn.

    Args:
        user_message: The task to hand to the model.
        system: System prompt sent with every request.
        max_iterations: Maximum number of model calls before giving up.

    Returns:
        On ``end_turn``, all text blocks of the final response joined
        together ("" if there are none). Otherwise a message saying why the
        loop stopped: an unexpected stop reason, or the iteration limit.
    """
    messages = [{"role": "user", "content": user_message}]

    for iteration in range(max_iterations):
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=4096,
            system=system,
            tools=tools,
            messages=messages,
        )
        print(f"\n[迭代 {iteration+1}] stop_reason: {response.stop_reason}")

        if response.stop_reason == "end_turn":
            # Return unconditionally. Looping again without new input would
            # only re-send the identical request.
            return "".join(
                block.text for block in response.content if hasattr(block, "text")
            )

        if response.stop_reason != "tool_use":
            # e.g. max_tokens: report the real reason rather than claiming
            # that the iteration limit was hit.
            return f"模型提前停止(stop_reason: {response.stop_reason}),未能完成任务"

        # Echo the assistant turn back so the tool_use ids can be matched.
        messages.append({"role": "assistant", "content": response.content})

        tool_results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            print(f" → 调用工具: {block.name}({json.dumps(block.input, ensure_ascii=False)})")
            tool_result = {"type": "tool_result", "tool_use_id": block.id}
            try:
                result = execute_tool(block.name, block.input)
            except Exception as exc:
                # Report the failure to the model instead of aborting the
                # whole loop; it may retry with corrected arguments.
                result = f"工具执行失败:{exc}"
                tool_result["is_error"] = True
            print(f" ← 结果: {result[:200]}...")
            tool_result["content"] = result
            tool_results.append(tool_result)

        # All results for one assistant turn go back in a single user turn.
        messages.append({"role": "user", "content": tool_results})

    return "达到最大迭代次数,未能完成任务"
# 使用
result = run_agent_loop(
"苹果公司当前股价是多少?把股价乘以2,再加100,结果是多少?",
system="你是一个金融分析助手,可以查询股价和做数学计算。"
)
print(f"\n最终回复:{result}")
4.3 并行工具调用
模型可以在一次响应中同时请求多个工具调用:
# Claude 会自动决定哪些工具可以并行调用
# 例如:"帮我同时查 AAPL 和 GOOGL 的股价"
# 模型会在一次响应中请求两个 get_stock_price 调用
5. 成本优化
5.1 Prompt Caching(Anthropic 特有)
对频繁重复使用的长 System Prompt 开启缓存,节省约 90% 的输入 token 成本:
# 带缓存控制的 System Prompt
system_with_cache = [
{
"type": "text",
"text": """你是一个法律助手,专门解读以下法律文件:
[插入大量法律条文,可能有数万字...]
以上是你需要参考的完整法律文件。""",
"cache_control": {"type": "ephemeral"} # 标记为可缓存
}
]
# 第一次调用:正常收费
response1 = client.messages.create(
model="claude-sonnet-4-6",
max_tokens=1024,
system=system_with_cache,
messages=[{"role": "user", "content": "第144条说的是什么?"}]
)
print(f"缓存创建: {response1.usage.cache_creation_input_tokens} tokens")
# 后续调用:System Prompt 部分命中缓存,只收 10% 的费用
response2 = client.messages.create(
model="claude-sonnet-4-6",
max_tokens=1024,
system=system_with_cache,
messages=[{"role": "user", "content": "第88条呢?"}]
)
print(f"缓存命中: {response2.usage.cache_read_input_tokens} tokens") # 按约 10% 价格计费的那部分
5.2 选择合适的模型
# 根据任务复杂度选模型
def smart_call(task_type: str, prompt: str) -> str:
    """Answer *prompt* with the model tier mapped to *task_type*.

    Args:
        task_type: One of ``"simple"``, ``"standard"`` or ``"complex"``; any
            other value falls back to the standard model.
        prompt: The user message to send.

    Returns:
        The text of the first content block of the response.
    """
    fallback = "claude-sonnet-4-6"
    tier_to_model = {
        "simple": "claude-haiku-4-5",  # simple Q&A, classification, translation
        "standard": fallback,  # most tasks
        "complex": "claude-opus-4-7",  # complex reasoning, code generation
    }
    chosen = tier_to_model[task_type] if task_type in tier_to_model else fallback
    reply = client.messages.create(
        model=chosen,
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    )
    first_block = reply.content[0]
    return first_block.text
# 粗略成本参考(每百万 tokens)
COST = {
"claude-haiku-4-5": {"input": 0.8, "output": 4.0},
"claude-sonnet-4-6": {"input": 3.0, "output": 15.0},
"claude-opus-4-7": {"input": 15.0, "output": 75.0},
}
5.3 批处理 API(Batch API)
对非实时任务,批处理 API 提供 50% 的价格折扣,任务以异步方式提交,结果通常在 24 小时内返回:
# 批量处理:适合大量离线任务
batch = client.messages.batches.create(
requests=[
{
"custom_id": f"task_{i}",
"params": {
"model": "claude-sonnet-4-6",
"max_tokens": 512,
"messages": [{"role": "user", "content": f"分析评论:{review}"}]
}
}
for i, review in enumerate(reviews)
]
)
print(f"批次 ID: {batch.id}")
# 轮询查询结果
import time
while True:
status = client.messages.batches.retrieve(batch.id)
if status.processing_status == "ended":
break
time.sleep(60) # 等1分钟再查
# 获取结果
for result in client.messages.batches.results(batch.id):
if result.result.type == "succeeded":
print(result.custom_id, result.result.message.content[0].text)
6. 错误处理与重试
import time
import anthropic
from anthropic import (
APIError, APIConnectionError, APITimeoutError,
RateLimitError, APIStatusError
)
def robust_call(messages, system="", max_retries=3):
"""带重试和错误处理的 API 调用"""
for attempt in range(max_retries):
try:
return client.messages.create(
model="claude-sonnet-4-6",
max_tokens=1024,
system=system,
messages=messages
)
except RateLimitError as e:
# 速率限制:等待后重试
wait_time = 2 ** attempt # 指数退避
print(f"速率限制,等待 {wait_time}s 后重试...")
time.sleep(wait_time)
except APITimeoutError:
# 超时:重试
if attempt < max_retries - 1:
print(f"请求超时,重试 ({attempt+1}/{max_retries})")
time.sleep(1)
else:
raise
except APIStatusError as e:
if e.status_code == 529: # Overloaded
time.sleep(10)
elif e.status_code >= 500: # 服务器错误,可重试
time.sleep(2 ** attempt)
else: # 4xx 客户端错误,不重试
raise
except APIConnectionError:
# 网络问题:重试
time.sleep(2 ** attempt)
raise Exception("超过最大重试次数")
7. 实用模式
7.1 结构化输出
强制模型返回 JSON(用于程序处理):
import json
def _parse_json_reply(raw: str):
    """Parse JSON from a model reply, tolerating a surrounding Markdown code fence."""
    import re

    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        pass
    # Take the body of the first ``` fence wherever it appears. The language
    # tag is optional and case-insensitive, CRLF line endings are accepted,
    # and a missing closing fence is tolerated.
    fenced = re.search(r"```(?:[A-Za-z]+[ \t]*\r?\n)?(.*?)(?:```|\Z)", raw, re.DOTALL)
    candidate = fenced.group(1) if fenced else raw
    return json.loads(candidate)


def extract_structured_data(text: str, schema_desc: str) -> dict:
    """Ask the model to extract data from *text* as JSON and parse the reply.

    Args:
        text: Source text to extract data from.
        schema_desc: Description of the desired JSON structure; it is
            embedded in the system prompt.

    Returns:
        The parsed JSON value from the model's reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON, even after
            stripping a Markdown code fence.
    """
    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        system=f"""你是一个数据提取助手。
始终以合法 JSON 格式输出,不包含任何其他内容。
输出结构:{schema_desc}""",
        messages=[{"role": "user", "content": f"从以下文本提取数据:\n\n{text}"}],
    )
    return _parse_json_reply(response.content[0].text)
# 使用示例
result = extract_structured_data(
"张三,35岁,软件工程师,在北京工作,联系方式:13800138000",
'{"name": "string", "age": "int", "job": "string", "city": "string", "phone": "string"}'
)
print(result) # {'name': '张三', 'age': 35, ...}
7.2 多模态(图像输入)
import base64
from pathlib import Path
def analyze_image(image_path: str, question: str) -> str:
    """Send an image file together with a question and return the answer.

    Args:
        image_path: Path of the image file to upload.
        question: Question to ask about the image.

    Returns:
        The text of the first content block of the response.
    """
    path = Path(image_path)

    # The file is sent inline as base64 text.
    encoded = base64.standard_b64encode(path.read_bytes()).decode("utf-8")

    # Derive the MIME type from the extension; unknown extensions are sent
    # as JPEG.
    mime_by_suffix = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }
    media_type = mime_by_suffix.get(path.suffix.lower(), "image/jpeg")

    image_block = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": media_type,
            "data": encoded,
        },
    }
    question_block = {"type": "text", "text": question}

    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[{"role": "user", "content": [image_block, question_block]}],
    )
    return response.content[0].text
# 使用
result = analyze_image("chart.png", "这张图表展示了什么趋势?有什么值得注意的异常点?")
7.3 Token 计数(成本预估)
# Count the input tokens of a request before sending it; this call does not
# generate a reply.
token_count = client.messages.count_tokens(
    messages=messages,
    system=system_prompt,
    model="claude-sonnet-4-6",
)
print(f"预计输入 tokens: {token_count.input_tokens}")

# Estimate the input cost at the Sonnet rate used here: $3 per million tokens.
input_cost = 3.0 * token_count.input_tokens / 1_000_000
print(f"预计输入成本: ${input_cost:.4f}")