Examples

Real-world code examples

Copy-paste examples for common scenarios. Each example includes comments to explain what's happening; the error handling and retries example near the end shows how to harden any of them for production.

Gemini: Complex reasoning task

Architecture design with high reasoning

Use Gemini's advanced reasoning for system design tasks

from openai import OpenAI
import os

# Initialize client
client = OpenAI(
    api_key=os.environ["ZAGUAN_API_KEY"],
    base_url="https://api.zaguanai.com/v1",
)

# Complex reasoning task
response = client.chat.completions.create(
    model="google/gemini-2.5-pro",
    messages=[
        {
            "role": "user",
            "content": """Design a fault-tolerant microservices architecture 
            for an e-commerce platform that handles 1M requests/day. Include:
            - Service boundaries
            - Data consistency strategies
            - Failure handling
            - Scaling approach"""
        }
    ],
    temperature=0.7,
    max_tokens=4096,
    extra_body={
        "reasoning_effort": "high"  # Maximum reasoning for complex task
    }
)

print(response.choices[0].message.content)

# Check token usage
print(f"\nTokens used: {response.usage.total_tokens}")
print(f"Finish reason: {response.choices[0].finish_reason}")

Gemini: Streaming with reasoning

Real-time response with thinking

Stream responses for better UX while using reasoning

from openai import OpenAI
import os

client = OpenAI(
    api_key=os.environ["ZAGUAN_API_KEY"],
    base_url="https://api.zaguanai.com/v1",
)

# Stream with reasoning enabled
stream = client.chat.completions.create(
    model="google/gemini-2.5-flash",
    messages=[
        {"role": "user", "content": "Explain quantum entanglement simply"}
    ],
    stream=True,
    extra_body={
        "reasoning_effort": "medium"
    }
)

# Process stream
print("Response: ", end="", flush=True)
for chunk in stream:
    # Guard against chunks with an empty choices list (e.g., usage chunks)
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()  # New line at end
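
Token counts aren't included in streamed chunks by default. The OpenAI API defines stream_options={"include_usage": True}, which appends a final chunk carrying usage and an empty choices list; whether Zaguán forwards this option to Gemini is an assumption worth verifying:

# Stream and capture usage from the final chunk (assumes the gateway
# supports OpenAI's stream_options parameter)
stream = client.chat.completions.create(
    model="google/gemini-2.5-flash",
    messages=[
        {"role": "user", "content": "Explain quantum entanglement simply"}
    ],
    stream=True,
    stream_options={"include_usage": True},
)

for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
    if chunk.usage:  # only populated on the final chunk
        print(f"\nTokens used: {chunk.usage.total_tokens}")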

Gemini: Custom safety settings

Adjust content filtering for your use case

Configure safety thresholds for different harm categories

from openai import OpenAI
import os

client = OpenAI(
    api_key=os.environ["ZAGUAN_API_KEY"],
    base_url="https://api.zaguanai.com/v1",
)

# Example: Video game dialogue with relaxed dangerous content filter
response = client.chat.completions.create(
    model="google/gemini-2.0-flash",
    messages=[
        {"role": "user", "content": "Generate dialogue for a fantasy battle scene"}
    ],
    extra_body={
        "google": {
            "safety_settings": [
                {
                    "category": "HARM_CATEGORY_HARASSMENT",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
                },
                {
                    "category": "HARM_CATEGORY_HATE_SPEECH",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
                },
                {
                    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
                },
                {
                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                    "threshold": "BLOCK_ONLY_HIGH"  # More permissive for game content
                }
            ]
        }
    }
)

print(response.choices[0].message.content)

# Check whether content was blocked. Zaguán normalizes Gemini's SAFETY
# finish reason to the OpenAI format, where filtered responses finish
# with "content_filter"
if response.choices[0].finish_reason == "content_filter":
    print("\nWarning: Response was blocked due to safety filters")
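
Since the four entries above differ only in threshold, a small helper keeps the settings readable when you reuse them across calls (plain Python, no gateway-specific assumptions):

def build_safety_settings(overrides: dict | None = None) -> list:
    """Build a safety_settings list, defaulting to BLOCK_MEDIUM_AND_ABOVE."""
    categories = [
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    ]
    overrides = overrides or {}
    return [
        {"category": c, "threshold": overrides.get(c, "BLOCK_MEDIUM_AND_ABOVE")}
        for c in categories
    ]

# Equivalent to the settings used above
settings = build_safety_settings(
    {"HARM_CATEGORY_DANGEROUS_CONTENT": "BLOCK_ONLY_HIGH"}
)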

Claude: Vision analysis

Analyze architecture diagrams

Use Claude's vision capabilities for image analysis

from openai import OpenAI
import base64
import os

client = OpenAI(
    api_key=os.environ["ZAGUAN_API_KEY"],
    base_url="https://api.zaguanai.com/v1",
)

# Load and encode image
def encode_image(image_path):
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode()

image_data = encode_image("architecture_diagram.png")

# Analyze image
response = client.chat.completions.create(
    model="anthropic/claude-3-5-sonnet-20241022",
    messages=[
        {
            "role": "system",
            "content": "You are an expert software architect."
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Analyze this architecture diagram and suggest improvements"
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{image_data}"
                    }
                }
            ]
        }
    ],
    max_tokens=8192
)

print(response.choices[0].message.content)
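
The data URL's media type should match the file you actually load. If image paths vary, the standard-library mimetypes module can derive the type from the extension (a sketch; Claude accepts PNG, JPEG, GIF, and WebP):

import base64
import mimetypes

def image_to_data_url(image_path: str) -> str:
    """Encode an image file as a data URL with the correct media type."""
    mime, _ = mimetypes.guess_type(image_path)
    if mime not in ("image/png", "image/jpeg", "image/gif", "image/webp"):
        raise ValueError(f"Unsupported image type: {mime}")
    with open(image_path, "rb") as f:
        data = base64.b64encode(f.read()).decode()
    return f"data:{mime};base64,{data}"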

Multi-turn conversation with function calling

Weather assistant with tools

Complete example of function calling with conversation history

from openai import OpenAI
import json
import os

client = OpenAI(
    api_key=os.environ["ZAGUAN_API_KEY"],
    base_url="https://api.zaguanai.com/v1",
)

# Define your function
def get_weather(city: str) -> dict:
    """Simulated weather API call"""
    # In production, call a real weather API
    return {
        "city": city,
        "temperature": 22,
        "condition": "sunny",
        "humidity": 65
    }

# Define tools
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "City name"
                    }
                },
                "required": ["city"]
            }
        }
    }
]

# Start conversation
messages = [
    {"role": "user", "content": "What's the weather in Tokyo and Paris?"}
]

# First API call
response = client.chat.completions.create(
    model="google/gemini-2.0-flash",
    messages=messages,
    tools=tools,
    tool_choice="auto"
)

# Handle tool calls
response_message = response.choices[0].message
messages.append(response_message)

if response_message.tool_calls:
    # Execute each tool call
    for tool_call in response_message.tool_calls:
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)
        
        # Call the function
        if function_name == "get_weather":
            result = get_weather(**function_args)
            
            # Add function result to messages
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result)
            })
    
    # Get final response
    final_response = client.chat.completions.create(
        model="google/gemini-2.0-flash",
        messages=messages
    )
    
    print(final_response.choices[0].message.content)
else:
    print(response_message.content)
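
The example above resolves a single round of tool calls. If the model may chain calls (for example, calling get_weather once per city and then comparing results), a bounded loop generalizes the pattern. A sketch reusing the client, tools, and get_weather defined above:

def run_with_tools(messages: list, max_rounds: int = 5):
    """Keep resolving tool calls until the model returns plain text."""
    for _ in range(max_rounds):
        response = client.chat.completions.create(
            model="google/gemini-2.0-flash",
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )
        message = response.choices[0].message
        messages.append(message)

        if not message.tool_calls:
            return message.content  # model is done

        for tool_call in message.tool_calls:
            args = json.loads(tool_call.function.arguments)
            result = get_weather(**args)  # dispatch on function name in real code
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })

    raise RuntimeError("Tool loop did not converge")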

Error handling & retries

Production-ready error handling

Handle errors gracefully with exponential backoff

from openai import OpenAI, APIError, RateLimitError, APITimeoutError
import time
import os

client = OpenAI(
    api_key=os.environ["ZAGUAN_API_KEY"],
    base_url="https://api.zaguanai.com/v1",
)

def create_completion_with_retry(
    model: str,
    messages: list,
    max_retries: int = 3,
    **kwargs
):
    """Create completion with exponential backoff retry"""
    
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                **kwargs
            )
            
            # Check if response was truncated
            if response.choices[0].finish_reason == "length":
                print("Warning: Response truncated. Consider increasing max_tokens")
            
            return response
            
        except RateLimitError as e:
            if attempt == max_retries - 1:
                raise
            wait_time = 2 ** attempt  # Exponential backoff
            print(f"Rate limited. Retrying in {wait_time}s...")
            time.sleep(wait_time)
            
        except APITimeoutError as e:
            if attempt == max_retries - 1:
                raise
            print(f"Timeout. Retrying (attempt {attempt + 1}/{max_retries})...")
            time.sleep(1)
            
        except APIError as e:
            print(f"API error: {e}")
            raise

# Usage
try:
    response = create_completion_with_retry(
        model="anthropic/claude-3-5-sonnet-20241022",
        messages=[
            {"role": "user", "content": "Write a short story"}
        ],
        max_tokens=4096,
        temperature=0.7
    )
    print(response.choices[0].message.content)
    
except Exception as e:
    print(f"Failed after retries: {e}")

TypeScript/Node.js example

Gemini reasoning in TypeScript

Use advanced features in Node.js applications

import OpenAI from "openai";

const client = new OpenAI({
  apiKey: process.env.ZAGUAN_API_KEY,
  baseURL: "https://api.zaguanai.com/v1",
});

async function analyzeCode(code: string): Promise<string> {
  try {
    const response = await client.chat.completions.create({
      model: "google/gemini-2.5-pro",
      messages: [
        {
          role: "system",
          content: "You are an expert code reviewer.",
        },
        {
          role: "user",
          content: `Review this code and suggest improvements:\n\n${code}`,
        },
      ],
      temperature: 0.3,
      max_tokens: 2048,
      // reasoning_effort is not in the SDK's types yet; openai-node
      // passes unrecognized params through to the request body
      // @ts-expect-error -- provider-specific parameter
      reasoning_effort: "high",
    });

    const content = response.choices[0].message.content;
    if (!content) {
      throw new Error("No response content");
    }

    // Log usage
    console.log(`Tokens used: ${response.usage?.total_tokens}`);
    
    return content;
  } catch (error) {
    if (error instanceof OpenAI.APIError) {
      console.error(`API Error: ${error.status} - ${error.message}`);
    }
    throw error;
  }
}

// Usage
const sampleCode = `
function calculateTotal(items) {
  let total = 0;
  for (let i = 0; i < items.length; i++) {
    total += items[i].price;
  }
  return total;
}
`;

analyzeCode(sampleCode)
  .then((review) => console.log(review))
  .catch((error) => console.error("Failed:", error));

Tips for adapting examples

  • Environment variables: Always use environment variables for API keys. Never hardcode them in your source code.
  • Error handling: The retry example shows production-ready error handling. Adapt the backoff strategy to your needs.
  • Token limits: Adjust max_tokens based on your use case. Start conservative and increase if needed.
  • Model selection: Swap model names to test different providers. The code structure stays the same.
  • Logging: Add logging for token usage and finish reasons to monitor costs and quality; a minimal sketch follows this list.
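
A minimal sketch for the logging bullet, using only the standard library (field names follow the OpenAI response format):

import logging

logger = logging.getLogger("zaguan")

def log_completion(response) -> None:
    """Record usage and finish reason to monitor costs and quality."""
    logger.info(
        "model=%s total_tokens=%s finish_reason=%s",
        response.model,
        response.usage.total_tokens if response.usage else "n/a",
        response.choices[0].finish_reason,
    )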
💡 Pro tip: Start with the simplest example that matches your use case. Get it working first, then add advanced features like reasoning or vision. This helps you isolate issues and understand the impact of each feature.