Azure AI Foundry with Anthropic Claude Part 7: Production Patterns – Monitoring, Security, and Optimization

Azure AI Foundry with Anthropic Claude Part 7: Production Patterns – Monitoring, Security, and Optimization

Deploying Claude in production requires comprehensive monitoring, cost optimization, security hardening, and scaling strategies. This final part covers production-ready patterns for Azure AI Foundry with Claude, including Application Insights integration, prompt caching optimization, security best practices, and architectural patterns for high-availability systems.

Architecture Patterns

graph TB
    %% Reference architecture: every node below is declared once, then wired up.
    Client[Client Application]
    APIM[Azure API Management]
    AppService[Azure App Service]
    KeyVault[Azure Key Vault]
    Foundry[Azure AI Foundry]
    AppInsights[Application Insights]
    Cache[Azure Cache for Redis]
    
    %% Inbound path: the client reaches App Service only through API Management.
    Client -->|HTTPS| APIM
    APIM -->|Rate Limit & Auth| AppService
    %% App Service fans out to its four backing services.
    AppService -->|Get Credentials| KeyVault
    AppService -->|Check Cache| Cache
    AppService -->|API Calls| Foundry
    AppService -->|Telemetry| AppInsights
    %% Return edge: model responses flow back from Foundry to App Service.
    Foundry -->|Claude Models| AppService
    
    %% Node colouring only; no effect on the graph structure.
    style Foundry fill:#0078d4
    style AppInsights fill:#68217a
    style KeyVault fill:#0078d4

Application Insights Integration

Node.js Implementation

import * as appInsights from 'applicationinsights';
import { AnthropicFoundry } from '@anthropic-ai/sdk/foundry';

// Bootstrap Application Insights: obtain the configuration handle, enable each
// auto-collector, then start the SDK.
const telemetryConfig = appInsights.setup(
  process.env.APPLICATIONINSIGHTS_CONNECTION_STRING
);

telemetryConfig
  .setAutoCollectRequests(true)
  .setAutoCollectPerformance(true)
  .setAutoCollectExceptions(true)
  .setAutoCollectDependencies(true);

telemetryConfig.start();

// Shared telemetry client used by the classes below for custom metrics/events.
const client = appInsights.defaultClient;

/**
 * Claude client that reports latency, token usage, and success/failure
 * telemetry for every request through the module-level Application Insights
 * `client`.
 */
class MonitoredClaudeClient {
  private client: AnthropicFoundry;

  constructor() {
    this.client = new AnthropicFoundry({
      credential: new DefaultAzureCredential(),
      resourceName: process.env.AZURE_FOUNDRY_RESOURCE!
    });
  }

  /**
   * Sends a single user message to Claude and returns the text of the first
   * content block.
   *
   * @param message - User message to send.
   * @param options - Optional overrides; `model` defaults to
   *   `'claude-sonnet-4-5'` and `maxTokens` to 2048.
   * @returns The text of the first content block in the response.
   * @throws Rethrows whatever the request threw (after recording it), or an
   *   `Error` when the response does not begin with a text block.
   */
  async chat(message: string, options: ChatOptions = {}): Promise<string> {
    // Resolve the model once so the request and all telemetry agree on it.
    const model = options.model || 'claude-sonnet-4-5';
    const startTime = Date.now();

    try {
      const response = await this.client.messages.create({
        model,
        max_tokens: options.maxTokens || 2048,
        messages: [{ role: 'user', content: message }]
      });

      const duration = Date.now() - startTime;

      // Track success metrics
      client.trackMetric({
        name: 'Claude_API_Duration',
        value: duration
      });

      client.trackMetric({
        name: 'Claude_Input_Tokens',
        value: response.usage.input_tokens
      });

      client.trackMetric({
        name: 'Claude_Output_Tokens',
        value: response.usage.output_tokens
      });

      // Content blocks are not guaranteed to be text; check before reading
      // `.text` so a non-text or empty response is reported as a failure
      // instead of returning `undefined` as a string.
      const firstBlock = response.content[0];
      if (firstBlock?.type !== 'text') {
        throw new Error('Claude response did not begin with a text block');
      }

      client.trackEvent({
        name: 'Claude_API_Success',
        properties: {
          model,
          duration: duration.toString()
        }
      });

      return firstBlock.text;

    } catch (error: unknown) {
      // Anything can be thrown in JavaScript; normalise for telemetry only and
      // rethrow the original value so callers see exactly what was raised.
      const failure = error instanceof Error ? error : new Error(String(error));

      client.trackException({ exception: failure });

      client.trackEvent({
        name: 'Claude_API_Failure',
        properties: {
          error: failure.message,
          model
        }
      });

      throw error;
    }
  }
}

Python Implementation

import logging
import os
import time

from opencensus.ext.azure.log_exporter import AzureLogHandler
from opencensus.ext.azure.trace_exporter import AzureExporter
from opencensus.trace.samplers import ProbabilitySampler
from opencensus.trace.tracer import Tracer

# Read the connection string once so the log handler and the trace exporter
# always target the same Application Insights resource. Raises KeyError at
# import time if the variable is not set.
connection_string = os.environ['APPLICATIONINSIGHTS_CONNECTION_STRING']

# Configure logging
logger = logging.getLogger(__name__)
# A freshly created logger inherits the root logger's level (WARNING by
# default), which would silently drop INFO-level records sent through it.
logger.setLevel(logging.INFO)
logger.addHandler(AzureLogHandler(connection_string=connection_string))

# Configure tracing with a sampling rate of 1.0.
tracer = Tracer(
    exporter=AzureExporter(connection_string=connection_string),
    sampler=ProbabilitySampler(1.0)
)

class MonitoredClaudeClient:
    """Claude client that logs and traces every request.

    Each call to ``chat`` runs inside a ``Claude_API_Call`` span on the
    module-level ``tracer`` and writes a success or failure record to the
    module-level ``logger``.
    """

    def __init__(self):
        self.client = AsyncAnthropicFoundry(
            credential=DefaultAzureCredential(),
            resource_name=os.environ['AZURE_FOUNDRY_RESOURCE']
        )

    async def chat(self, message: str, model: str = 'claude-sonnet-4-5') -> str:
        """Send one user message and return the text of the first content block.

        Args:
            message: User message to send.
            model: Model identifier passed to the API.

        Returns:
            The ``text`` of the first content block in the response.

        Raises:
            Exception: Any exception raised while calling the API or reading
                the response is logged and re-raised unchanged.
        """
        with tracer.span(name='Claude_API_Call') as span:
            # Tag the span up front so failed calls are attributable to a
            # model too, not only successful ones.
            span.add_attribute('model', model)

            # perf_counter() is monotonic; time.time() can jump when the
            # system clock is adjusted and would skew the measured latency.
            start_time = time.perf_counter()

            try:
                response = await self.client.messages.create(
                    model=model,
                    max_tokens=2048,
                    messages=[{'role': 'user', 'content': message}]
                )

                duration = time.perf_counter() - start_time

                # Log metrics
                logger.info('Claude API Success', extra={
                    'custom_dimensions': {
                        'model': model,
                        'duration': duration,
                        'input_tokens': response.usage.input_tokens,
                        'output_tokens': response.usage.output_tokens
                    }
                })

                span.add_attribute('duration', duration)

                return response.content[0].text

            except Exception as e:
                # logger.exception attaches the traceback to the record.
                logger.exception('Claude API Failure', extra={
                    'custom_dimensions': {
                        'model': model,
                        'error': str(e)
                    }
                })
                raise
Cost Optimization with Prompt Caching

/** Settings that decide when and how a client requests prompt caching. */
interface CacheConfig {
  /** Caching mode the client requests for the system prompt. */
  type: 'ephemeral' | 'extended';
  /**
   * Threshold, in estimated tokens, that the system prompt must reach before
   * caching is requested for it.
   */
  minTokens: number;
}

/**
 * Claude client that marks large system prompts as cacheable and logs the
 * estimated savings whenever a request is served from the prompt cache.
 */
class CostOptimizedClient {
  private client: AnthropicFoundry;
  private cacheConfig: CacheConfig;

  /**
   * @param cacheConfig - Caching mode and the minimum estimated system-prompt
   *   size (in tokens) at which caching is requested.
   */
  constructor(cacheConfig: CacheConfig = { type: 'ephemeral', minTokens: 1024 }) {
    this.client = new AnthropicFoundry({
      credential: new DefaultAzureCredential(),
      resourceName: process.env.AZURE_FOUNDRY_RESOURCE!
    });
    this.cacheConfig = cacheConfig;
  }

  /**
   * Sends `userMessage` with `systemPrompt` as the system prompt, requesting
   * prompt caching when the prompt is estimated to be large enough.
   *
   * @param systemPrompt - System prompt; cached when its estimated token count
   *   is at least `cacheConfig.minTokens`.
   * @param userMessage - User message to send.
   * @returns The text of the first content block in the response.
   * @throws Error when the response does not begin with a text block.
   */
  async chatWithContext(
    systemPrompt: string,
    userMessage: string
  ): Promise<string> {
    // Calculate if caching is beneficial
    const systemTokens = this.estimateTokens(systemPrompt);

    const response = await this.client.messages.create({
      model: 'claude-sonnet-4-5',
      max_tokens: 2048,
      system: systemTokens >= this.cacheConfig.minTokens
        ? [{
            type: 'text',
            text: systemPrompt,
            cache_control: this.buildCacheControl()
          }]
        : systemPrompt,
      messages: [{ role: 'user', content: userMessage }]
    });

    // Log cache performance
    if (response.usage.cache_read_input_tokens) {
      console.log(`Cache hit! Saved ${response.usage.cache_read_input_tokens} tokens`);
      const savings = this.calculateSavings(response.usage);
      console.log(`Cost savings: $${savings.toFixed(4)}`);
    }

    // Content blocks are not guaranteed to be text; check before reading it.
    const firstBlock = response.content[0];
    if (firstBlock?.type !== 'text') {
      throw new Error('Claude response did not begin with a text block');
    }
    return firstBlock.text;
  }

  /**
   * Translates the configured caching mode into a `cache_control` value.
   *
   * The API's only cache-control type is `ephemeral`; the longer-lived cache
   * is selected with a `ttl` on that same type, so `'extended'` must not be
   * sent as the type itself.
   * NOTE(review): confirm the 1-hour TTL is available on your deployment.
   */
  private buildCacheControl(): { type: 'ephemeral'; ttl?: '5m' | '1h' } {
    return this.cacheConfig.type === 'extended'
      ? { type: 'ephemeral', ttl: '1h' }
      : { type: 'ephemeral' };
  }

  /** Rough size estimate: one token per four characters, rounded up. */
  private estimateTokens(text: string): number {
    return Math.ceil(text.length / 4);
  }

  /**
   * Dollar difference between billing the cache-read tokens at the full input
   * price and at the cache-read price. Cache-write costs are not considered.
   */
  private calculateSavings(usage: { cache_read_input_tokens?: number | null }): number {
    const inputPrice = 3.00 / 1_000_000;  // Sonnet 4.5 input
    const cacheReadPrice = 0.30 / 1_000_000;  // 90% discount

    const cachedTokens = usage.cache_read_input_tokens || 0;
    const fullCost = cachedTokens * inputPrice;
    const cachedCost = cachedTokens * cacheReadPrice;

    return fullCost - cachedCost;
  }
}

Security Best Practices

Key Vault Integration

import { SecretClient } from '@azure/keyvault-secrets';
import { DefaultAzureCredential } from '@azure/identity';

/**
 * Claude client whose API key is loaded from Azure Key Vault rather than from
 * configuration. `initialize()` must complete before `chat()` is called.
 */
class SecureClaudeClient {
  private client: AnthropicFoundry | null = null;
  private credential: DefaultAzureCredential;
  private keyVaultUrl: string;

  /**
   * @param keyVaultUrl - URL of the Key Vault that holds the
   *   `foundry-api-key` secret.
   */
  constructor(keyVaultUrl: string) {
    this.credential = new DefaultAzureCredential();
    this.keyVaultUrl = keyVaultUrl;
  }

  /**
   * Reads the `foundry-api-key` secret from Key Vault and builds the Foundry
   * client with it.
   *
   * @throws Error when the secret exists but has no value; also propagates
   *   any error raised by the Key Vault request.
   */
  async initialize(): Promise<void> {
    const secretClient = new SecretClient(
      this.keyVaultUrl,
      this.credential
    );

    // Retrieve API key from Key Vault
    const secret = await secretClient.getSecret('foundry-api-key');
    if (!secret.value) {
      throw new Error("Key Vault secret 'foundry-api-key' has no value");
    }

    // Authenticate with the retrieved key; previously the secret was fetched
    // and then discarded, so the Key Vault round trip had no effect.
    // NOTE(review): `resourceName` follows this file's existing usage —
    // confirm option names against the installed SDK version.
    this.client = new AnthropicFoundry({
      apiKey: secret.value,
      resourceName: process.env.AZURE_FOUNDRY_RESOURCE!
    });
  }

  /**
   * Sends a single user message and returns the text of the first content
   * block.
   *
   * @param message - User message to send.
   * @returns The text of the first content block in the response.
   * @throws Error when `initialize()` has not completed, or when the response
   *   does not begin with a text block.
   */
  async chat(message: string): Promise<string> {
    if (!this.client) {
      throw new Error('Client not initialized');
    }

    const response = await this.client.messages.create({
      model: 'claude-sonnet-4-5',
      max_tokens: 2048,
      messages: [{ role: 'user', content: message }]
    });

    // Content blocks are not guaranteed to be text; check before reading it.
    const firstBlock = response.content[0];
    if (firstBlock?.type !== 'text') {
      throw new Error('Claude response did not begin with a text block');
    }
    return firstBlock.text;
  }
}

Rate Limiting

import Bottleneck from 'bottleneck';

/**
 * Claude client that throttles outgoing requests on the client side with a
 * Bottleneck limiter.
 */
class RateLimitedClient {
  private client: AnthropicFoundry;
  private limiter: Bottleneck;

  constructor() {
    this.client = new AnthropicFoundry({
      credential: new DefaultAzureCredential(),
      resourceName: process.env.AZURE_FOUNDRY_RESOURCE!
    });

    // Configure rate limiter (80,000 TPM / 800 RPM for Sonnet 4.5).
    // Only the request count is enforced here; the tokens-per-minute figure
    // is not tracked by this limiter.
    this.limiter = new Bottleneck({
      reservoir: 800,              // Max requests
      reservoirRefreshAmount: 800,
      reservoirRefreshInterval: 60 * 1000,  // Per minute
      maxConcurrent: 10            // Concurrent requests
    });
  }

  /**
   * Queues a single user message behind the limiter and returns the text of
   * the first content block once the request has run.
   *
   * @param message - User message to send.
   * @returns The text of the first content block in the response.
   * @throws Error when the response does not begin with a text block; also
   *   propagates any error raised by the request.
   */
  async chat(message: string): Promise<string> {
    return this.limiter.schedule(async () => {
      const response = await this.client.messages.create({
        model: 'claude-sonnet-4-5',
        max_tokens: 2048,
        messages: [{ role: 'user', content: message }]
      });

      // Content blocks are not guaranteed to be text; check before reading it.
      const firstBlock = response.content[0];
      if (firstBlock?.type !== 'text') {
        throw new Error('Claude response did not begin with a text block');
      }
      return firstBlock.text;
    });
  }
}

High Availability Pattern

/**
 * Claude client that spreads requests across several Foundry resources in
 * round-robin order and retries a failed request on the next resource with
 * exponential backoff.
 */
class HighAvailabilityClient {
  private clients: AnthropicFoundry[];
  private currentIndex: number = 0;

  /**
   * @param resources - Foundry resource names; one client is created per entry.
   */
  constructor(resources: string[]) {
    // One credential instance is shared by every client instead of building a
    // separate one per resource.
    const credential = new DefaultAzureCredential();
    this.clients = resources.map(resource =>
      new AnthropicFoundry({
        credential,
        resourceName: resource
      })
    );
  }

  /**
   * Sends a single user message, moving to the next resource after each
   * failure.
   *
   * @param message - User message to send.
   * @param maxRetries - Total number of attempts before giving up.
   * @returns The text of the first content block in the first successful
   *   response.
   * @throws Error when no resources are configured, or when every attempt
   *   failed (the message includes the last failure's message).
   */
  async chat(message: string, maxRetries: number = 3): Promise<string> {
    // Without this guard the round-robin index becomes NaN (`% 0`) and every
    // attempt fails with an unrelated TypeError after pointless backoff.
    if (this.clients.length === 0) {
      throw new Error('No Foundry resources configured');
    }

    let lastError: Error | null = null;

    for (let attempt = 0; attempt < maxRetries; attempt++) {
      const client = this.getNextClient();

      try {
        const response = await client.messages.create({
          model: 'claude-sonnet-4-5',
          max_tokens: 2048,
          messages: [{ role: 'user', content: message }]
        });

        // Content blocks are not guaranteed to be text; a non-text response
        // counts as a failed attempt and is retried on the next resource.
        const firstBlock = response.content[0];
        if (firstBlock?.type !== 'text') {
          throw new Error('Claude response did not begin with a text block');
        }
        return firstBlock.text;

      } catch (error: unknown) {
        lastError = error instanceof Error ? error : new Error(String(error));

        // Backing off only makes sense when another attempt follows; sleeping
        // after the final failure just delays the error reaching the caller.
        if (attempt === maxRetries - 1) {
          console.warn(`Attempt ${attempt + 1} failed, no attempts remaining`);
          break;
        }

        console.warn(`Attempt ${attempt + 1} failed, trying next resource`);
        await this.delay(Math.pow(2, attempt) * 1000);
      }
    }

    throw new Error(`All retry attempts failed: ${lastError?.message}`);
  }

  /** Returns the current client and advances the round-robin index. */
  private getNextClient(): AnthropicFoundry {
    const client = this.clients[this.currentIndex];
    this.currentIndex = (this.currentIndex + 1) % this.clients.length;
    return client;
  }

  /** Resolves after `ms` milliseconds. */
  private delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

Production Checklist

  • Monitoring: Application Insights telemetry, custom metrics, alerting
  • Security: Key Vault for secrets, Entra ID authentication, network isolation
  • Cost Optimization: Prompt caching (default 5-minute or extended 1-hour cache lifetime), model selection strategy
  • Reliability: Retry logic with exponential backoff, circuit breakers, health checks
  • Rate Limiting: Client-side throttling, quota monitoring, burst handling
  • High Availability: Multi-region deployment, failover strategy, load balancing
  • Compliance: Content filtering, audit logging, data residency requirements

Conclusion

This seven-part series provided comprehensive coverage of Azure AI Foundry with Claude integration, from strategic overview through production deployment. You now have production-ready patterns across Node.js, Python, and C#, with DevOps automation and enterprise-grade monitoring, security, and optimization strategies.

References

Written by:

535 Posts

View All Posts
Follow Me :
How to whitelist website on AdBlocker?

How to whitelist website on AdBlocker?

  1. Click on the AdBlock Plus icon in the top-right corner of your browser
  2. Click on "Enabled on this site" in the AdBlock Plus menu
  3. Refresh the page and start browsing the site