Welcome to Part 7B of our NGINX on Ubuntu series! We’ll implement health checks, monitoring, and failover strategies for robust load balancing.
Health Check Basics
graph TD
    A[Health Checks] --> B[Passive]
    A --> C[Active]
    B --> D[Monitor Real Requests]
    C --> E[Periodic Probes]
    style A fill:#e1f5fe
    style B fill:#e8f5e8
    style C fill:#e8f5e8
Passive Health Checks
# Backend pool with passive health checking: nginx watches real proxied
# requests and temporarily ejects a server that keeps failing.
upstream backend_health {
    # A primary is marked unavailable for 30s after 3 failed attempts
    # within a 30s window.
    server 192.168.1.10:3000 max_fails=3 fail_timeout=30s;
    server 192.168.1.11:3000 max_fails=3 fail_timeout=30s;

    # Receives traffic only while both primaries are unavailable.
    server 192.168.1.12:3000 backup;

    # Keep up to 32 idle upstream connections cached per worker process.
    keepalive 32;
}
# Virtual host that proxies health.example.com to the backend_health pool.
server {
    listen 80;
    server_name health.example.com;

    location / {
        proxy_pass http://backend_health;

        # Required for the upstream's `keepalive` pool to take effect:
        # speak HTTP/1.1 to the backend and do not forward "Connection: close".
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;

        # Retry the request on the next server (at most 3 attempts in total)
        # on connection errors, timeouts, and 500/502 responses.
        proxy_next_upstream error timeout http_500 http_502;
        proxy_next_upstream_tries 3;

        # Debug aid: shows which backend(s) handled the request. `always`
        # keeps the header on error responses too, which is when it matters.
        # It exposes internal addresses, so drop it on public-facing sites.
        add_header X-Upstream-Server $upstream_addr always;
    }
}
Active Health Check Script
# Create health check script
# /usr/local/bin is root-owned, so the file is written through `sudo tee`;
# a plain `cat > file` redirection would run as the unprivileged user and fail.
sudo tee /usr/local/bin/health-check.sh > /dev/null << 'EOF'
#!/bin/bash
# Probe the /health endpoint of every backend and print one line per server
# followed by a timestamped summary.
# Exit status: 0 when all servers are healthy, 1 otherwise.
# NOTE: this script only reports; it does not change how NGINX routes
# traffic (that is done by the passive max_fails/fail_timeout checks).
SERVERS=("192.168.1.10:3000" "192.168.1.11:3000")
healthy=0
for server in "${SERVERS[@]}"; do
  # -f: treat HTTP >= 400 as failure; --max-time 5: never hang on a dead host.
  if curl -f -s --max-time 5 "http://$server/health" > /dev/null; then
    echo "✅ $server healthy"
    healthy=$((healthy + 1))
  else
    echo "❌ $server unhealthy"
  fi
done
# The timestamp makes the appended log readable when run from cron.
echo "$(date '+%Y-%m-%d %H:%M:%S') Health: $healthy/${#SERVERS[@]} servers"
(( healthy == ${#SERVERS[@]} )) || exit 1
EOF
sudo chmod +x /usr/local/bin/health-check.sh
Failover Configuration
graph TD
    A[Primary] --> B{Healthy?}
    B -->|Yes| C[Route Traffic]
    B -->|No| D[Backup]
    style A fill:#e8f5e8
    style D fill:#ffebee
# Primary tier: a server is taken out of rotation for 30s after
# 2 failed attempts within a 30s window.
upstream primary_pool {
    server 192.168.1.10:3000 max_fails=2 fail_timeout=30s;
    server 192.168.1.11:3000 max_fails=2 fail_timeout=30s;
}
# Backup tier: a single server with default failure-detection settings.
upstream backup_pool {
    server 192.168.1.20:3000;
}
# Two-tier failover: serve from primary_pool and fall back to backup_pool
# when the primary tier answers with (or nginx generates) a 502/503/504.
server {
    listen 80;
    server_name failover.example.com;

    location / {
        proxy_pass http://primary_pool;
        proxy_set_header Host $host;

        # Without this, error_page only catches errors generated by nginx
        # itself; a 502/503/504 *returned by a primary* would go straight
        # to the client instead of triggering the fallback.
        proxy_intercept_errors on;

        # "=" with no code: the client sees whatever status @backup produces.
        error_page 502 503 504 = @backup;
    }

    location @backup {
        proxy_pass http://backup_pool;
        proxy_set_header Host $host;

        # `always` keeps the marker header on non-2xx/3xx responses too.
        add_header X-Failover-Tier "backup" always;
    }
}
Monitoring Setup
# Add to crontab
# `crontab -` REPLACES the whole crontab, so re-emit root's existing entries
# first. Any previous copy of this job is filtered out, which makes the
# command safe to run more than once. 2>&1 sends curl/script errors to the log.
{
  sudo crontab -l 2>/dev/null | grep -vF '/usr/local/bin/health-check.sh'
  echo "* * * * * /usr/local/bin/health-check.sh >> /var/log/nginx/health.log 2>&1"
} | sudo crontab -
# Create monitoring script
# Written through `sudo tee` because /usr/local/bin is root-owned.
sudo tee /usr/local/bin/nginx-status.sh > /dev/null << 'EOF'
#!/bin/bash
# Print a short NGINX status report: service state, number of established
# connections on port 80, and the last lines of the health-check log.
echo "=== NGINX Status ==="
systemctl is-active nginx
# ss ships with Ubuntu (netstat needs the optional net-tools package).
# The filter matches local port 80 exactly, so :8080/:8000 and LISTEN
# sockets are not counted.
echo "Connections: $(ss -Htn state established '( sport = :80 )' | wc -l)"
tail -n 3 /var/log/nginx/health.log 2>/dev/null || echo "No health logs"
EOF
sudo chmod +x /usr/local/bin/nginx-status.sh
Testing
# Enable sites
# -f replaces an existing link, so these steps can be re-run safely.
sudo ln -sf /etc/nginx/sites-available/health.example.com /etc/nginx/sites-enabled/
# The failover test below needs its virtual host enabled as well. This assumes
# the failover config was saved as sites-available/failover.example.com;
# adjust the path if you used a different file name.
sudo ln -sf /etc/nginx/sites-available/failover.example.com /etc/nginx/sites-enabled/
# Reload only if the configuration passes the syntax check.
sudo nginx -t && sudo systemctl reload nginx
# Test health check
/usr/local/bin/health-check.sh
# Test status
/usr/local/bin/nginx-status.sh
# Test failover
# -i prints response headers; look for "X-Failover-Tier: backup" after
# stopping the primary servers.
curl -i -H "Host: failover.example.com" http://localhost/
Next: Part 8 covers NGINX caching strategies and performance optimization.