The Complete NGINX on Ubuntu Series: Part 17 – Troubleshooting and Debugging

The Complete NGINX on Ubuntu Series: Part 17 – Troubleshooting and Debugging

Welcome to Part 17 of our comprehensive NGINX on Ubuntu series! In this part, we'll master troubleshooting and debugging techniques that let you quickly identify, diagnose, and resolve common NGINX issues and performance problems.

Troubleshooting Fundamentals

Effective NGINX troubleshooting requires systematic approaches to identify root causes, analyze logs, monitor system resources, and apply targeted solutions for various issues.

graph TD
    A[NGINX Issues] --> B[Config Problems]
    A --> C[Performance Issues]
    A --> D[Connection Problems]
    A --> E[SSL/TLS Issues]

    B --> F[Syntax Errors<br/>Invalid Directives<br/>Missing Files]
    C --> G[High Load<br/>Memory Issues<br/>Slow Responses]
    D --> H[Connection Refused<br/>Timeouts<br/>Network Issues]
    E --> I[Certificate Problems<br/>Handshake Failures<br/>Protocol Issues]

    J[Debug Tools] --> K[Log Analysis]
    J --> L[Config Testing]
    J --> M[Performance Monitoring]
    J --> N[Network Diagnostics]

    style A fill:#ffebee
    style J fill:#e1f5fe
    style F fill:#fff3e0
    style G fill:#e8f5e8
    style H fill:#e3f2fd
    style I fill:#fff3e0

Comprehensive Debug Script

# Create comprehensive NGINX debug tool
sudo nano /usr/local/bin/nginx-debug.sh
#!/bin/bash

# NGINX Debug Tool
# Per-run log file. It is created with mktemp (unpredictable suffix, owner-only
# permissions) because this tool is normally run as root and a guessable name
# in /tmp could be pre-created or symlinked by another local user.
DEBUG_LOG=$(mktemp "/tmp/nginx-debug-$(date +%Y%m%d-%H%M%S)-XXXXXX.log") || {
    echo "nginx-debug: cannot create log file under /tmp" >&2
    exit 1
}

# Print one timestamped message on stdout and append the same line to
# the file named by $DEBUG_LOG.
#   $1 - message text (may be empty)
log_debug() {
    local stamp
    stamp=$(date '+%H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$1" | tee -a "$DEBUG_LOG"
}

# Report whether the nginx systemd unit is active and which nginx
# processes are present. All output goes through log_debug.
check_service_status() {
    log_debug "=== Service Status ==="

    if systemctl is-active --quiet nginx; then
        log_debug "✅ NGINX service running"
        # ActiveEnterTimestamp is a complete timestamp (weekday, date, time,
        # zone). Log it whole: keeping only the text after the last space
        # would leave just the timezone.
        local started
        started=$(systemctl show nginx --property=ActiveEnterTimestamp --value)
        log_debug "Active since: ${started:-unknown}"
    else
        log_debug "❌ NGINX service not running"
        log_debug "Status: $(systemctl is-active nginx)"
    fi

    # Process info
    # The patterns below contain "nginx: master" / "nginx: worker", which only
    # appear in the full command line, so pgrep needs -f for both lookups;
    # without -f it matches the bare process name and never finds the workers.
    local master_pid workers
    master_pid=$(pgrep -f "nginx: master")
    if [ -n "$master_pid" ]; then
        # Several masters (e.g. during a binary upgrade) come back one per
        # line; keep the log entry on a single line.
        master_pid=${master_pid//$'\n'/ }
        workers=$(pgrep -c -f "nginx: worker")
        log_debug "Master PID: $master_pid, Workers: $workers"
    else
        log_debug "❌ No NGINX processes found"
    fi
    log_debug ""
}

# Validate the NGINX configuration with `nginx -t` and log basic facts about
# the main config file and the number of enabled sites.
# All output goes through log_debug (raw nginx errors are also appended to
# $DEBUG_LOG).
check_configuration() {
    log_debug "=== Configuration Check ==="

    # Capture the test output in a variable instead of a fixed-name file in
    # /tmp, which another local user could pre-create or symlink.
    local test_output
    if test_output=$(nginx -t 2>&1); then
        log_debug "✅ Configuration syntax valid"
    else
        log_debug "❌ Configuration syntax errors:"
        printf '%s\n' "$test_output" | sed 's/^/   /' | tee -a "$DEBUG_LOG"
    fi

    # Config file info
    local main_conf=/etc/nginx/nginx.conf
    if [ -f "$main_conf" ]; then
        local size modified
        size=$(stat -c%s "$main_conf")
        modified=$(stat -c%y "$main_conf")
        # Drop the fractional seconds and timezone that follow the first dot.
        log_debug "Main config: ${size} bytes, modified: ${modified%%.*}"
    else
        log_debug "❌ Main config missing"
    fi

    # Enabled sites
    # Count directory entries with a glob rather than parsing `ls`; -L keeps
    # dangling symlinks in the count, as a plain directory listing would.
    local entry sites=0
    for entry in /etc/nginx/sites-enabled/*; do
        if [ -e "$entry" ] || [ -L "$entry" ]; then
            sites=$((sites + 1))
        fi
    done
    log_debug "Enabled sites: $sites"
    log_debug ""
}

# Log the sockets NGINX listens on, flag other listeners on ports 80/443 and
# probe http://localhost. All output goes through log_debug (conflicting
# socket lines are also appended to $DEBUG_LOG).
check_network_bindings() {
    log_debug "=== Network Bindings ==="

    # The checks below identify NGINX by the process name that `ss -p` prints.
    # An unprivileged user may not be shown the owners of other users'
    # sockets, in which case NGINX's own listeners look like conflicts.
    if [ "$(id -u)" -ne 0 ]; then
        log_debug "⚠️  Not running as root: ss may hide socket owners, so NGINX ports can be misreported as conflicts"
    fi

    # Take one snapshot so every check below sees the same socket table.
    local listeners
    listeners=$(ss -tlnp)

    # Listening ports
    local line
    # IFS= / -r keep each ss line byte-for-byte (no backslash processing).
    while IFS= read -r line; do
        log_debug "Listening: $line"
    done < <(printf '%s\n' "$listeners" | grep nginx)

    # Port conflicts
    local port foreign
    for port in 80 443; do
        foreign=$(printf '%s\n' "$listeners" | grep ":$port " | grep -v nginx)
        if [ -n "$foreign" ]; then
            log_debug "⚠️  Port $port conflicts detected"
            printf '%s\n' "$foreign" | sed 's/^/   /' | tee -a "$DEBUG_LOG"
        else
            log_debug "✅ Port $port available"
        fi
    done

    # Connectivity test
    if curl -I -s --max-time 3 http://localhost >/dev/null 2>&1; then
        log_debug "✅ Local HTTP connectivity OK"
    else
        log_debug "❌ Local HTTP connectivity failed"
    fi
    log_debug ""
}

# Summarise /var/log/nginx/error.log: its size and its last ten lines.
# All output goes through log_debug (the quoted log lines are also appended
# to $DEBUG_LOG).
analyze_logs() {
    log_debug "=== Log Analysis ==="

    local log_file="/var/log/nginx/error.log"

    # Guard clause: nothing to analyse without the file.
    if [ ! -f "$log_file" ]; then
        log_debug "❌ Error log not found"
        log_debug ""
        return
    fi

    local bytes
    bytes=$(stat -c%s "$log_file")
    log_debug "Error log size: $((bytes / 1024))KB"

    # Recent errors
    local tail_count
    tail_count=$(tail -n 10 "$log_file" | wc -l)
    if [ "$tail_count" -le 0 ]; then
        log_debug "✅ No recent errors"
    else
        log_debug "Recent errors (last 10):"
        tail -n 10 "$log_file" | sed 's/^/   /' | tee -a "$DEBUG_LOG"
    fi
    log_debug ""
}

# Log a snapshot of memory, load average, root-disk usage and the number of
# sockets whose local port is 80 or 443. All output goes through log_debug.
check_resources() {
    log_debug "=== System Resources ==="

    # Memory
    local mem
    mem=$(free -h | awk '/^Mem/ {printf "%s/%s", $3, $2}')
    log_debug "Memory usage: $mem"

    # Load
    local load
    load=$(uptime | awk -F'load average:' '{print $2}')
    log_debug "Load average:$load"

    # Disk space
    local disk
    disk=$(df -h / | tail -n 1 | awk '{print $5}')
    log_debug "Root disk usage: $disk"

    # Connections
    # Let ss filter on the local (source) port. A text match on ":80" / ":443"
    # also counts ports such as :8080 or :4430 and outbound connections whose
    # remote port is 443. -H drops the header line so only sockets are counted.
    local conns
    conns=$(ss -Htun '( sport = :80 or sport = :443 )' | wc -l)
    log_debug "Active connections: $conns"
    log_debug ""
}

# Check the expiry of every certificate referenced by an `ssl_certificate`
# directive in the NGINX configuration tree.
#   $1 - configuration root to scan (optional, default /etc/nginx/)
# All output goes through log_debug.
check_ssl() {
    log_debug "=== SSL Certificates ==="

    local conf_root=${1:-/etc/nginx/}

    # -h suppresses the "file:" prefix grep -r would otherwise add; with that
    # prefix and an indented directive, awk's $2 is the directive name rather
    # than the path. Anchoring at line start skips commented-out directives
    # and the required whitespace after the name excludes ssl_certificate_key.
    local ssl_certs
    ssl_certs=$(grep -rhE '^[[:space:]]*ssl_certificate[[:space:]]+' -- "$conf_root" 2>/dev/null \
        | awk '{ sub(/;.*$/, "", $2); print $2 }' \
        | sort -u)

    if [ -z "$ssl_certs" ]; then
        log_debug "No SSL certificates found"
        log_debug ""
        return
    fi

    local cert expiry expiry_epoch remaining
    while IFS= read -r cert; do
        if [ ! -f "$cert" ]; then
            log_debug "❌ Missing certificate: $cert"
            continue
        fi

        expiry=$(openssl x509 -in "$cert" -noout -enddate 2>/dev/null | cut -d= -f2)
        if [ -z "$expiry" ] || ! expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null); then
            log_debug "❌ Cannot read $cert"
            continue
        fi

        # Compare in seconds: integer division by 86400 would round a
        # certificate that expired a few hours ago to "0 days" remaining.
        remaining=$(( expiry_epoch - $(date +%s) ))
        if [ "$remaining" -lt 0 ]; then
            log_debug "❌ $cert expired $(( -remaining / 86400 )) days ago"
        elif [ "$remaining" -lt $(( 30 * 86400 )) ]; then
            log_debug "⚠️  $cert expires in $(( remaining / 86400 )) days"
        else
            log_debug "✅ $cert valid for $(( remaining / 86400 )) days"
        fi
    done <<< "$ssl_certs"
    log_debug ""
}

# Main execution
# Dispatch on the first argument (default: full). An unrecognised argument is
# a usage error: the message goes to stderr and the script exits non-zero so
# callers and automation can detect the mistake.

# Print the accepted sub-commands.
usage() {
    echo "Usage: $0 {status|config|network|logs|resources|ssl|full}"
}

case "${1:-full}" in
    status)
        check_service_status
        ;;
    config)
        check_configuration
        ;;
    network)
        check_network_bindings
        ;;
    logs)
        analyze_logs
        ;;
    resources)
        check_resources
        ;;
    ssl)
        check_ssl
        ;;
    full)
        log_debug "Starting NGINX debug analysis..."
        check_service_status
        check_configuration
        check_network_bindings
        analyze_logs
        check_resources
        check_ssl
        log_debug "Debug complete! Log: $DEBUG_LOG"
        ;;
    -h|--help|help)
        usage
        ;;
    *)
        usage >&2
        exit 2
        ;;
esac

# Make executable: sudo chmod +x /usr/local/bin/nginx-debug.sh

Common Issue Fixes

graph TD
    A[Common Issues] --> B[Config Errors]
    A --> C[Service Problems]
    A --> D[Performance Issues]
    A --> E[SSL Problems]

    B --> F[Check syntax<br/>Fix directives<br/>Verify paths]
    C --> G[Start service<br/>Check ports<br/>Fix permissions]
    D --> H[Tune workers<br/>Optimize cache<br/>Monitor resources]
    E --> I[Renew certs<br/>Fix chains<br/>Update protocols]

    J[Resolution Process] --> K[Identify]
    J --> L[Diagnose]
    J --> M[Fix]
    J --> N[Verify]

    style A fill:#ffebee
    style J fill:#e1f5fe
    style F fill:#e8f5e8
    style G fill:#fff3e0
    style H fill:#e3f2fd
    style I fill:#e8f5e8
# Create issue resolution script
sudo nano /usr/local/bin/nginx-fix.sh
#!/bin/bash

# NGINX Issue Resolution Tool

# Walk through four routine checks — configuration syntax, service state,
# listeners on ports 80/443 and file permissions — printing a finding and a
# suggested remedy for each. A stopped service is started automatically.
fix_common_issues() {
    echo "=== Common Issue Fixes ==="

    # Fix 1: Configuration syntax
    echo "--- Configuration Syntax ---"
    if nginx -t >/dev/null 2>&1; then
        echo "✅ Configuration syntax OK"
    else
        echo "❌ Syntax errors found:"
        # Run the test again, this time unsilenced, so the errors are shown.
        nginx -t
        printf '%s\n' \
            "Common fixes:" \
            "• Check for missing semicolons" \
            "• Verify closing braces {}" \
            "• Validate file paths" \
            "• Check directive spelling"
    fi
    echo

    # Fix 2: Service not running
    echo "--- Service Status ---"
    if systemctl is-active --quiet nginx; then
        echo "✅ Service running"
    else
        echo "❌ Service not running, attempting start..."
        if systemctl start nginx; then
            echo "✅ Service started successfully"
        else
            echo "❌ Start failed, check: systemctl status nginx"
        fi
    fi
    echo

    # Fix 3: Port conflicts
    echo "--- Port Conflicts ---"
    local port listener
    for port in 80 443; do
        if ss -tlnp | grep ":$port " | grep -q nginx; then
            echo "✅ Port $port bound to NGINX"
            continue
        fi

        # Not NGINX: either another process holds the port or nobody does.
        listener=$(ss -tlnp | grep ":$port " | head -1)
        if [ -z "$listener" ]; then
            echo "❌ Port $port not bound (check listen directives)"
        else
            echo "⚠️  Port $port used by: $listener"
            echo "   Stop conflicting service or change NGINX port"
        fi
    done
    echo

    # Fix 4: Permissions
    echo "--- File Permissions ---"
    if [ -r /etc/nginx/nginx.conf ]; then
        echo "✅ Main config readable"
    else
        echo "❌ Cannot read main config"
        echo "   Fix: sudo chmod 644 /etc/nginx/nginx.conf"
    fi

    if [ -w /var/log/nginx ]; then
        echo "✅ Log directory writable"
    else
        echo "❌ Cannot write to log directory"
        echo "   Fix: sudo chown -R www-data:adm /var/log/nginx"
    fi
}

# Print load, memory and connection figures together with tuning
# recommendations when the load exceeds 2.0 or memory use exceeds 80%.
fix_performance() {
    echo "=== Performance Fixes ==="

    # System load
    # Read the 1-minute average from /proc/loadavg when available: it always
    # uses a decimal point, whereas the text printed by uptime is
    # locale-formatted. The uptime fallback is therefore run under LC_ALL=C.
    local load
    if [ -r /proc/loadavg ]; then
        read -r load _ < /proc/loadavg
    else
        load=$(LC_ALL=C uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | sed 's/,//')
    fi
    # awk does the floating-point comparison, so the optional bc package is
    # not needed (a missing bc used to make this silently report "normal").
    if awk -v load="$load" 'BEGIN { exit !(load > 2.0) }'; then
        echo "⚠️  High load: $load"
        echo "Recommendations:"
        echo "• Increase worker_processes"
        echo "• Optimize worker_connections"
        echo "• Review application performance"
    else
        echo "✅ Load normal: $load"
    fi

    # Memory usage
    local mem_pct
    mem_pct=$(free | awk '/^Mem/ && $2 > 0 {printf "%.0f", ($3/$2)*100}')
    if [ -z "$mem_pct" ]; then
        echo "⚠️  Memory usage unavailable"
    elif [ "$mem_pct" -gt 80 ]; then
        echo "⚠️  High memory: $mem_pct%"
        echo "Recommendations:"
        echo "• Review buffer sizes"
        echo "• Check cache zones"
        echo "• Monitor worker memory"
    else
        echo "✅ Memory OK: $mem_pct%"
    fi

    # Connections
    # Filter on the local port inside ss: a text match on ":80" / ":443" also
    # counts ports like :8080 and outbound connections to remote port 443.
    local conns
    conns=$(ss -Htun '( sport = :80 or sport = :443 )' | wc -l)
    echo "Active connections: $conns"

    # Quick performance tips
    echo
    echo "Performance Optimization Tips:"
    echo "• Enable gzip compression"
    echo "• Use proxy caching"
    echo "• Optimize worker settings"
    echo "• Enable file caching"
}

# Report, for every certificate referenced by an `ssl_certificate` directive,
# whether the file exists and how long it remains valid, with the action to
# take.
#   $1 - configuration root to scan (optional, default /etc/nginx/)
fix_ssl() {
    echo "=== SSL Issue Fixes ==="

    local conf_root=${1:-/etc/nginx/}

    # -h suppresses the "file:" prefix grep -r would otherwise add; with that
    # prefix and an indented directive, awk's $2 is the directive name rather
    # than the path. Anchoring at line start skips commented-out directives
    # and the required whitespace after the name excludes ssl_certificate_key.
    # sort -u reports a certificate shared by several server blocks once.
    local certs
    certs=$(grep -rhE '^[[:space:]]*ssl_certificate[[:space:]]+' -- "$conf_root" 2>/dev/null \
        | awk '{ sub(/;.*$/, "", $2); print $2 }' \
        | sort -u)

    if [ -z "$certs" ]; then
        echo "No SSL certificates configured"
        return
    fi

    local cert expiry expiry_epoch remaining
    while IFS= read -r cert; do
        echo "Certificate: $cert"
        if [ ! -f "$cert" ]; then
            echo "❌ File missing"
            echo "   Action: Install certificate"
        else
            expiry=$(openssl x509 -in "$cert" -noout -enddate 2>/dev/null | cut -d= -f2)
            if [ -n "$expiry" ] && expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null); then
                # Decide on seconds, not truncated days, so a certificate that
                # expired less than a day ago is still reported as expired.
                remaining=$(( expiry_epoch - $(date +%s) ))
                if [ "$remaining" -lt 0 ]; then
                    echo "❌ Expired $(( -remaining / 86400 )) days ago"
                    echo "   Action: Renew immediately"
                elif [ "$remaining" -lt $(( 30 * 86400 )) ]; then
                    echo "⚠️  Expires in $(( remaining / 86400 )) days"
                    echo "   Action: Plan renewal"
                else
                    echo "✅ Valid for $(( remaining / 86400 )) days"
                fi
            else
                echo "❌ Cannot read certificate"
            fi
        fi
        echo
    done <<< "$certs"
}

# Main execution
# Dispatch on the first argument (default: common). An unrecognised argument
# is a usage error: the message goes to stderr and the script exits non-zero.

# Print the accepted sub-commands and what each one does.
usage() {
    echo "Usage: $0 {common|performance|ssl|all}"
    echo "  common      - Fix common issues"
    echo "  performance - Address performance problems"
    echo "  ssl         - Check SSL certificates"
    echo "  all         - Run all fixes"
}

case "${1:-common}" in
    common)
        fix_common_issues
        ;;
    performance)
        fix_performance
        ;;
    ssl)
        fix_ssl
        ;;
    all)
        fix_common_issues
        echo
        fix_performance
        echo
        fix_ssl
        ;;
    -h|--help|help)
        usage
        ;;
    *)
        usage >&2
        exit 2
        ;;
esac

# Make executable: sudo chmod +x /usr/local/bin/nginx-fix.sh

Log Analysis Tool

# Create log analyzer
sudo nano /usr/local/bin/nginx-logs.sh
#!/bin/bash

# NGINX Log Analyzer
# Log locations. Both default to the standard Ubuntu paths and can be
# overridden from the environment for sites that log elsewhere, e.g.
#   ACCESS_LOG=/var/log/nginx/example.access.log nginx-logs.sh access
ACCESS_LOG="${ACCESS_LOG:-/var/log/nginx/access.log}"
ERROR_LOG="${ERROR_LOG:-/var/log/nginx/error.log}"

# Summarise the access log named by $ACCESS_LOG: size, request counts, the
# most frequent status codes, client addresses and URLs, and — when requests
# carry an "rt=<seconds>" field — response-time statistics.
# Field positions ($1, $7, $9) assume a combined-style log line.
analyze_access() {
    echo "=== Access Log Analysis ==="

    if [ ! -f "$ACCESS_LOG" ]; then
        echo "❌ Access log not found: $ACCESS_LOG"
        return
    fi

    local size total
    size=$(stat -c%s "$ACCESS_LOG")
    total=$(wc -l < "$ACCESS_LOG")
    echo "Log size: $((size / 1024 / 1024))MB, Total requests: $total"
    echo

    # Recent activity
    # Matches the previous and the current clock hour, so the figure is an
    # upper bound for "the last 60 minutes". LC_ALL=C keeps %b in the English
    # month abbreviations the log uses, whatever the user's locale.
    local hour_ago current_hour hourly
    hour_ago=$(LC_ALL=C date -d '1 hour ago' '+%d/%b/%Y:%H')
    current_hour=$(LC_ALL=C date '+%d/%b/%Y:%H')
    hourly=$(grep -cE "$hour_ago|$current_hour" "$ACCESS_LOG")
    echo "Requests in last hour: $hourly"
    echo

    # Status codes
    echo "--- Status Codes (Last 1000) ---"
    tail -n 1000 "$ACCESS_LOG" | awk '{print $9}' | sort | uniq -c | sort -nr | head -5
    echo

    # Top IPs
    echo "--- Top IP Addresses (Last 1000) ---"
    tail -n 1000 "$ACCESS_LOG" | awk '{print $1}' | sort | uniq -c | sort -nr | head -5
    echo

    # Top URLs
    echo "--- Top URLs (Last 1000) ---"
    tail -n 1000 "$ACCESS_LOG" | awk '{print $7}' | sort | uniq -c | sort -nr | head -5
    echo

    # Response times (if logged)
    # One awk pass over the last 100 lines. The field must be a standalone
    # "rt=<number>" token so that query strings such as "?sort=asc" are not
    # mistaken for it; non-numeric values are skipped. min/max are seeded
    # from the first sample, so a genuine 0.000 minimum is kept.
    tail -n 100 "$ACCESS_LOG" | awk '
        match($0, /(^|[ \t])rt=[0-9.]+/) {
            value = substr($0, RSTART, RLENGTH)
            sub(/^.*rt=/, "", value)
            value += 0
            sum += value
            if (count == 0 || value > max) max = value
            if (count == 0 || value < min) min = value
            count++
        }
        END {
            if (count > 0) {
                print "--- Response Times (Last 100) ---"
                printf "Average: %.3fs, Min: %.3fs, Max: %.3fs\n\n", sum / count, min, max
            }
        }'
}

# Summarise the error log named by $ERROR_LOG: size, the last ten entries,
# a count per severity level and a count of a few well-known failure phrases.
analyze_errors() {
    echo "=== Error Log Analysis ==="

    if [ ! -f "$ERROR_LOG" ]; then
        echo "❌ Error log not found: $ERROR_LOG"
        return
    fi

    local size
    size=$(stat -c%s "$ERROR_LOG")
    echo "Error log size: $((size / 1024))KB"
    echo

    if [ ! -s "$ERROR_LOG" ]; then
        echo "✅ No errors in log file"
        return
    fi

    echo "--- Recent Errors (Last 10) ---"
    tail -n 10 "$ERROR_LOG"
    echo

    echo "--- Error Level Summary (Last 100) ---"
    # An nginx error line reads "YYYY/MM/DD HH:MM:SS [level] pid#tid: ...",
    # so the bracketed level is the third field (the fourth is pid#tid).
    # Lines without a bracketed level in that position are ignored.
    tail -n 100 "$ERROR_LOG" \
        | awk '$3 ~ /^\[[a-z]+\]$/ {print $3}' \
        | sort | uniq -c | sort -nr
    echo

    echo "--- Common Error Patterns ---"
    # Fixed-string, case-insensitive match: nginx writes "Permission denied"
    # with a capital P, which a lowercase-only pattern never finds.
    tail -n 100 "$ERROR_LOG" \
        | grep -oiF \
            -e 'connect() failed' \
            -e 'No such file' \
            -e 'permission denied' \
            -e 'upstream timed out' \
        | sort | uniq -c | sort -nr
}

# Follow $ACCESS_LOG and $ERROR_LOG at the same time, prefixing each new line
# with its source. Runs until interrupted (Ctrl+C).
monitor_realtime() {
    echo "=== Real-time Log Monitoring ==="
    echo "Monitoring logs... Press Ctrl+C to stop"
    echo

    # Monitor both access and error logs
    # Each follower runs in the background of this subshell; `wait` keeps the
    # subshell alive for as long as they run.
    (
        local line

        if [ -f "$ACCESS_LOG" ]; then
            # tail -F follows the file by name and reopens it after log
            # rotation, where tail -f would keep reading the rotated-away
            # file. IFS= / read -r pass each line through unchanged,
            # including the "\xNN" escapes nginx writes for special bytes.
            tail -F "$ACCESS_LOG" | while IFS= read -r line; do
                printf '[ACCESS] %s\n' "$line"
            done &
        fi

        if [ -f "$ERROR_LOG" ]; then
            tail -F "$ERROR_LOG" | while IFS= read -r line; do
                printf '[ERROR] %s\n' "$line"
            done &
        fi

        wait
    )
}

# Dispatch on the first argument (default: access). An unrecognised argument
# is a usage error: the message goes to stderr and the script exits non-zero.

# Print the accepted sub-commands and what each one does.
usage() {
    echo "Usage: $0 {access|error|both|monitor}"
    echo "  access  - Analyze access logs"
    echo "  error   - Analyze error logs"
    echo "  both    - Analyze both logs"
    echo "  monitor - Real-time monitoring"
}

case "${1:-access}" in
    access)
        analyze_access
        ;;
    error)
        analyze_errors
        ;;
    both)
        analyze_access
        echo
        analyze_errors
        ;;
    monitor)
        monitor_realtime
        ;;
    -h|--help|help)
        usage
        ;;
    *)
        usage >&2
        exit 2
        ;;
esac

# Make executable: sudo chmod +x /usr/local/bin/nginx-logs.sh

Testing and Usage

# Deploy and test troubleshooting tools
#
# The tools are run with sudo: they inspect socket owners (ss -p), may start
# the nginx service, and read logs and certificates that an unprivileged user
# typically cannot access. Without root the results can be incomplete or
# misleading.

# 1. Make scripts executable
sudo chmod +x /usr/local/bin/nginx-debug.sh
sudo chmod +x /usr/local/bin/nginx-fix.sh
sudo chmod +x /usr/local/bin/nginx-logs.sh

# 2. Run comprehensive debug
sudo /usr/local/bin/nginx-debug.sh full

# 3. Check for common issues
sudo /usr/local/bin/nginx-fix.sh all

# 4. Analyze logs
sudo /usr/local/bin/nginx-logs.sh both

# 5. Monitor real-time (in separate terminal)
sudo /usr/local/bin/nginx-logs.sh monitor

# 6. Quick status check
sudo /usr/local/bin/nginx-debug.sh status

# 7. SSL certificate check
sudo /usr/local/bin/nginx-fix.sh ssl

# 8. Performance analysis
sudo /usr/local/bin/nginx-fix.sh performance

What's Next?

Excellent! You've built comprehensive troubleshooting tools that help quickly diagnose and resolve NGINX issues. These debugging utilities provide systematic analysis and practical solutions for common problems.

Coming up in Part 18: NGINX Automation and DevOps Integration

References


This is Part 17 of our 22-part NGINX series. You now have powerful debugging tools! Next, we'll automate NGINX with DevOps practices. Questions? Share them in the comments!

Written by:

373 Posts

View All Posts
Follow Me :
How to whitelist website on AdBlocker?

How to whitelist website on AdBlocker?

  1. Click on the AdBlock Plus icon in the top-right corner of your browser
  2. Click on "Enabled on this site" in the AdBlock Plus menu
  3. Refresh the page and start browsing the site