Real-Time WebSocket Architecture Series: Part 6 – Production-Grade Features

Real-Time WebSocket Architecture Series: Part 6 – Production-Grade Features

This entry is part 6 of 8 in the series Real-Time WebSocket Architecture Series

Welcome to Part 6! We’ve scaled our application with Redis. Now let’s add production-grade features that separate hobby projects from enterprise applications: automatic reconnection, heartbeat monitoring, message acknowledgments, and graceful shutdown.

Automatic Reconnection Strategy

Socket.IO includes automatic reconnection with exponential backoff. Here’s how to configure it:

// client.js
// Connect with automatic reconnection: up to 10 attempts, starting at a 1 s
// delay and backing off to at most 10 s, with +/-50% jitter so that many
// clients do not retry in lockstep.
const socket = io('http://localhost:3000', {
  reconnection: true,
  reconnectionAttempts: 10,
  reconnectionDelay: 1000,
  reconnectionDelayMax: 10000,
  randomizationFactor: 0.5,
  timeout: 20000
});

// Number of the reconnection attempt currently in progress (0 while connected).
let reconnectCount = 0;

socket.on('connect', () => {
  console.log('Connected to server');
  reconnectCount = 0;
  updateUI('connected');
});

socket.on('disconnect', (reason) => {
  console.log('Disconnected:', reason);
  updateUI('disconnected');

  // When the server closes the socket on purpose the client does not retry
  // automatically, so the connection has to be reopened by hand.
  if (reason === 'io server disconnect') {
    socket.connect();
  }
});

// Since Socket.IO v3 the reconnection lifecycle events are emitted by the
// Manager (socket.io), not by the Socket itself; listeners registered on the
// socket would never be called.
socket.io.on('reconnect_attempt', (attempt) => {
  console.log('Reconnection attempt:', attempt);
  reconnectCount = attempt;
});

socket.io.on('reconnect', (attempt) => {
  console.log('Reconnected after', attempt, 'attempts');
  restoreState();
});

socket.io.on('reconnect_failed', () => {
  // All reconnectionAttempts were used up; hand control back to the user.
  console.log('Reconnection failed');
  showManualReconnectButton();
});

Heartbeat Mechanism

Implement a custom heartbeat to detect dead connections:

// server.js
// Socket.IO server with explicit engine-level ping settings.
const io = new Server(server, {
  pingInterval: 25000,
  pingTimeout: 60000
});

// Per-connection application heartbeat: every 30 s send a 'ping' and expect
// the client to answer with 'pong' before the next tick.
io.on('connection', (socket) => {
  // True while a 'ping' has been sent and no 'pong' has come back yet.
  let awaitingPong = false;

  socket.on('pong', () => {
    awaitingPong = false;
  });

  const heartbeatTimer = setInterval(() => {
    if (awaitingPong) {
      // The previous probe was never answered: drop the connection.
      console.log('Dead connection detected');
      socket.disconnect(true);
      return;
    }

    awaitingPong = true;
    socket.emit('ping');
  }, 30000);

  // Stop probing once the socket is gone so the timer does not leak.
  socket.on('disconnect', () => clearInterval(heartbeatTimer));
});
// client.js
// Reply to every heartbeat probe from the server with a 'pong'.
socket.on('ping', () => socket.emit('pong'));

Message Acknowledgments

Ensure message delivery with acknowledgments:

// server.js
/**
 * Handles an incoming chat message: persists it, broadcasts it to the target
 * room and reports the outcome to the sender through the acknowledgment.
 *
 * @param {{ room: string, text: string }} data - Message payload from the client.
 * @param {Function} [callback] - Acknowledgment function; only present when
 *   the client passed one to emit().
 */
socket.on('send-message', (data, callback) => {
  // A client may emit without an acknowledgment. Calling an undefined callback
  // would throw in the success path and again inside the catch block, so fall
  // back to a no-op.
  const acknowledge = typeof callback === 'function' ? callback : () => {};

  try {
    const messageId = saveMessage(data);

    io.to(data.room).emit('new-message', {
      id: messageId,
      text: data.text,
      user: socket.user.username,
      timestamp: new Date().toISOString()
    });

    acknowledge({
      success: true,
      messageId: messageId
    });
  } catch (error) {
    acknowledge({
      success: false,
      error: error.message
    });
  }
});

// client.js
/**
 * Sends a chat message and waits for the server acknowledgment.
 * On a failed acknowledgment the message is retried with exponential backoff.
 *
 * @param {string} text - Message text to send.
 * @param {number} [attempts=0] - Number of retries already performed for this
 *   message. Callers normally omit it; it is threaded through the retry loop so
 *   the retry limit can actually be reached.
 */
function sendMessage(text, attempts = 0) {
  socket.emit('send-message', { text, room }, (response) => {
    if (response.success) {
      console.log('Message delivered:', response.messageId);
      markAsSent(response.messageId);
    } else {
      console.error('Failed:', response.error);
      retryMessage(text, attempts);
    }
  });
}

/**
 * Schedules another delivery attempt, or gives up after three retries.
 *
 * @param {string} text - Message text to resend.
 * @param {number} [attempts=0] - Number of retries already performed.
 */
function retryMessage(text, attempts = 0) {
  if (attempts >= 3) {
    showError('Failed to send message');
    return;
  }

  // Backoff grows 1 s, 2 s, 4 s. The incremented counter is passed on so the
  // next failure sees how many retries have already happened.
  setTimeout(() => {
    sendMessage(text, attempts + 1);
  }, 2 ** attempts * 1000);
}

State Recovery

/**
 * Persists the minimal client state (current room and last seen message id)
 * in localStorage under the 'appState' key so it survives a reconnect.
 */
class StateManager {
  constructor() {
    this.currentRoom = null;
    this.lastMessageId = null;
  }

  /** Writes the current room and last message id to localStorage. */
  save() {
    localStorage.setItem('appState', JSON.stringify({
      room: this.currentRoom,
      lastMessageId: this.lastMessageId
    }));
  }

  /**
   * Reads the persisted state back.
   *
   * The instance fields are refreshed from the stored values so that a later
   * save() does not overwrite the persisted state with the constructor's nulls.
   *
   * @returns {?{room: *, lastMessageId: *}} The parsed state, or null when
   *   nothing is stored or the stored value is not valid JSON.
   */
  restore() {
    const saved = localStorage.getItem('appState');
    if (!saved) {
      return null;
    }

    let state;
    try {
      state = JSON.parse(saved);
    } catch (error) {
      // Stored data can be corrupted or written by another version of the
      // app; treat it as "no saved state" instead of crashing the handler.
      console.warn('Ignoring unreadable saved state:', error.message);
      return null;
    }

    if (state !== null && typeof state === 'object') {
      this.currentRoom = state.room === undefined ? null : state.room;
      this.lastMessageId = state.lastMessageId === undefined ? null : state.lastMessageId;
    }
    return state;
  }
}

const stateManager = new StateManager();

// After a successful reconnection, rejoin the room the user was in and tell
// the server the last message already seen.
// Since Socket.IO v3 the 'reconnect' event is emitted by the Manager
// (socket.io), not by the Socket; a listener on the socket would never run.
socket.io.on('reconnect', () => {
  const state = stateManager.restore();

  if (state && state.room) {
    socket.emit('rejoin-room', {
      room: state.room,
      lastMessageId: state.lastMessageId
    });
  }
});

Graceful Shutdown

// server.js
// Set once shutdown has begun so a second signal (for example SIGINT right
// after SIGTERM) does not start a parallel shutdown sequence.
let isShuttingDown = false;

/**
 * Shuts this server instance down: stops accepting HTTP connections, warns the
 * connected clients, gives them a grace period to leave, force-closes the
 * rest, then exits the process.
 *
 * Only sockets attached to THIS node are notified and disconnected
 * (io.local). With a multi-node adapter such as Redis, io.emit() and
 * io.fetchSockets() address the whole cluster, so restarting one node would
 * otherwise kick the clients of every other node as well.
 *
 * @param {string} signal - Name of the signal that triggered the shutdown.
 * @param {number} [gracePeriodMs=5000] - How long to wait for clients to
 *   disconnect on their own before force-closing them.
 * @returns {Promise<void>} Resolves after process.exit has been requested.
 */
async function gracefulShutdown(signal, gracePeriodMs = 5000) {
  if (isShuttingDown) {
    return;
  }
  isShuttingDown = true;

  console.log(`${signal} received. Starting graceful shutdown...`);

  let exitCode = 0;
  try {
    // Stop accepting new connections
    server.close(() => {
      console.log('HTTP server closed');
    });

    // Notify the clients connected to this node
    io.local.emit('server-shutdown', {
      message: 'Server restarting',
      reconnect: true
    });

    // Wait for clients to disconnect
    await new Promise((resolve) => setTimeout(resolve, gracePeriodMs));

    // Force close whatever is still connected to this node
    io.local.disconnectSockets(true);

    console.log('All connections closed');
  } catch (error) {
    // Without this the rejection would be unhandled and the process could be
    // left running half shut down.
    console.error('Error during graceful shutdown:', error);
    exitCode = 1;
  }

  process.exit(exitCode);
}

process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
process.on('SIGINT', () => gracefulShutdown('SIGINT'));
// client.js
// The server announced a restart: persist the current state right away and
// switch the UI into its "reconnecting" mode.
socket.on('server-shutdown', ({ message }) => {
  console.log(message);
  stateManager.save();
  showReconnectingUI();
});

Connection Quality Monitoring

/**
 * Periodically measures round-trip latency over a socket and classifies the
 * connection quality from a rolling window of the last 10 samples.
 */
class ConnectionMonitor {
  /**
   * @param {object} socket - Socket used for probing; must expose `connected`
   *   and `emit(event, ack)`.
   */
  constructor(socket) {
    this.socket = socket;
    this.latencies = [];
    // Handle of the polling timer, or null while the monitor is stopped.
    this.intervalId = null;
  }

  /**
   * Emits a 'ping' with an acknowledgment and records how long the
   * acknowledgment took to arrive.
   */
  measureLatency() {
    const start = Date.now();

    this.socket.emit('ping', () => {
      const latency = Date.now() - start;
      this.latencies.push(latency);

      // Keep only the 10 most recent samples.
      if (this.latencies.length > 10) {
        this.latencies.shift();
      }

      this.updateQuality();
    });
  }

  /**
   * @returns {number} Rounded mean of the recorded samples in milliseconds,
   *   or 0 when no sample has been recorded yet (instead of NaN from 0 / 0).
   */
  getAverageLatency() {
    if (this.latencies.length === 0) {
      return 0;
    }
    const sum = this.latencies.reduce((a, b) => a + b, 0);
    return Math.round(sum / this.latencies.length);
  }

  /**
   * @returns {string} 'excellent' (< 100 ms), 'good' (< 300 ms),
   *   'fair' (< 500 ms), 'poor' otherwise, or 'unknown' before the first
   *   sample. Without the 'unknown' case an unmeasured connection would be
   *   reported as 'poor'.
   */
  getQuality() {
    if (this.latencies.length === 0) {
      return 'unknown';
    }
    const avg = this.getAverageLatency();
    if (avg < 100) return 'excellent';
    if (avg < 300) return 'good';
    if (avg < 500) return 'fair';
    return 'poor';
  }

  /** Logs the current quality label and average latency. */
  updateQuality() {
    const quality = this.getQuality();
    const latency = this.getAverageLatency();
    console.log(`Connection: ${quality} (${latency}ms)`);
  }

  /**
   * Starts probing every 5 seconds while the socket is connected.
   * Calling it again while already running is a no-op, so timers never stack.
   */
  start() {
    if (this.intervalId !== null) {
      return;
    }
    this.intervalId = setInterval(() => {
      if (this.socket.connected) {
        this.measureLatency();
      }
    }, 5000);
  }

  /** Stops the periodic probing started by start(). */
  stop() {
    if (this.intervalId !== null) {
      clearInterval(this.intervalId);
      this.intervalId = null;
    }
  }
}

// Create a latency monitor for the client socket and start its periodic probing.
const monitor = new ConnectionMonitor(socket);
monitor.start();

Error Handling

// Connection could not be established: route the user according to the
// wording of the error message.
socket.on('connect_error', (error) => {
  const { message } = error;
  console.error('Connection error:', message);

  if (message.includes('token')) {
    redirectToLogin();
    return;
  }

  if (message.includes('poll')) {
    showNetworkError();
  }
});

// Any other socket-level error is logged locally and forwarded to monitoring.
socket.on('error', (socketError) => {
  console.error('Socket error:', socketError);
  logToMonitoring(socketError);
});

Best Practices

  1. Always use acknowledgments for critical messages
  2. Implement exponential backoff with jitter
  3. Monitor connection quality continuously
  4. Save state before disconnection
  5. Handle graceful shutdowns properly
  6. Log all errors to monitoring service
  7. Test reconnection scenarios thoroughly
  8. Implement manual reconnect option

What’s Next

In Part 7: Performance Optimization, we’ll optimize memory usage, reduce latency, and implement caching strategies!


Part 6 of the 8-part Real-Time WebSocket Architecture Series.

Navigate: << Real-Time WebSocket Architecture Series: Part 5 – Scaling with Redis | Real-Time WebSocket Architecture Series: Part 7 – Serverless WebSocket Implementation with AWS Lambda >>

Written by:

373 Posts

View All Posts
Follow Me :
How to whitelist website on AdBlocker?

How to whitelist website on AdBlocker?

  1. Click on the AdBlock Plus icon in the top right corner of your browser
  2. Click on "Enabled on this site" in the AdBlock Plus options
  3. Refresh the page and start browsing the site