Production applications require sophisticated monitoring beyond basic process management. This comprehensive guide explores PM2’s advanced monitoring capabilities, integration with enterprise monitoring systems, centralized logging solutions, and custom alerting mechanisms that keep your applications running smoothly 24/7.
PM2 Plus: Enterprise Monitoring Platform
PM2 Plus (formerly Keymetrics) provides advanced monitoring, alerting, and management capabilities for production PM2 deployments.
graph TD
    A[PM2 Applications] --> B[PM2 Agent]
    B --> C[PM2 Plus Dashboard]
    C --> D[Real-time Metrics]
    C --> E[Error Tracking]
    C --> F[Performance Monitoring]
    C --> G[Custom Alerts]
    D --> H[CPU Usage]
    D --> I[Memory Usage]
    D --> J[Response Time]
    E --> K[Exception Tracking]
    E --> L[Error Rate Monitoring]
    F --> M[Transaction Tracing]
    F --> N[Database Queries]
    G --> O[Email Notifications]
    G --> P[Slack Integration]
    G --> Q[Webhook Alerts]
Setting Up PM2 Plus
# Install PM2 Plus
# NOTE(review): confirm this package name — current PM2 documentation links a
# server to PM2 Plus with the `pm2 plus` command bundled in the standard
# `pm2` package; `@pm2/pm2-plus-connect` may be outdated.
npm install -g @pm2/pm2-plus-connect
# Link your server to PM2 Plus
pm2 plus
# Follow the instructions to create account and link server
# Configure ecosystem with PM2 Plus
// ecosystem.config.js — app definition with PM2 Plus metric reporting enabled.
module.exports = {
  apps: [{
    name: 'monitored-app',
    script: 'app.js',
    instances: 'max',      // one worker per CPU core
    exec_mode: 'cluster',  // cluster mode is required for instances: 'max'
    // PM2 Plus configuration
    pmx: true,             // NOTE(review): pmx option is deprecated in recent PM2 releases — confirm the pinned version
    automation: false,
    env_production: {
      NODE_ENV: 'production',
      PMX_MACHINE_NAME: 'production-server-01',  // server label shown in the PM2 Plus dashboard
      PMX_MODULE_NAME: 'myapp-production'        // module/app label in the dashboard
    }
  }]
};
Prometheus and Grafana Integration
Integrate PM2 with Prometheus for metrics collection and Grafana for visualization to create comprehensive monitoring dashboards.
Installing Prometheus PM2 Exporter
# Install PM2 Prometheus exporter
npm install -g pm2-prometheus-exporter
# Start the exporter (runs it as a PM2-managed module)
pm2 install pm2-prometheus-exporter
# Configure the exporter
pm2 set pm2-prometheus-exporter:port 9209    # HTTP port Prometheus will scrape
pm2 set pm2-prometheus-exporter:prefix pm2_  # prefix applied to every exported metric name
# Restart PM2 to apply changes
pm2 restart all
Prometheus Configuration
# /etc/prometheus/prometheus.yml
# Scrapes the PM2 exporter (9209) and node-exporter (9100), and forwards
# firing alerts to Alertmanager. Indentation restored — the flattened
# original was not valid YAML.
global:
  scrape_interval: 15s       # default scrape cadence for all jobs
  evaluation_interval: 15s   # how often recording/alerting rules are evaluated

rule_files:
  - "pm2_rules.yml"

scrape_configs:
  # PM2 process metrics via pm2-prometheus-exporter
  - job_name: 'pm2'
    static_configs:
      - targets: ['localhost:9209']
    scrape_interval: 10s     # overrides the global 15s for this job
    metrics_path: '/metrics'

  # Host-level metrics (CPU, disk, network)
  - job_name: 'node-exporter'
    static_configs:
      - targets: ['localhost:9100']
    scrape_interval: 10s

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanager:9093
graph LR
    A[PM2 Applications] --> B[PM2 Prometheus Exporter]
    B --> C[Prometheus Server]
    C --> D[Grafana Dashboard]
    E[Node Exporter] --> C
    F[Application Metrics] --> C
    C --> G[AlertManager]
    G --> H[Email Alerts]
    G --> I[Slack Notifications]
    G --> J[PagerDuty]
    D --> K[CPU/Memory Graphs]
    D --> L[Request Rate Charts]
    D --> M[Error Rate Monitoring]
    D --> N[Custom Dashboards]
Centralized Logging with ELK Stack
Implement centralized logging using Elasticsearch, Logstash, and Kibana (ELK) for comprehensive log analysis and monitoring.
PM2 Log Configuration for ELK
// ecosystem.config.js - ELK optimized logging
// App definition whose log output is structured for ingestion by Filebeat/Logstash.
module.exports = {
  apps: [{
    name: 'elk-logged-app',
    script: 'app.js',
    instances: 'max',
    exec_mode: 'cluster',
    // Structured logging configuration
    log_file: '/var/log/myapp/combined.log',      // merged stdout + stderr
    out_file: '/var/log/myapp/out.log',
    error_file: '/var/log/myapp/error.log',
    log_date_format: 'YYYY-MM-DD HH:mm:ss.SSS Z', // timestamp prefix; keep in sync with any multiline log-shipper pattern
    merge_logs: true,                             // cluster instances share one log file instead of one per worker
    env_production: {
      NODE_ENV: 'production',
      LOG_LEVEL: 'info',
      LOG_FORMAT: 'json', // JSON format for better parsing
      APP_NAME: 'myapp',
      SERVER_NAME: 'prod-server-01'
    }
  }]
};
Filebeat Configuration
# /etc/filebeat/filebeat.yml
# Ships the PM2 app logs to Logstash, tagging each event with app/env/server
# fields. Indentation restored — the flattened original was not valid YAML.
filebeat.inputs:
  - type: log
    enabled: true
    paths:
      - /var/log/myapp/*.log
    # Static fields attached to every event for filtering in Kibana.
    fields:
      app_name: myapp
      environment: production
      server: prod-server-01
    fields_under_root: true   # place fields at the event root, not under "fields."
    # Join stack traces: a new event starts only on a "YYYY-MM-DD" timestamp
    # (must match the app's log_date_format).
    multiline.pattern: '^\d{4}-\d{2}-\d{2}'
    multiline.negate: true
    multiline.match: after

processors:
  - add_host_metadata:
      when.not.contains.tags: forwarded

output.logstash:
  hosts: ["logstash:5044"]

# Filebeat's own logging (rotated, 7 files kept).
logging.level: info
logging.to_files: true
logging.files:
  path: /var/log/filebeat
  name: filebeat
  keepfiles: 7
  permissions: 0644
Custom Alerting Systems
Build sophisticated alerting mechanisms that notify you of issues before they impact users.
PM2 Custom Alert Module
// pm2-alerts.js - Custom alerting module
const pm2 = require('pm2');
const axios = require('axios');
const nodemailer = require('nodemailer');
/**
 * PM2AlertManager — polls the local PM2 daemon and fans alerts out to
 * Slack (incoming webhook) and email (nodemailer) when a managed process
 * looks unhealthy: not running, restart-looping, or using too much
 * memory/CPU.
 *
 * Expected config shape:
 *   {
 *     slack: { webhook },                  // Slack incoming-webhook URL
 *     email: { user, password, from, to }  // Gmail credentials + addresses
 *   }
 */
class PM2AlertManager {
  constructor(config) {
    this.config = config;
    // Maps "<appName>-<alertType>" -> timestamp (ms) of the last alert;
    // used by shouldSendAlert() to rate-limit duplicate notifications.
    this.alertHistory = new Map();
    this.emailTransporter = this.setupEmailTransporter();
  }

  /**
   * Build the nodemailer transport used for email alerts.
   * BUG FIX: nodemailer's factory is createTransport(), not
   * createTransporter() — the original call threw a TypeError as soon as
   * the class was constructed.
   */
  setupEmailTransporter() {
    return nodemailer.createTransport({
      service: 'gmail',
      auth: {
        user: this.config.email.user,
        pass: this.config.email.password
      }
    });
  }

  /**
   * Post a colored attachment to the configured Slack webhook.
   * Unknown severities fall back to the 'info' color. Delivery failures
   * are logged, never thrown — alerting must not crash the monitor.
   */
  async sendSlackAlert(message, severity = 'info') {
    const colors = {
      info: '#36a64f',
      warning: '#ff9900',
      error: '#ff0000',
      critical: '#8B0000'
    };
    const payload = {
      attachments: [{
        color: colors[severity] ?? colors.info,
        title: `PM2 Alert - ${severity.toUpperCase()}`,
        text: message,
        footer: 'PM2 Monitoring',
        ts: Math.floor(Date.now() / 1000)  // Slack expects epoch seconds
      }]
    };
    try {
      await axios.post(this.config.slack.webhook, payload);
    } catch (error) {
      console.error('Failed to send Slack alert:', error.message);
    }
  }

  /**
   * Send an HTML email alert with timestamp and hostname context.
   * Delivery failures are logged, never thrown.
   */
  async sendEmailAlert(subject, message) {
    const mailOptions = {
      from: this.config.email.from,
      to: this.config.email.to,
      subject: `PM2 Alert: ${subject}`,
      html: `
PM2 Monitoring Alert
Time: ${new Date().toISOString()}
Server: ${require('os').hostname()}
${message}
`
    };
    try {
      await this.emailTransporter.sendMail(mailOptions);
    } catch (error) {
      console.error('Failed to send email alert:', error.message);
    }
  }

  /**
   * Rate limiter: returns true (and records the send time) unless the
   * same app/alertType pair already fired within the last 5 minutes.
   */
  shouldSendAlert(appName, alertType) {
    const key = `${appName}-${alertType}`;
    const now = Date.now();
    const lastAlert = this.alertHistory.get(key);
    // Prevent spam - don't send same alert within 5 minutes
    if (lastAlert && (now - lastAlert) < 300000) {
      return false;
    }
    this.alertHistory.set(key, now);
    return true;
  }

  /**
   * Connect to the PM2 daemon, health-check every managed process, then
   * disconnect.
   *
   * BUG FIX: the original iterated with processes.forEach(async ...) and
   * called pm2.disconnect()/resolve() immediately afterwards — the async
   * callbacks were never awaited, so the PM2 connection could be torn
   * down mid-check and any rejection was silently dropped. Checks now
   * run sequentially and disconnect happens only after they all settle.
   */
  async monitorApplications() {
    return new Promise((resolve, reject) => {
      pm2.connect((err) => {
        if (err) {
          reject(err);
          return;
        }
        pm2.list(async (err, processes) => {
          if (err) {
            pm2.disconnect();
            reject(err);
            return;
          }
          try {
            for (const proc of processes) {
              await this.checkProcessHealth(proc);
            }
            resolve();
          } catch (checkError) {
            reject(checkError);
          } finally {
            pm2.disconnect();
          }
        });
      });
    });
  }

  /**
   * Inspect one pm2.list() entry and alert on: process down, restart
   * churn (>5 restarts), high memory (>1000 MB), high CPU (>80%).
   *
   * BUG FIX: pm2.list() reports live metrics under proc.monit
   * ({ memory, cpu }), not proc.memory / proc.cpu — with the original
   * field names the memory and CPU checks could never fire. The old
   * fields are kept as a fallback in case a different process shape is
   * ever passed in.
   */
  async checkProcessHealth(proc) {
    const app = proc.pm2_env;
    const appName = app.name;
    const monit = proc.monit || {};

    // 'errored' means PM2 has given up restarting the app — at least as
    // serious as a plain 'stopped', so both share one alert type.
    if ((app.status === 'stopped' || app.status === 'errored') &&
        this.shouldSendAlert(appName, 'stopped')) {
      await this.sendSlackAlert(
        `Application "${appName}" is ${app.status} on ${require('os').hostname()}`,
        'error'
      );
      await this.sendEmailAlert(
        'Application Stopped',
        `Application "${appName}" has stopped running.`
      );
    }
    // A climbing restart count usually indicates a crash loop.
    if (app.restart_time > 5 && this.shouldSendAlert(appName, 'restarts')) {
      await this.sendSlackAlert(
        `Application "${appName}" has restarted ${app.restart_time} times`,
        'warning'
      );
    }
    // Memory threshold: 1000 MB (monit.memory is in bytes).
    const memoryMB = (monit.memory ?? proc.memory ?? 0) / 1024 / 1024;
    if (memoryMB > 1000 && this.shouldSendAlert(appName, 'memory')) {
      await this.sendSlackAlert(
        `High memory usage: "${appName}" using ${memoryMB.toFixed(2)}MB`,
        'warning'
      );
    }
    // CPU threshold: 80%.
    const cpu = monit.cpu ?? proc.cpu ?? 0;
    if (cpu > 80 && this.shouldSendAlert(appName, 'cpu')) {
      await this.sendSlackAlert(
        `High CPU usage: "${appName}" using ${cpu}%`,
        'warning'
      );
    }
  }
}
// Configuration — all secrets come from the environment, nothing hard-coded.
const alertConfig = {
  slack: {
    webhook: process.env.SLACK_WEBHOOK_URL
  },
  email: {
    user: process.env.EMAIL_USER,
    password: process.env.EMAIL_PASSWORD,
    from: process.env.EMAIL_FROM,
    to: process.env.EMAIL_TO
  }
};

// Initialize the manager and start the recurring monitoring sweep.
const alertManager = new PM2AlertManager(alertConfig);

// One sweep every 2 minutes; a failed sweep is logged and the next one
// still runs on schedule.
const MONITOR_INTERVAL_MS = 2 * 60 * 1000;
setInterval(() => {
  alertManager.monitorApplications().catch((error) => {
    console.error('Monitoring error:', error);
  });
}, MONITOR_INTERVAL_MS);

console.log('PM2 Alert Manager started');
sequenceDiagram
    participant M as Monitor
    participant PM2 as PM2 Daemon
    participant A as Alert Manager
    participant S as Slack
    participant E as Email
    participant P as PagerDuty
    loop Every 2 minutes
        M->>PM2: Check Process Status
        PM2-->>M: Process List & Metrics
        alt Process Issues Detected
            M->>A: Trigger Alert
            A->>A: Check Alert History
            alt Alert Not Recently Sent
                A->>S: Send Slack Notification
                A->>E: Send Email Alert
                alt Critical Issue
                    A->>P: Trigger PagerDuty
                end
            end
        end
    end
Performance Metrics and Dashboard Creation
Custom Metrics Collection
// metrics-collector.js - Custom application metrics
const express = require('express');
const promClient = require('prom-client');
const app = express();

// Create a Registry — a dedicated registry so /metrics exposes only what
// is explicitly registered here.
const register = new promClient.Registry();

// Add default Node.js runtime metrics (event-loop lag, GC, heap, ...).
// NOTE(review): 'app' and 'timeout' are not options of
// collectDefaultMetrics in current prom-client releases ('timeout' was
// removed in v13) — confirm the pinned prom-client version accepts them.
promClient.collectDefaultMetrics({
  app: 'myapp',
  timeout: 10000,
  gcDurationBuckets: [0.001, 0.01, 0.1, 1, 2, 5],
  register: register,
});
// ---- Custom application metrics --------------------------------------

// HTTP request latency histogram, labelled by method/route/status.
const httpRequestDuration = new promClient.Histogram({
  name: 'http_request_duration_seconds',
  help: 'Duration of HTTP requests in seconds',
  labelNames: ['method', 'route', 'status_code'],
  buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10]
});

// Monotonic HTTP request counter with the same label set.
const httpRequestTotal = new promClient.Counter({
  name: 'http_requests_total',
  help: 'Total number of HTTP requests',
  labelNames: ['method', 'route', 'status_code']
});

// Point-in-time gauge of open connections.
const activeConnections = new promClient.Gauge({
  name: 'active_connections',
  help: 'Number of active connections'
});

// Database query latency histogram, labelled by query type and table.
const databaseQueryDuration = new promClient.Histogram({
  name: 'database_query_duration_seconds',
  help: 'Duration of database queries',
  labelNames: ['query_type', 'table'],
  buckets: [0.01, 0.05, 0.1, 0.5, 1, 5]
});

// Attach every custom metric to the shared registry.
for (const metric of [
  httpRequestDuration,
  httpRequestTotal,
  activeConnections,
  databaseQueryDuration,
]) {
  register.registerMetric(metric);
}
// Express middleware: record latency and count for every finished response.
app.use((req, res, next) => {
  const startedAt = Date.now();
  // 'finish' fires once the response has been handed to the OS, so the
  // final status code is available by then.
  res.on('finish', () => {
    const elapsedSeconds = (Date.now() - startedAt) / 1000;
    // Prefer the matched route pattern (e.g. '/users/:id') over the raw
    // path to keep label cardinality bounded.
    const labels = {
      method: req.method,
      route: req.route ? req.route.path : req.path,
      status_code: res.statusCode
    };
    httpRequestDuration.observe(labels, elapsedSeconds);
    httpRequestTotal.inc(labels);
  });
  next();
});
// Metrics endpoint — Prometheus scrape target in text exposition format.
app.get('/metrics', async (req, res) => {
  try {
    res.set('Content-Type', register.contentType);
    res.end(await register.metrics());
  } catch (ex) {
    // BUG FIX: res.end() requires a string/Buffer; the original passed the
    // Error object itself, which throws ERR_INVALID_ARG_TYPE on modern Node.
    res.status(500).end(String(ex));
  }
});
// Health endpoint with detailed metrics — JSON liveness/diagnostics snapshot.
app.get('/health', (req, res) => {
  const memUsage = process.memoryUsage();
  const uptime = process.uptime();
  res.json({
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: uptime,  // seconds since process start
    memory: {
      // all values converted from bytes to whole megabytes
      rss: Math.round(memUsage.rss / 1024 / 1024),
      heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024),
      heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024),
      external: Math.round(memUsage.external / 1024 / 1024)
    },
    processId: process.pid,
    nodeVersion: process.version,
    // NOTE(review): _getActiveHandles/_getActiveRequests are undocumented
    // private Node APIs and may break between versions — consider
    // process.getActiveResourcesInfo() instead.
    activeHandles: process._getActiveHandles().length,
    activeRequests: process._getActiveRequests().length
  });
});
// Refresh the active-connections gauge every 5 seconds.
setInterval(() => {
  const simulatedConnectionCount = Math.floor(Math.random() * 100); // Replace with actual logic
  activeConnections.set(simulatedConnectionCount);
}, 5000);

module.exports = { app, register };
What’s Next?
You now have comprehensive knowledge of advanced PM2 monitoring, logging, and alerting systems. You can integrate with enterprise monitoring platforms, set up centralized logging, and create custom alerting mechanisms. In the final part of this series, we’ll explore deployment automation and CI/CD integration:
- Automated deployment strategies and pipelines
- Zero-downtime deployment techniques
- Integration with GitHub Actions and GitLab CI
- Blue-green and canary deployment patterns
- Rollback mechanisms and disaster recovery
Series Navigation:
← Part 4: Production Systemd Integration
→ Part 5: Advanced Monitoring and Alerting (You are here)
→ Part 6: Deployment Automation and CI/CD (Coming next)