Monitoring and Alerting
Learning Objectives
Identify critical metrics for XRPL integration health
Implement logging and metrics collection
Configure alerts for anomalies and failures
Build dashboards for operational visibility
Respond to incidents with appropriate runbooks
/**
 * ConnectionMonitor — tracks the XRPL WebSocket connection lifecycle and
 * publishes state transitions to a metrics backend.
 *
 * @param {object} metricsClient - client exposing gauge/increment/timing
 */
class ConnectionMonitor {
  constructor(metricsClient) {
    this.metrics = metricsClient
    this.connectionState = 'disconnected'
    this.lastConnectedTime = null
    this.disconnectCount = 0
  }

  /**
   * Record a successful connection to `server`: sets the state gauge to 1
   * and bumps the per-server connect counter.
   */
  recordConnect(server) {
    this.connectionState = 'connected'
    this.lastConnectedTime = Date.now()
    this.metrics.gauge('xrpl.connection.state', 1, { server })
    this.metrics.increment('xrpl.connection.connects', { server })
  }

  /**
   * Record a disconnect from `server` and emit how long the connection
   * was up (0 when we were never connected).
   *
   * @param {string} server - server URL the client was attached to
   * @param {number} code - WebSocket close code
   * @param {string} reason - close reason string
   */
  recordDisconnect(server, code, reason) {
    this.connectionState = 'disconnected'
    this.disconnectCount++
    const connectedDuration = this.lastConnectedTime
      ? Date.now() - this.lastConnectedTime
      : 0
    this.metrics.gauge('xrpl.connection.state', 0, { server })
    this.metrics.increment('xrpl.connection.disconnects', { server, code })
    this.metrics.timing('xrpl.connection.duration', connectedDuration, { server })
    // BUG FIX: the log message was missing template-literal backticks.
    console.log(`Disconnected from ${server}: code=${code}, reason=${reason}`)
  }

  /**
   * @returns {{connected: boolean, disconnectCount: number, lastConnected: ?number}}
   *   snapshot suitable for a health endpoint
   */
  getHealth() {
    return {
      connected: this.connectionState === 'connected',
      disconnectCount: this.disconnectCount,
      lastConnected: this.lastConnectedTime
    }
  }
}
```
/**
 * RequestMonitor — records per-command latency and outcome metrics for
 * XRPL client requests, and can transparently instrument a client.
 */
class RequestMonitor {
  constructor(metricsClient) {
    this.metrics = metricsClient
  }

  /**
   * Record one request outcome.
   *
   * @param {string} command - XRPL API command name
   * @param {number} latencyMs - round-trip time in milliseconds
   * @param {boolean} success - whether the request succeeded
   * @param {?string} errorType - error label when success is false
   */
  recordRequest(command, latencyMs, success, errorType = null) {
    // Latency histogram
    this.metrics.timing('xrpl.request.latency', latencyMs, { command })
    // Success/failure counter
    if (success) {
      this.metrics.increment('xrpl.request.success', { command })
    } else {
      this.metrics.increment('xrpl.request.error', { command, error: errorType })
    }
    // Alert on high latency
    if (latencyMs > 5000) {
      // BUG FIX: the warning was missing template-literal backticks.
      console.warn(`High latency request: ${command} took ${latencyMs}ms`)
    }
  }

  /**
   * Monkey-patch `client.request` so every call is timed and recorded.
   * Errors are counted (using error.data.error when present) and
   * re-thrown unchanged. Returns the same client instance.
   */
  wrapClient(client) {
    const originalRequest = client.request.bind(client)
    const monitor = this
    client.request = async function (request) {
      const start = Date.now()
      let success = true
      let errorType = null
      try {
        return await originalRequest(request)
      } catch (error) {
        success = false
        errorType = error.data?.error || error.name || 'unknown'
        throw error
      } finally {
        // finally runs on both paths, so every request is recorded exactly once
        monitor.recordRequest(request.command, Date.now() - start, success, errorType)
      }
    }
    return client
  }
}
```
/**
 * TransactionMonitor — records transaction submission outcomes, payment
 * volume, and wallet balance metrics.
 */
class TransactionMonitor {
  constructor(metricsClient) {
    this.metrics = metricsClient
  }

  /**
   * Record the outcome of one transaction submission.
   *
   * @param {string} txType - e.g. 'Payment', 'TrustSet'
   * @param {boolean} success - whether submission succeeded
   * @param {string} result - engine result code (tagged on failures)
   * @param {number} latencyMs - submission latency
   */
  recordSubmission(txType, success, result, latencyMs) {
    this.metrics.timing('xrpl.transaction.latency', latencyMs, { type: txType })
    if (success) {
      this.metrics.increment('xrpl.transaction.success', { type: txType })
    } else {
      this.metrics.increment('xrpl.transaction.failure', { type: txType, result })
    }
  }

  /**
   * Count a successful payment; XRP payments additionally feed the
   * amount gauge and cumulative volume counter.
   */
  recordPayment(amount, currency, success) {
    if (success) {
      this.metrics.increment('xrpl.payment.count', { currency })
      if (currency === 'XRP') {
        this.metrics.gauge('xrpl.payment.amount', amount, { currency })
        this.metrics.increment('xrpl.payment.volume', amount)
      }
    }
  }

  /**
   * Gauge a wallet's XRP balance; warns below the 100 XRP threshold
   * (the alert rules escalate this to critical).
   */
  recordWalletBalance(address, balanceXRP) {
    this.metrics.gauge('xrpl.wallet.balance', balanceXRP, { address })
    // Alert on low balance
    if (balanceXRP < 100) {
      // BUG FIX: the warning was missing template-literal backticks.
      console.warn(`Low wallet balance: ${address} has ${balanceXRP} XRP`)
    }
  }
}
```
// Winston logger: timestamped, JSON-formatted structured logs.
// Errors land in error.log; everything at 'info' and above also lands
// in combined.log. NOTE(review): no Console transport is configured,
// so nothing reaches stdout — confirm that is intentional for prod.
const winston = require('winston')
const logger = winston.createLogger({
level: 'info',
format: winston.format.combine(
winston.format.timestamp(),
winston.format.json()
),
// defaultMeta is merged into every log entry emitted by this logger.
defaultMeta: { service: 'xrpl-integration' },
transports: [
new winston.transports.File({ filename: 'error.log', level: 'error' }),
new winston.transports.File({ filename: 'combined.log' }),
],
})
// Structured logging examples
/**
 * Emit a structured "payment_sent" entry for an outbound payment.
 * Currency defaults to 'XRP' when the payment object omits it.
 */
function logPaymentSent(payment, result) {
  const entry = {
    event: 'payment_sent',
    hash: result.hash,
    destination: payment.destination,
    amount: payment.amount,
    currency: payment.currency || 'XRP',
    result: result.result,
    latencyMs: result.latencyMs
  }
  logger.info('Payment sent', entry)
}
/**
 * Emit a structured "payment_received" entry for an incoming transaction.
 * Pulls the delivered amount and destination tag off the parsed tx.
 */
function logPaymentReceived(tx) {
  const entry = {
    event: 'payment_received',
    hash: tx.hash,
    from: tx.Account,
    amount: tx.deliveredAmount,
    destinationTag: tx.DestinationTag,
    ledger: tx.ledger_index
  }
  logger.info('Payment received', entry)
}
/**
 * Emit a structured "xrpl_error" entry: message, rippled error code when
 * available (error.data.error), stack trace, plus any caller context.
 */
function logError(error, context) {
  const code = error.data?.error || error.code
  logger.error('XRPL error', {
    event: 'xrpl_error',
    error: error.message,
    code,
    stack: error.stack,
    ...context
  })
}
```
/**
 * AuditLogger — writes audit records for transactions and security
 * events to a pluggable storage backend.
 *
 * @param {object} storage - backend exposing async write(record)
 * @param {?function} alertFn - optional async handler invoked for
 *   critical security events (backward-compatible addition)
 */
class AuditLogger {
  constructor(storage, alertFn = null) {
    this.storage = storage // Database, S3, etc.
    this.alertFn = alertFn
  }

  /**
   * Persist an audit record for a transaction, with request-level
   * attribution (userId / requestId / ipAddress) from `metadata`.
   */
  async logTransaction(tx, metadata) {
    const auditRecord = {
      timestamp: new Date().toISOString(),
      type: 'transaction',
      hash: tx.hash,
      account: tx.Account,
      transactionType: tx.TransactionType,
      amount: tx.Amount,
      destination: tx.Destination,
      result: tx.meta?.TransactionResult,
      metadata: {
        userId: metadata.userId,
        requestId: metadata.requestId,
        ipAddress: metadata.ipAddress
      }
    }
    await this.storage.write(auditRecord)
  }

  /**
   * Persist a security event (severity defaults to 'info') and alert the
   * security team immediately when severity is 'critical'.
   */
  async logSecurityEvent(event, details) {
    const auditRecord = {
      timestamp: new Date().toISOString(),
      type: 'security',
      event: event,
      severity: details.severity || 'info',
      details: details,
      sourceIp: details.sourceIp
    }
    await this.storage.write(auditRecord)
    // Immediate alert for high severity
    if (details.severity === 'critical') {
      await this.alertSecurityTeam(auditRecord)
    }
  }

  /**
   * Notify the security team of a critical event.
   * BUG FIX: this method was referenced above but never defined, so any
   * critical event threw a TypeError. Routes through the optional
   * constructor hook; falls back to console so the event is never lost.
   */
  async alertSecurityTeam(auditRecord) {
    if (this.alertFn) {
      await this.alertFn(auditRecord)
    } else {
      console.error('SECURITY ALERT (no alert channel configured):', auditRecord.event)
    }
  }
}
```
// Declarative alert rule catalog, grouped by subsystem. Each rule carries
// a metric condition expression, an optional sustain duration, a severity
// ('critical' pages; 'warning' notifies), and a human-readable message.
// NOTE(review): conditions are strings in a Prometheus-like syntax —
// confirm the evaluation engine they target.
const alertRules = {
// Connection health: total loss and flapping.
connection: {
disconnected: {
condition: 'connection_state == 0',
duration: '1m',
severity: 'critical',
message: 'XRPL connection lost for over 1 minute'
},
highDisconnectRate: {
condition: 'rate(disconnects[5m]) > 5',
severity: 'warning',
message: 'High disconnect rate: more than 5 disconnects in 5 minutes'
}
},
// Request-level error rate and tail latency.
requests: {
highErrorRate: {
condition: 'error_rate > 0.05', // 5%
duration: '5m',
severity: 'warning',
message: 'Request error rate above 5%'
},
highLatency: {
condition: 'p95_latency > 2000', // 2 seconds
duration: '5m',
severity: 'warning',
message: 'P95 request latency above 2 seconds'
}
},
// Transaction submission failures and validation backlog.
transactions: {
failureSpike: {
condition: 'rate(transaction_failures[5m]) > 10',
severity: 'critical',
message: 'Transaction failure rate spiking'
},
pendingTransactions: {
condition: 'pending_transactions > 100',
duration: '10m',
severity: 'warning',
message: 'Many transactions pending validation'
}
},
// Hot-wallet balance thresholds (warning at 1000 XRP, critical at 100).
wallet: {
lowBalance: {
condition: 'wallet_balance < 1000', // XRP
severity: 'warning',
message: 'Hot wallet balance below 1000 XRP'
},
criticalLowBalance: {
condition: 'wallet_balance < 100',
severity: 'critical',
message: 'Hot wallet balance critically low'
}
}
}
```
/**
 * AlertManager — evaluates alert conditions, de-duplicates firings with a
 * cooldown window, routes alerts to channels by severity, and sends
 * resolution notices when a condition clears.
 *
 * @param {object} config - { channels: Array<{type, send, sendResolution}>,
 *                            cooldownMinutes?: number (default 30) }
 */
class AlertManager {
  constructor(config) {
    this.channels = config.channels // Slack, PagerDuty, email, etc.
    this.activeAlerts = new Map()
    this.cooldownMinutes = config.cooldownMinutes || 30
  }

  /**
   * Evaluate one alert: fire when `condition` is truthy (respecting the
   * cooldown), resolve when it is falsy and the alert was active.
   */
  async checkAndAlert(alertId, condition, severity, message) {
    if (!condition) {
      // Condition resolved - clear alert
      if (this.activeAlerts.has(alertId)) {
        await this.resolveAlert(alertId)
      }
      return
    }
    // Check cooldown: don't re-fire the same alert within the window
    const existingAlert = this.activeAlerts.get(alertId)
    if (existingAlert) {
      const minutesSince = (Date.now() - existingAlert.time) / 60000
      if (minutesSince < this.cooldownMinutes) {
        return // In cooldown
      }
    }
    // Fire alert
    await this.fireAlert(alertId, severity, message)
  }

  /** Mark the alert active and notify the severity-appropriate channels. */
  async fireAlert(alertId, severity, message) {
    const alert = {
      id: alertId,
      severity,
      message,
      time: Date.now()
    }
    this.activeAlerts.set(alertId, alert)
    // Route to appropriate channels
    const channels = this.getChannelsForSeverity(severity)
    for (const channel of channels) {
      await channel.send(alert)
    }
    // BUG FIX: the log message was missing template-literal backticks.
    console.log(`ALERT [${severity}]: ${message}`)
  }

  /** Clear an active alert and send a resolution notice to all channels. */
  async resolveAlert(alertId) {
    const alert = this.activeAlerts.get(alertId)
    if (!alert) return
    this.activeAlerts.delete(alertId)
    const duration = Date.now() - alert.time
    // BUG FIX: the log message was missing template-literal backticks.
    console.log(`RESOLVED [${alert.severity}]: ${alert.message} (duration: ${duration}ms)`)
    // Notify resolution
    for (const channel of this.channels) {
      await channel.sendResolution(alert, duration)
    }
  }

  /**
   * Severity routing: critical → every channel; warning → everything but
   * PagerDuty; anything else → Slack only.
   */
  getChannelsForSeverity(severity) {
    switch (severity) {
      case 'critical':
        return this.channels // All channels
      case 'warning':
        return this.channels.filter(c => c.type !== 'pagerduty')
      default:
        return this.channels.filter(c => c.type === 'slack')
    }
  }
}
```
// Dashboard layout: three pages of panels keyed by Prometheus-style
// queries. NOTE(review): metric names here use underscores
// (xrpl_connection_state) while the monitors above emit dotted names
// (xrpl.connection.state) — confirm the exporter performs that mapping.
const dashboardConfig = {
// Page 1: connection health, traffic, errors, tail latency.
overview: {
panels: [
{
title: 'Connection Status',
type: 'status',
query: 'xrpl_connection_state',
thresholds: { good: 1, bad: 0 }
},
{
title: 'Request Rate',
type: 'graph',
query: 'rate(xrpl_requests_total[1m])'
},
{
title: 'Error Rate',
type: 'graph',
query: 'rate(xrpl_request_errors[1m]) / rate(xrpl_requests_total[1m])'
},
{
title: 'Request Latency (P95)',
type: 'graph',
query: 'histogram_quantile(0.95, xrpl_request_latency_bucket)'
}
]
},
// Page 2: transaction outcomes and payment volume.
transactions: {
panels: [
{
title: 'Transaction Success Rate',
type: 'gauge',
query: 'xrpl_transaction_success / xrpl_transaction_total'
},
{
title: 'Payments by Result',
type: 'piechart',
query: 'sum by (result) (xrpl_transactions_total)'
},
{
title: 'Payment Volume (XRP)',
type: 'graph',
query: 'sum(rate(xrpl_payment_volume[1h]))'
}
]
},
// Page 3: hot-wallet balance, thresholds matching the alert rules.
wallets: {
panels: [
{
title: 'Hot Wallet Balance',
type: 'gauge',
query: 'xrpl_wallet_balance{type="hot"}',
thresholds: { warning: 1000, critical: 100 }
},
{
title: 'Balance History',
type: 'graph',
query: 'xrpl_wallet_balance'
}
]
}
}
```
# XRPL Connection Lost Runbook
- **Name:** xrpl_connection_lost
- **Severity:** Critical
- **Condition:** Connection to XRPL network lost for >1 minute
- Unable to query account balances
- Unable to submit transactions
- Unable to monitor incoming payments
- Check server status: `curl https://s1.ripple.com:51234/`
- Check network connectivity: `ping s1.ripple.com`
- Check application logs for disconnect reason
Check XRPL network status: https://livenet.xrpl.org/ (the legacy xrpcharts.ripple.com dashboard has been retired)
- If single server issue, verify failover to backup servers
- If network issue, check infrastructure (firewall, DNS, etc.)
- If application issue, restart service: `systemctl restart xrpl-service`
- Verify reconnection in logs and metrics
- If not resolved in 15 minutes, page on-call engineer
- If affecting payments, notify finance team
/**
 * AutoRemediation — first-line automated responses to common alerts,
 * escalating to a human via the alert manager when they fail.
 *
 * @param {object} client - XRPL client exposing async connect()
 * @param {object} alertManager - exposes async escalate(id, message)
 * @param {?function} replenishFn - optional async hook that tops up the
 *   hot wallet and resolves truthy on success (backward-compatible addition)
 */
class AutoRemediation {
  constructor(client, alertManager, replenishFn = null) {
    this.client = client
    this.alertManager = alertManager
    this.replenishFn = replenishFn
  }

  /**
   * Try to reconnect up to 3 times with linear backoff (5s, 10s, 15s).
   * @returns {Promise<boolean>} true on reconnect, false after escalating
   */
  async handleConnectionLost() {
    console.log('Auto-remediation: Attempting reconnection...')
    for (let attempt = 1; attempt <= 3; attempt++) {
      try {
        await this.client.connect()
        console.log('Auto-remediation: Reconnection successful')
        return true
      } catch (error) {
        // BUG FIX: the message was missing template-literal backticks, and
        // `sleep` was an undefined free identifier — inlined a Promise delay.
        console.log(`Auto-remediation: Attempt ${attempt} failed`)
        await new Promise((resolve) => setTimeout(resolve, 5000 * attempt))
      }
    }
    console.log('Auto-remediation: Failed, escalating to human')
    await this.alertManager.escalate('connection_lost', 'Auto-remediation failed')
    return false
  }

  /** Attempt wallet replenishment; escalate when it fails or is unset. */
  async handleLowBalance(threshold) {
    console.log('Auto-remediation: Low balance detected')
    // Check if we have a replenishment process
    const replenished = await this.requestReplenishment()
    if (!replenished) {
      await this.alertManager.escalate('low_balance', 'Replenishment failed')
    }
  }

  /**
   * Run the configured replenishment hook.
   * BUG FIX: this method was referenced above but never defined, so low
   * balance handling threw a TypeError. Returns false (forcing escalation)
   * when no hook is configured or the hook throws.
   */
  async requestReplenishment() {
    if (!this.replenishFn) return false
    try {
      return Boolean(await this.replenishFn())
    } catch (error) {
      console.error('Auto-remediation: replenishment failed', error)
      return false
    }
  }
}
```
- Tracks connection, request, and transaction metrics
- Implements structured logging
- Configures alerts for critical conditions
- Provides a dashboard view of system health
Time Investment: 3-4 hours
End of Lesson 16
Key Takeaways
Monitor connection state:
Most critical single metric.
Track latency percentiles:
Averages hide problems.
Alert on anomalies:
Rate of change, not just thresholds.
Use structured logging:
Makes debugging possible.
Maintain runbooks:
Keep them updated and tested.

---