Server Monitoring Best Practices: Keeping Your Laravel App and Redis Clusters Alive on Google Cloud
Establishing a Robust Monitoring Foundation with Google Cloud Operations Suite
For any production Laravel application, especially those leveraging Redis clusters for caching and session management on Google Cloud Platform (GCP), a proactive and comprehensive monitoring strategy is non-negotiable. We’ll focus on leveraging GCP’s native tools, specifically Cloud Monitoring and Cloud Logging, to gain deep visibility into application performance, infrastructure health, and potential bottlenecks. This isn’t about setting up basic alerts; it’s about building a system that anticipates issues before they impact users.
Monitoring Laravel Application Performance
Directly instrumenting your Laravel application is key to understanding its internal workings. We’ll use the official Google Cloud Client Libraries for PHP to send custom metrics and logs. This allows us to track critical business logic, request latency, and error rates specific to your application’s context.
Custom Metrics for Laravel
Let’s define a custom metric to track the duration of a specific, performance-sensitive operation, such as processing an incoming webhook or a complex data transformation. We’ll also log errors with detailed context.
PHP Implementation with Google Cloud Client Library
First, ensure you have the necessary Google Cloud client libraries installed:
composer require google/cloud-monitoring google/cloud-logging
Next, create a service class or integrate this logic into your existing service providers or controllers. For this example, we’ll assume a hypothetical `OrderProcessingService`.
`app/Services/MonitoringService.php`
This service will handle sending metrics and logs to GCP. It’s crucial to initialize the clients once and reuse them.
<?php
namespace App\Services;
use Google\Cloud\Monitoring\V3\MetricServiceClient;
use Google\Cloud\Monitoring\V3\TimeSeries;
use Google\Cloud\Monitoring\V3\Point;
use Google\Cloud\Monitoring\V3\Metric;
use Google\Cloud\Monitoring\V3\MonitoredResource;
use Google\Cloud\Logging\V2\LoggingServiceV2Client;
use Google\Cloud\Logging\V2\LogEntry;
use Google\Protobuf\Timestamp;
use DateTime;
use Exception;
/**
 * Thin wrapper around the Cloud Monitoring and Cloud Logging clients.
 *
 * Both public methods are best-effort: any exception raised while building or
 * sending a request is written to the PHP error log and never reaches the
 * caller, so a monitoring outage cannot break the request being monitored.
 */
class MonitoringService
{
    private MetricServiceClient $metricClient;
    private LoggingServiceV2Client $loggingClient;
    private string $projectId;
    private string $gcpLocation; // Full zone for GCE, e.g. 'us-central1-a'
    private string $gcpClusterName; // e.g., 'my-laravel-cluster' (only needed for GKE resources)
    private string $gcpInstanceName; // e.g., 'laravel-app-instance-1'

    /**
     * @param string $projectId       GCP project that receives the metrics and logs.
     * @param string $gcpLocation     Zone reported in the monitored-resource labels.
     * @param string $gcpClusterName  GKE cluster name (stored for GKE setups, unused for GCE).
     * @param string $gcpInstanceName Instance identifier reported in the monitored-resource labels.
     */
    public function __construct(string $projectId, string $gcpLocation, string $gcpClusterName, string $gcpInstanceName)
    {
        $this->projectId = $projectId;
        $this->gcpLocation = $gcpLocation;
        $this->gcpClusterName = $gcpClusterName;
        $this->gcpInstanceName = $gcpInstanceName;

        // Initialize clients once and reuse them. Consider dependency injection for production.
        $this->metricClient = new MetricServiceClient();
        $this->loggingClient = new LoggingServiceV2Client();
    }

    /**
     * Records a custom metric for an operation's duration.
     *
     * @param string $metricType The type of the metric (e.g., 'custom.googleapis.com/laravel/order_processing_time').
     * @param float $durationSeconds The duration of the operation in seconds.
     * @param array $labels Key-value pairs for metric labels; values are cast to string.
     */
    public function recordDurationMetric(string $metricType, float $durationSeconds, array $labels = []): void
    {
        try {
            $now = new DateTime();
            $endTime = (new Timestamp())
                ->setSeconds($now->getTimestamp())
                ->setNanos((int) $now->format('u') * 1000); // Microseconds to nanoseconds

            // A gauge point is described by an interval that carries only an end time.
            $point = (new Point())
                ->setInterval((new \Google\Cloud\Monitoring\V3\TimeInterval())->setEndTime($endTime))
                ->setValue((new \Google\Cloud\Monitoring\V3\TypedValue())->setDoubleValue($durationSeconds));

            // Metric labels are a string => string map, so normalise ints and the like.
            $stringLabels = array_map(
                static fn ($value): string => (string) $value,
                $labels
            );

            // Fully-qualified Google\Api classes: Metric and MonitoredResource are
            // defined in the shared API protos, not in the Monitoring V3 namespace.
            $timeSeries = (new TimeSeries())
                ->setMetric((new \Google\Api\Metric())->setType($metricType)->setLabels($stringLabels))
                ->setResource($this->buildMonitoredResource())
                ->setPoints([$point]);

            // The API expects the resource name 'projects/<id>', not the bare project id.
            $this->metricClient->createTimeSeries(
                MetricServiceClient::projectName($this->projectId),
                [$timeSeries]
            );
        } catch (Exception $e) {
            error_log("Failed to record metric {$metricType}: " . $e->getMessage());
        }
    }

    /**
     * Logs an error with context to Cloud Logging.
     *
     * Without context the entry is written as a plain text payload. With
     * context it is written as a structured payload whose 'message' key holds
     * $message; a log entry can carry only one payload kind, so the two are
     * merged rather than set separately.
     *
     * @param string $message The error message.
     * @param array $context Additional context (e.g., request ID, user ID, payload snippet).
     * @param string $severity The severity level (e.g., 'ERROR', 'WARNING', 'INFO').
     */
    public function logError(string $message, array $context = [], string $severity = 'ERROR'): void
    {
        try {
            $logEntry = (new LogEntry())
                ->setLogName(sprintf('projects/%s/logs/laravel-app', $this->projectId))
                ->setSeverity($this->resolveSeverity($severity))
                ->setResource($this->buildMonitoredResource());

            if ($context === []) {
                $logEntry->setTextPayload($message);
            } else {
                // '+' keeps the left-hand 'message', so a context key cannot overwrite it.
                $payload = new \Google\Protobuf\Struct();
                $payload->mergeFromJsonString(
                    json_encode(
                        ['message' => $message] + $context,
                        JSON_THROW_ON_ERROR | JSON_INVALID_UTF8_SUBSTITUTE
                    )
                );
                $logEntry->setJsonPayload($payload);
            }

            // Entries go first; each entry already carries its own log name and resource.
            $this->loggingClient->writeLogEntries([$logEntry]);
        } catch (Exception $e) {
            error_log("Failed to log error: " . $e->getMessage());
        }
    }

    /**
     * Builds the monitored resource shared by metrics and log entries.
     *
     * For GKE switch the type to 'k8s_pod' / 'k8s_container' and supply
     * 'cluster_name', 'namespace_name', 'pod_name' (and 'container_name').
     *
     * NOTE(review): 'gce_instance' documents 'instance_id' as the VM's numeric
     * id — confirm the configured instance name is acceptable for your queries.
     */
    private function buildMonitoredResource(): \Google\Api\MonitoredResource
    {
        return (new \Google\Api\MonitoredResource())
            ->setType('gce_instance')
            ->setLabels([
                'project_id' => $this->projectId,
                'instance_id' => $this->gcpInstanceName,
                // The full zone (e.g. 'us-central1-a'); stripping the suffix would yield a region.
                'zone' => $this->gcpLocation,
            ]);
    }

    /**
     * Maps a severity name to its LogSeverity enum value, defaulting to ERROR
     * when the name is not a known level.
     */
    private function resolveSeverity(string $severity): int
    {
        try {
            return \Google\Cloud\Logging\Type\LogSeverity::value(strtoupper($severity));
        } catch (\UnexpectedValueException $e) {
            return \Google\Cloud\Logging\Type\LogSeverity::ERROR;
        }
    }

    // Add methods for other metrics like request counts, queue job durations, etc.
}
Integrating with Laravel Services
You’ll need to configure your GCP project ID, location, and instance/cluster names. A good place for this is within your Laravel configuration files or environment variables.
`config/services.php` (or `.env` and `config/app.php` for loading)
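Add these to your `.env` file and expose them in `config/services.php` under a `gcp` key (for example `'gcp' => ['project_id' => env('GCP_PROJECT_ID'), 'location' => env('GCP_LOCATION'), 'cluster_name' => env('GCP_CLUSTER_NAME'), 'instance_name' => env('GCP_INSTANCE_NAME')]`), because the service provider below reads `services.gcp.*`.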
GCP_PROJECT_ID=your-gcp-project-id
GCP_LOCATION=us-central1-a
# If using GKE
GCP_CLUSTER_NAME=my-laravel-gke-cluster
# If using GCE
GCP_INSTANCE_NAME=laravel-app-instance-01
Then, register the `MonitoringService` in your `AppServiceProvider`.
`app/Providers/AppServiceProvider.php`
<?php
namespace App\Providers;
use Illuminate\Support\ServiceProvider;
use App\Services\MonitoringService;
class AppServiceProvider extends ServiceProvider
{
    /**
     * Bind application services into the container.
     *
     * MonitoringService is shared as a singleton so its GCP clients are
     * created once per process.
     *
     * @return void
     */
    public function register()
    {
        $monitoringFactory = function ($app) {
            $projectId = config('services.gcp.project_id');
            $location = config('services.gcp.location');
            $clusterName = config('services.gcp.cluster_name', ''); // Optional for GCE
            $instanceName = config('services.gcp.instance_name', ''); // Optional for GKE

            return new MonitoringService($projectId, $location, $clusterName, $instanceName);
        };

        $this->app->singleton(MonitoringService::class, $monitoringFactory);
    }

    /**
     * Run after all providers are registered; nothing to do here.
     *
     * @return void
     */
    public function boot()
    {
        //
    }
}
Example Usage in a Service
Now, inject and use the `MonitoringService` in your controllers or other services.
`app/Services/OrderProcessingService.php`
<?php
namespace App\Services;
use App\Services\MonitoringService;
use Illuminate\Support\Facades\Log;
use Exception;
/**
 * Example service showing how to time an operation and report the outcome
 * through MonitoringService.
 */
class OrderProcessingService
{
    /** Custom metric that receives one duration point per processed order. */
    private const DURATION_METRIC = 'custom.googleapis.com/laravel/order_processing_time';

    private MonitoringService $monitoringService;

    public function __construct(MonitoringService $monitoringService)
    {
        $this->monitoringService = $monitoringService;
    }

    /**
     * Processes an order and reports its duration and outcome.
     *
     * @param array $orderData Order payload; 'id' and 'user_id' are read when present.
     * @return bool True when processing succeeded, false when it threw.
     */
    public function processOrder(array $orderData): bool
    {
        $startTime = microtime(true);
        $orderId = $orderData['id'] ?? 'unknown';
        $userId = $orderData['user_id'] ?? 'guest';

        try {
            // Simulate order processing logic
            sleep(rand(1, 5)); // Simulate work
            if (rand(1, 100) <= 5) { // 5% chance of failure
                throw new Exception("Simulated database error during order processing.");
            }
        } catch (Exception $e) {
            $duration = microtime(true) - $startTime;
            $this->recordDuration($duration, $orderId, $userId, 'failed');
            $this->monitoringService->logError("Failed to process order {$orderId}", [
                'exception' => $e->getMessage(),
                'trace' => $e->getTraceAsString(),
                'order_data' => array_slice($orderData, 0, 5), // Log a snippet of data
                'user_id' => $userId,
                'duration_seconds' => $duration,
            ]);

            return false;
        }

        // Recorded outside the try block so that a failure while reporting the
        // metric can never turn a successfully processed order into a "failed" one.
        $this->recordDuration(microtime(true) - $startTime, $orderId, $userId, 'succeeded');

        // Log successful processing (optional, can be done via framework logs)
        return true;
    }

    /**
     * Sends one duration point with the same label set for every outcome, so
     * success and failure series can be compared or filtered by 'status'.
     *
     * NOTE(review): 'order_id' and 'user_id' are high-cardinality labels; they
     * are kept because the dashboards in this guide group by them.
     *
     * @param float $durationSeconds Elapsed processing time in seconds.
     * @param mixed $orderId Order identifier; cast to string for the label.
     * @param mixed $userId User identifier; cast to string for the label.
     * @param string $status Outcome label value ('succeeded' or 'failed').
     */
    private function recordDuration(float $durationSeconds, $orderId, $userId, string $status): void
    {
        $this->monitoringService->recordDurationMetric(
            self::DURATION_METRIC,
            $durationSeconds,
            [
                'order_id' => (string) $orderId,
                'user_id' => (string) $userId,
                'status' => $status,
            ]
        );
    }
}
Setting up Cloud Monitoring Dashboards and Alerts
Once metrics are flowing, you can visualize them and set up alerts in the GCP Console. Navigate to Cloud Monitoring > Metrics Explorer to query your custom metrics. For example, to see the average order processing time:
Metrics Explorer Query
Resource Type: GCE VM Instance (or Kubernetes Pod/Container if applicable)
Metric: Custom / Laravel / Order Processing Time ( `custom.googleapis.com/laravel/order_processing_time` )
Aggregator: Mean
Group By: `order_id`, `user_id` (or other relevant labels)
To create alerts, go to Cloud Monitoring > Alerting > Create Policy. Configure conditions based on your custom metrics (e.g., average processing time exceeding 5 seconds for more than 5 minutes) or standard metrics (CPU utilization, memory usage).
Monitoring Redis Clusters
Redis clusters, whether managed (Memorystore) or self-hosted on GCE/GKE, require specific monitoring. We’ll focus on key Redis performance indicators and how to surface them in Cloud Monitoring.
Key Redis Metrics to Monitor
- Latency: `redis_latency_percentiles` (average, p95, p99)
- Memory Usage: `used_memory_rss`, `used_memory_peak`
- Connections: `connected_clients`, `rejected_connections`
- Cache Hit Rate: `keyspace_hits`, `keyspace_misses` (calculate hit rate: `hits / (hits + misses)`)
- Command Throughput: `instantaneous_ops_per_sec`
- Replication Lag: `master_repl_offset`, `slave_repl_offset` (for master/replica setups)
- Evictions: `evicted_keys`
Monitoring Redis on Google Cloud Memorystore
Memorystore for Redis automatically exposes many of these metrics to Cloud Monitoring. You can view them directly in the GCP Console under Memorystore > Redis > [Your Instance] > Metrics. Key metrics include:
- `redis.googleapis.com/commands/total` (Command Throughput)
- `redis.googleapis.com/network/received_bytes_count`
- `redis.googleapis.com/network/sent_bytes_count`
- `redis.googleapis.com/memory/usage`
- `redis.googleapis.com/clients/count`
- `redis.googleapis.com/keys/evicted_count`
You can create dashboards and alerts for these Memorystore metrics just like custom application metrics.
Monitoring Self-Hosted Redis Clusters
If you’re running Redis on GCE or GKE, you’ll need to use an agent like Prometheus with the official Redis Exporter, or configure `redis-cli --stat` and pipe its output to a custom script that sends data to Cloud Monitoring via the API. Using Prometheus is generally the more robust and scalable approach.
Option 1: Prometheus and Redis Exporter
1. Deploy Redis Exporter: Run the Redis Exporter as a sidecar container (in GKE) or a separate service (on GCE). It exposes metrics on an HTTP endpoint (default: `/metrics`).
# Example Kubernetes Deployment snippet for Redis Exporter
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis-exporter
  template:
    metadata:
      labels:
        app: redis-exporter
    spec:
      containers:
        - name: redis-exporter
          # Pin a specific release tag in production instead of 'latest'
          # so that rollouts are reproducible.
          image: oliver006/redis_exporter:latest
          ports:
            - containerPort: 9121 # Default Prometheus metrics port
          env:
            - name: REDIS_ADDR
              value: "redis://your-redis-master-service:6379" # Or your Redis cluster endpoint
          # Add arguments for specific Redis commands or configurations if needed
          # args: ["--redis.commandstats", "--redis.slowlog"]
2. Configure Prometheus: Set up Prometheus to scrape the Redis Exporter endpoint. You can use the Prometheus Operator in GKE or a standalone Prometheus instance.
# Example Prometheus scrape config
scrape_configs:
  - job_name: 'redis'
    static_configs:
      # Adjust service name and namespace to match the exporter's Service.
      - targets: ['redis-exporter.default.svc.cluster.local:9121']
3. Export to Cloud Monitoring: Use the Prometheus-to-Cloud-Monitoring exporter (part of the Cloud Operations for GKE agent or a separate deployment) to forward Prometheus metrics to Cloud Monitoring. This is often configured via the `gke-connect-agent` or by deploying the `prometheus-to-stackdriver` exporter.
Option 2: Custom Script with `redis-cli --stat`
This approach is simpler for basic metrics but less scalable. You’d run a script periodically that fetches stats and sends them via the Cloud Monitoring API.
`scripts/monitor_redis.py`
This Python script uses the `redis-py` library and the GCP client library.
import redis
import time
import os
from google.cloud import monitoring_v3
from google.protobuf.timestamp_pb2 import Timestamp
from datetime import datetime, timezone
# --- Runtime settings: each value can be overridden through the environment ---
REDIS_HOST = os.getenv('REDIS_HOST', 'localhost')
REDIS_PORT = int(os.getenv('REDIS_PORT', 6379))
GCP_PROJECT_ID = os.getenv('GCP_PROJECT_ID', 'your-gcp-project-id')
GCP_LOCATION = os.getenv('GCP_LOCATION', 'us-central1-a')
# Instance name, or a cluster name/id when monitoring a cluster.
GCP_INSTANCE_NAME = os.getenv('GCP_INSTANCE_NAME', 'redis-instance-1')
# Seconds to wait between two collection rounds.
MONITORING_INTERVAL_SECONDS = 60
# ------------------------------------------------------------------------------
def get_redis_stats():
    """Collect a snapshot of Redis INFO counters.

    Connects to REDIS_HOST:REDIS_PORT, pings the server and reads INFO.

    Returns:
        A dict with the integer counters listed below plus a derived
        'keyspace_hit_rate' (0.0 when there were no lookups). When INFO
        contains both 'master_repl_offset' and 'slave_repl_offset', the two
        offsets and their difference ('replication_lag') are added as well.
        Returns None when the connection fails or any other error occurs.
    """
    integer_fields = (
        'connected_clients',
        'used_memory_rss',
        'used_memory_peak',
        'instantaneous_ops_per_sec',
        'keyspace_hits',
        'keyspace_misses',
        'evicted_keys',
        'rejected_connections',
    )
    try:
        client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, decode_responses=True)
        client.ping()  # Fail fast if the server is unreachable
        info = client.info()

        # Missing fields fall back to 0 so every counter is always present.
        stats = {field: int(info.get(field, 0)) for field in integer_fields}

        lookups = stats['keyspace_hits'] + stats['keyspace_misses']
        if lookups > 0:
            stats['keyspace_hit_rate'] = stats['keyspace_hits'] / lookups
        else:
            stats['keyspace_hit_rate'] = 0.0

        # NOTE(review): both offsets come from the same INFO reply; confirm the
        # difference reflects real replication lag in your topology.
        if 'master_repl_offset' in info and 'slave_repl_offset' in info:
            master_offset = int(info['master_repl_offset'])
            replica_offset = int(info['slave_repl_offset'])
            stats['master_repl_offset'] = master_offset
            stats['slave_repl_offset'] = replica_offset
            stats['replication_lag'] = master_offset - replica_offset

        return stats
    except redis.exceptions.ConnectionError as e:
        print(f"Error connecting to Redis: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred fetching Redis stats: {e}")
        return None
def create_metric_time_series(metric_client, project_id, metric_type, value, labels=None):
    """Write a single gauge point for ``metric_type`` to Cloud Monitoring.

    Args:
        metric_client: A ``monitoring_v3.MetricServiceClient``.
        project_id: GCP project id; sent as ``projects/<project_id>``.
        metric_type: Full metric type, e.g. ``custom.googleapis.com/redis/...``.
        value: Numeric value; converted to float and written as a double.
        labels: Optional mapping of metric labels; keys and values are
            converted to strings.

    Failures reported by the API call are printed and swallowed so that one
    bad metric does not stop the collection loop.
    """
    now = datetime.now(timezone.utc)
    seconds = int(now.timestamp())
    nanos = now.microsecond * 1000  # Microseconds to nanoseconds

    time_series = monitoring_v3.TimeSeries()
    time_series.metric.type = metric_type
    for key, label_value in (labels or {}).items():
        time_series.metric.labels[str(key)] = str(label_value)

    # Populate the embedded resource in place; the client package does not
    # export a MonitoredResource class of its own.
    # Adjust for GKE: type 'k8s_container' / 'k8s_pod' with 'cluster_name',
    # 'namespace_name', 'pod_name', 'container_name'. Use 'generic_node' off GCE.
    time_series.resource.type = 'gce_instance'
    time_series.resource.labels['project_id'] = project_id
    time_series.resource.labels['instance_id'] = GCP_INSTANCE_NAME  # Or 'node_id'
    # The full zone (e.g. 'us-central1-a'); stripping the suffix would yield a region.
    time_series.resource.labels['zone'] = GCP_LOCATION

    # Build the interval from plain dicts: timestamp fields are exposed as
    # datetime objects, so their seconds/nanos cannot be assigned directly.
    interval = monitoring_v3.TimeInterval(
        {'end_time': {'seconds': seconds, 'nanos': nanos}}
    )
    point = monitoring_v3.Point(
        {'interval': interval, 'value': {'double_value': float(value)}}
    )
    time_series.points = [point]

    try:
        metric_client.create_time_series(name=f"projects/{project_id}", time_series=[time_series])
        print(f"Recorded metric: {metric_type} = {value}")
    except Exception as e:
        print(f"Failed to record metric {metric_type}: {e}")
def main():
    """Poll Redis forever and forward each snapshot to Cloud Monitoring.

    Every MONITORING_INTERVAL_SECONDS the INFO snapshot is fetched; each known
    counter is written as its own custom metric, followed by the replication
    lag when it is present. A failed snapshot is skipped and retried on the
    next round.
    """
    # Loop-invariant lookups are built once instead of once per metric.
    metric_type_map = {
        'connected_clients': 'custom.googleapis.com/redis/connected_clients',
        'used_memory_rss': 'custom.googleapis.com/redis/memory_rss_bytes',
        'used_memory_peak': 'custom.googleapis.com/redis/memory_peak_bytes',
        'instantaneous_ops_per_sec': 'custom.googleapis.com/redis/ops_per_sec',
        'keyspace_hits': 'custom.googleapis.com/redis/keyspace_hits',
        'keyspace_misses': 'custom.googleapis.com/redis/keyspace_misses',
        'evicted_keys': 'custom.googleapis.com/redis/evicted_keys',
        'rejected_connections': 'custom.googleapis.com/redis/rejected_connections',
        'keyspace_hit_rate': 'custom.googleapis.com/redis/keyspace_hit_rate',
    }
    common_labels = {
        'location': GCP_LOCATION,
        'instance_name': GCP_INSTANCE_NAME,  # Or cluster name
    }
    metric_client = monitoring_v3.MetricServiceClient()

    while True:
        stats = get_redis_stats()
        if stats:
            for metric_name, value in stats.items():
                # Stats without a mapping (e.g. raw replication offsets) are not exported.
                metric_type = metric_type_map.get(metric_name)
                if metric_type is not None:
                    create_metric_time_series(
                        metric_client,
                        GCP_PROJECT_ID,
                        metric_type,
                        value,
                        common_labels,
                    )

            # Replication lag is a derived difference, so it is handled separately.
            replication_lag = stats.get('replication_lag')
            if replication_lag is not None:
                create_metric_time_series(
                    metric_client,
                    GCP_PROJECT_ID,
                    'custom.googleapis.com/redis/replication_lag',
                    replication_lag,
                    common_labels,
                )

        time.sleep(MONITORING_INTERVAL_SECONDS)
if __name__ == '__main__':
    # Refuse to start against an unset or placeholder project id.
    if not GCP_PROJECT_ID or GCP_PROJECT_ID == 'your-gcp-project-id':
        # SystemExit with a message prints it to stderr and exits with status 1.
        # The bare exit() helper is meant for the interactive shell, not programs.
        raise SystemExit("Error: GCP_PROJECT_ID environment variable not set or is default.")
    main()
To run this script, you’ll need to set up authentication for the GCP client library (e.g., using a service account key file or running on a GCP resource with appropriate scopes). Deploy this script to your GCE instances or within your GKE cluster.
Centralized Logging and Error Tracking
Beyond metrics, robust logging is crucial for debugging and understanding application behavior. Cloud Logging provides a centralized, searchable log repository.
Leveraging Cloud Logging for Laravel
The `MonitoringService` already includes a `logError` method. For general application logs, Laravel’s default Monolog configuration can be adapted to send logs to Cloud Logging. You can use the `google/cloud-logging` package and configure Monolog’s handler.
Monolog Configuration for Cloud Logging
Install the Monolog handler:
composer require monolog/monolog google/cloud-logging
Modify your `config/logging.php` to include a Cloud Logging channel.
`config/logging.php`
<?php
use Monolog\Handler\NullHandler;
use Monolog\Handler\StreamHandler;
use Monolog\Handler\SyslogUdpHandler;
use Monolog\Formatter\LineFormatter;
use Google\Cloud\Logging\Monolog\Handler\GoogleCloudLoggingHandler;
use Google\Cloud\Logging\PsrLogger; // For PSR-3 compatibility
return [
    // ... other options (e.g. 'default')
    'channels' => [
        // ... other channels

        // For 'custom' channels, 'via' names an invokable factory class:
        // Laravel resolves it from the container and calls it with this
        // channel's configuration array. App\Logging\CloudLogging implements
        // __invoke(), so the class name alone is the correct value.
        'stackdrivers' => [ // Renamed from 'stackdriver' for clarity if you use both
            'driver' => 'custom',
            'via' => App\Logging\CloudLogging::class,
            'level' => env('LOG_LEVEL', 'debug'),
        ],
        'gcp_logging' => [
            'driver' => 'custom',
            'via' => App\Logging\CloudLogging::class,
            'level' => env('LOG_LEVEL', 'debug'),
        ],

        // Fallback for local development or when GCP logging isn't configured
        'single' => [
            'driver' => 'single',
            'path' => storage_path('logs/laravel.log'),
            'level' => env('LOG_LEVEL', 'debug'),
        ],
        'daily' => [
            'driver' => 'daily',
            'path' => storage_path('logs/laravel.log'),
            'level' => env('LOG_LEVEL', 'debug'),
            'days' => env('LOG_RETENTION', 14),
        ],
    ],
    // ...
];
Custom Logging Class for Monolog Handler
Create a new file `app/Logging/CloudLogging.php` to bridge Monolog and the Google Cloud Logging client.
`app/Logging/CloudLogging.php`
<?php
namespace App\Logging;
use Monolog\Logger;
use Google\Cloud\Logging\Monolog\Handler\GoogleCloudLoggingHandler;
use Google\Cloud\Logging\PsrLogger;
use Illuminate\Support\Arr;
class CloudLogging
{
/**
* Create a custom Monolog instance.
*
* @param array $config
* @return \Monolog\Logger
*/
public function __invoke(array $config)
{
$projectId = config('services.gcp.project_id');
$gcpLocation = config('services.gcp.location');
$gcpInstanceName = config('services.gcp.instance_name', ''); // Optional
if (!$projectId) {
throw new \InvalidArgumentException('GCP_PROJECT_ID is not configured.');
}
// Determine the monitored resource type and labels
$resourceType = 'gce_instance'; // Default for GCE
$resourceLabels = [
'project_id' => $projectId,
'instance_id' => $gcpInstanceName,
'zone' => explode('-', $gcpLocation)[0] . '-' . explode('-', $gcpLocation)[1],
];
// Adjust for GKE if applicable (e.g., if you have a GKE_CLUSTER_NAME env var)
if (getenv('GKE_CLUSTER_NAME')) {
$resourceType = 'k8s_container';
$resourceLabels = [
'project_id' => $projectId,
'cluster_name' => getenv('GKE_CLUSTER_NAME'),
'namespace_name' => getenv('POD_NAMESPACE', 'default'),
'pod_name' => getenv('POD_NAME', 'unknown-pod'),
'container_name' => getenv('CONTAINER_NAME', 'unknown-container'),
];
}
$monitoredResource = [
'type' => $resourceType,
'labels' => $resourceLabels,
];
$loggingClient = new PsrLogger($projectId, [
'clientConfig' => [