diff --git a/loki/README.md b/loki/README.md new file mode 100644 index 0000000..52498d0 --- /dev/null +++ b/loki/README.md @@ -0,0 +1,121 @@ +# Homelab Log Aggregation Stack + +Grafana Alloy + Loki + Grafana, configured for: +- **MikroTik RB5009** (and other network devices) via syslog +- **Docker container logs** on the host machine + +## Quick Start + +```bash +# (Optional) load secrets first if using the Vaultwarden secrets workflow +# ./secrets-load.sh docker/loki-stack .env + +docker compose up -d +``` + +Grafana will be available at **http://\<docker-host-ip\>:3098** +Default login: `admin` / `changeme` (set via `GF_SECURITY_ADMIN_PASSWORD` in `compose.yaml`) — change it before exposing Grafana. + +--- + +## MikroTik RB5009 Configuration + +In RouterOS (Winbox or SSH), run: + +```routeros +# Create a remote logging action pointing at this Docker host +/system logging action +add name=remote-loki \ + target=remote \ + remote=<docker-host-ip> \ + remote-port=514 \ + bsd-syslog=yes \ + syslog-facility=local0 \ + syslog-severity=auto + +# Send all log topics to Loki +/system logging +add action=remote-loki topics=all +``` + +To verify it's working, SSH into the RB5009 and run: +```routeros +/log print follow +``` +...then in Grafana, open Explore → Loki and query `{source="network"}`. +You should see entries appearing within a few seconds. 
+ +--- + +## Useful LogQL Queries + +**All RB5009 logs:** +```logql +{job="syslog", source="network"} +``` + +**RB5009 interface/link events only:** +```logql +{job="syslog", source="network"} |= "link" +``` + +**All logs from a specific Docker container:** +```logql +{job="docker", container="myapp"} +``` + +**Errors across all Docker containers:** +```logql +{job="docker"} |= "error" | logfmt | level="error" +``` + +**All syslog and Docker logs in one stream (pick the time range and sort order in Grafana's query options):** +```logql +{job=~"syslog|docker"} | line_format "{{.source}} {{.container}} {{__line__}}" +``` + +--- + +## File Layout + +``` +loki-stack/ +├── docker-compose.yml +├── alloy/ +│ └── config.alloy # Alloy pipeline config (syslog + Docker) +├── loki/ +│ └── loki-config.yml # Loki storage and retention config +└── grafana/ + └── provisioning/ + └── datasources/ + └── loki.yml # Auto-provisions Loki as default datasource +``` + +## Retention + +Logs are kept for **90 days** by default. To change this, edit `loki/loki-config.yml`: +```yaml +limits_config: + retention_period: 30d # or 180d, etc. +``` +Then restart Loki: `docker compose restart loki` + +## Adding More Syslog Sources + +Any device that can send syslog (UDP/TCP 514) will work automatically — +the `host` label will be set from the syslog hostname field, so you can +filter per-device in Grafana with `{host="my-device-hostname"}`. + +--- + +## Integrating with the Vaultwarden Secrets Workflow + +If you're using the `secrets-load.sh` script, store the Grafana admin +password as a custom field named `GF_SECURITY_ADMIN_PASSWORD` in a +Vaultwarden item called `docker/loki-stack`, then replace the hardcoded +value in `docker-compose.yml` with: + +```yaml +env_file: + - .env +``` diff --git a/loki/alloy/config.alloy b/loki/alloy/config.alloy new file mode 100644 index 0000000..9dc00ec --- /dev/null +++ b/loki/alloy/config.alloy @@ -0,0 +1,101 @@ +// Grafana Alloy configuration +// Collects: +// 1. 
Syslog over UDP/TCP port 514 — for MikroTik RB5009 and other network gear +// 2. Docker container logs — for all containers on this host +// Forwards everything to Loki. + +// ── 1. SYSLOG RECEIVER ──────────────────────────────────────────────────────── +// Listens on 514 UDP and TCP. Point your MikroTik logging action at this host. + +loki.source.syslog "network_devices" { + listener { + address = "0.0.0.0:514" + protocol = "udp" + labels = { + job = "syslog", + source = "network", + } + } + listener { + address = "0.0.0.0:514" + protocol = "tcp" + labels = { + job = "syslog", + source = "network", + } + } + + // Fields parsed from the syslog header arrive as internal __syslog_message_* labels, + // which are dropped on forward unless relabel_rules promotes them to real labels. + relabel_rules = loki.relabel.syslog_relabel.rules + forward_to = [loki.write.default.receiver] +} + +// Relabel syslog: promote the hostname field (sent by RouterOS) to a label +// so you can filter by device in Grafana with {host="RB5009"} etc. +// Rules-only component: loki.source.syslog applies its rules, so forward_to stays empty. +loki.relabel "syslog_relabel" { + forward_to = [] + rule { + source_labels = ["__syslog_message_hostname"] + target_label = "host" + } + rule { + source_labels = ["__syslog_message_severity"] + target_label = "severity" + } + rule { + source_labels = ["__syslog_message_facility"] + target_label = "facility" + } + rule { + source_labels = ["__syslog_message_app_name"] + target_label = "app" + } +} + + +// ── 2. DOCKER CONTAINER LOGS ───────────────────────────────────────────────── +// Tails logs from all Docker containers on this host. +// Adds container, stream and image labels for easy filtering. 
+ +discovery.docker "containers" { + host = "unix:///var/run/docker.sock" +} + +// Relabel Docker metadata into useful Loki labels +discovery.relabel "docker_labels" { + targets = discovery.docker.containers.targets + + rule { + source_labels = ["__meta_docker_container_name"] + regex = "/(.*)" + target_label = "container" + } + rule { + source_labels = ["__meta_docker_container_log_stream"] + target_label = "stream" + } + rule { + source_labels = ["__meta_docker_image_name"] + target_label = "image" + } +} + +loki.source.docker "docker_logs" { + host = "unix:///var/run/docker.sock" + targets = discovery.relabel.docker_labels.output + labels = { job = "docker" } + forward_to = [loki.write.default.receiver] + relabel_rules = discovery.relabel.docker_labels.rules +} + + +// ── 3. LOKI WRITE TARGET ────────────────────────────────────────────────────── +// All sources above forward here. + +loki.write "default" { + endpoint { + url = "http://loki:3100/loki/api/v1/push" + } +} diff --git a/loki/compose.yaml b/loki/compose.yaml new file mode 100644 index 0000000..94a5701 --- /dev/null +++ b/loki/compose.yaml @@ -0,0 +1,73 @@ +--- +# Loki + Alloy + Grafana log aggregation stack +# Place this file in a directory e.g. 
~/docker/loki-stack/ +# Run with: docker compose up -d + +networks: + logging: + driver: bridge + +volumes: + loki-data: + grafana-data: + +services: + + # ── Loki: log storage and query engine ────────────────────────────────────── + loki: + image: grafana/loki:3.4.2 + container_name: loki + restart: unless-stopped + networks: + - logging + ports: + - "3100:3100" # Loki HTTP API (Alloy pushes here; Grafana queries here) + volumes: + - loki-data:/loki + - ./config/loki.yml:/etc/loki/loki.yml:ro + command: -config.file=/etc/loki/loki.yml + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://localhost:3100/ready || exit 1"] + interval: 30s + timeout: 5s + retries: 5 + + # ── Alloy: log collector / syslog receiver ─────────────────────────────────── + alloy: + image: grafana/alloy:v1.7.5 + container_name: alloy + restart: unless-stopped + networks: + - logging + ports: + - "514:514/udp" # Syslog UDP (for MikroTik and other network devices) + - "514:514/tcp" # Syslog TCP + - "12345:12345" # Alloy UI (optional, useful for debugging) + volumes: + - ./config/alloy.alloy:/etc/alloy/config.alloy:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro # Docker log access + - /var/run/docker.sock:/var/run/docker.sock:ro # Docker metadata + command: run /etc/alloy/config.alloy --server.http.listen-addr=0.0.0.0:12345 + depends_on: + loki: + condition: service_healthy + + # ── Grafana: log query UI ──────────────────────────────────────────────────── + grafana: + image: grafana/grafana:11.5.2 + container_name: grafana + restart: unless-stopped + networks: + - logging + ports: + - "3098:3000" + volumes: + - grafana-data:/var/lib/grafana + - ./config/grafana-datasources.yml:/etc/grafana/provisioning/datasources/loki.yml:ro + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true # Remove if you want login + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin # Remove if you want login + - GF_SECURITY_ADMIN_PASSWORD=changeme # Change this + depends_on: + loki: + condition: service_healthy 
diff --git a/loki/config/alloy.alloy b/loki/config/alloy.alloy new file mode 100644 index 0000000..8161b0e --- /dev/null +++ b/loki/config/alloy.alloy @@ -0,0 +1,105 @@ +// Alloy configuration +// Collects: (1) Docker container logs, (2) Syslog from network devices (MikroTik etc.) +// Pushes everything to local Loki instance. + +// ── Loki destination ────────────────────────────────────────────────────────── +loki.write "local_loki" { + endpoint { + url = "http://loki:3100/loki/api/v1/push" + } +} + +// ── Docker container log collection ────────────────────────────────────────── +// Discovers all running containers and tails their logs automatically. +// New containers are picked up without restarting Alloy. + +discovery.docker "containers" { + host = "unix:///var/run/docker.sock" +} + +discovery.relabel "docker_labels" { + targets = discovery.docker.containers.targets + + // Use container name as the container label (strips the leading slash Docker adds) + rule { + source_labels = ["__meta_docker_container_name"] + regex = "/(.*)" + target_label = "container" + } + + // Carry through the Docker Compose service name if present + rule { + source_labels = ["__meta_docker_container_label_com_docker_compose_service"] + target_label = "service" + } + + // Carry through the Docker Compose project name if present + rule { + source_labels = ["__meta_docker_container_label_com_docker_compose_project"] + target_label = "compose_project" + } + + rule { + target_label = "source" + replacement = "docker" + } +} + +loki.source.docker "docker_logs" { + host = "unix:///var/run/docker.sock" + targets = discovery.relabel.docker_labels.output + forward_to = [loki.write.local_loki.receiver] + // loki.source.docker takes relabel rules as an argument, not as an inline block + relabel_rules = discovery.relabel.docker_labels.rules +} + +// ── Syslog receiver (MikroTik RB5009 and other network devices) ────────────── +// Listens on UDP 514 and TCP 514. 
+// On your RB5009, set the remote logging action to point at this host's IP. + +loki.source.syslog "network_syslog" { + listener { + address = "0.0.0.0:514" + protocol = "udp" + labels = { + source = "syslog", + job = "network_devices", + } + } + listener { + address = "0.0.0.0:514" + protocol = "tcp" + labels = { + source = "syslog", + job = "network_devices", + } + } + + // Fields parsed from the syslog header arrive as internal __syslog_message_* labels, + // which are dropped on forward unless relabel_rules promotes them to real labels. + relabel_rules = loki.relabel.syslog_relabel.rules + forward_to = [loki.write.local_loki.receiver] +} + +// Enrich syslog entries with labels taken from the parsed syslog header. +// Rules-only component: loki.source.syslog applies its rules, so forward_to stays empty. +loki.relabel "syslog_relabel" { + forward_to = [] + + rule { + source_labels = ["__syslog_message_hostname"] + target_label = "hostname" + } + rule { + source_labels = ["__syslog_message_app_name"] + target_label = "app" // e.g. "dhcp", "firewall", "interface" on RouterOS + } + rule { + source_labels = ["__syslog_message_severity"] + target_label = "severity" + } + rule { + source_labels = ["__syslog_message_facility"] + target_label = "facility" + } +} diff --git a/loki/config/grafana-datasources.yml b/loki/config/grafana-datasources.yml new file mode 100644 index 0000000..f8319e7 --- /dev/null +++ b/loki/config/grafana-datasources.yml @@ -0,0 +1,16 @@ +# Grafana datasource provisioning +# Automatically configures Loki as a datasource on first startup. +# No manual setup needed in the Grafana UI. 
+ +apiVersion: 1 + +datasources: + - name: Loki + type: loki + access: proxy + url: http://loki:3100 + isDefault: true + editable: false + jsonData: + maxLines: 5000 + timeout: 60 diff --git a/loki/config/loki.yml b/loki/config/loki.yml new file mode 100644 index 0000000..851e56b --- /dev/null +++ b/loki/config/loki.yml @@ -0,0 +1,56 @@ +# Loki configuration - single binary mode, suitable for homelab scale +# Docs: https://grafana.com/docs/loki/latest/configuration/ + +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + log_level: warn + +common: + instance_addr: 127.0.0.1 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +# ── Retention ───────────────────────────────────────────────────────────────── +# Adjust these to suit your disk space. 90 days is a good starting point for +# homelab troubleshooting — long enough to catch recurring issues. 
+limits_config: + retention_period: 90d + ingestion_rate_mb: 4 + ingestion_burst_size_mb: 8 + +compactor: + working_directory: /loki/compactor + retention_enabled: true + retention_delete_delay: 2h + delete_request_store: filesystem + +# ── Query performance ───────────────────────────────────────────────────────── +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 + +ruler: + alertmanager_url: http://localhost:9093 diff --git a/loki/grafana/provisioning/datasources/loki.yml b/loki/grafana/provisioning/datasources/loki.yml new file mode 100644 index 0000000..c441aa3 --- /dev/null +++ b/loki/grafana/provisioning/datasources/loki.yml @@ -0,0 +1,17 @@ +apiVersion: 1 + +datasources: + - name: Loki + type: loki + access: proxy + url: http://loki:3100 + isDefault: true + editable: false + jsonData: + maxLines: 5000 + # Derive fields let you turn log content into clickable links. + # This example makes trace IDs in logs clickable — remove if not needed. + derivedFields: + - name: TraceID + matcherRegex: "traceID=(\\w+)" + url: "" diff --git a/loki/loki/loki-config.yml b/loki/loki/loki-config.yml new file mode 100644 index 0000000..1ed1e68 --- /dev/null +++ b/loki/loki/loki-config.yml @@ -0,0 +1,55 @@ +# Loki configuration — single-binary mode, suitable for homelab scale +# Stores data in the local filesystem via the 'loki-data' Docker volume + +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + log_level: warn + +common: + instance_addr: 127.0.0.1 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +# How long to keep logs. Adjust to taste. +# 90 days is generous but reasonable for a homelab — tune down if disk is tight. 
+limits_config: + retention_period: 90d + # Reject log lines larger than 256KB (protects against runaway logging) + max_line_size: 256KB + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +compactor: + working_directory: /loki/compactor + # Enables the retention policy above + retention_enabled: true + retention_delete_delay: 2h + delete_request_store: filesystem + +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 + +ruler: + alertmanager_url: http://localhost:9093