Add observability stack with Grafana, Prometheus, Loki, and cAdvisor

- Introduced a new Grafana dashboard for monitoring Docker containers using cAdvisor.
- Created provisioning files for Grafana dashboards and data sources.
- Added Dockerfiles and configuration files for Loki and Prometheus.
- Implemented a Docker Compose stack for the observability services.
- Configured Traefik as a reverse proxy for the services with appropriate routing.
- Added scripts for SSH tunneling to access the telemetry dashboard.
- Included secrets management for Grafana admin credentials.
This commit is contained in:
2026-01-18 15:36:23 +01:00
parent 38b651cb0b
commit 9b143e7638
24 changed files with 19139 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
# Alloy image with the collector pipeline baked in.
# NOTE(review): `latest` is a floating tag; consider pinning a version for reproducible builds.
FROM grafana/alloy:latest
# Place the pipeline definition at /etc/alloy/config.alloy inside the image.
COPY config.alloy /etc/alloy/config.alloy

View File

@@ -0,0 +1,29 @@
// Enumerate containers through the local Docker Engine socket. The exported
// target list is consumed by loki.source.docker.
discovery.docker "containers" {
  host = "unix:///var/run/docker.sock"
}
// Rule container only: `targets` is left empty because this component is
// referenced solely for its exported `rules`.
discovery.relabel "swarm" {
  targets = []

  // Copy the Docker container name, minus its optional leading "/", into the
  // `container_name` label.
  rule {
    source_labels = ["__meta_docker_container_name"]
    regex         = "^/?(.*)$"
    target_label  = "container_name"
  }
}
// Read the logs of every discovered container and forward them to the Loki
// writer.
loki.source.docker "docker" {
  host          = "unix:///var/run/docker.sock"
  targets       = discovery.docker.containers.targets
  relabel_rules = discovery.relabel.swarm.rules
  // Static label so queries can rely on a stable `job` selector.
  labels        = { job = "docker" }
  forward_to    = [loki.write.default.receiver]
}
// Deliver log entries to the Loki HTTP push endpoint.
loki.write "default" {
  endpoint {
    url = "http://loki:3100/loki/api/v1/push"
  }
}

View File

@@ -0,0 +1,159 @@
# Overlay networks used by the stack.
networks:
  # Internal, encrypted overlay shared by Prometheus, Loki and the per-node
  # agents (Alloy, cAdvisor, node-exporter).
  observability-network:
    driver: overlay
    attachable: false
    internal: true
    driver_opts:
      encrypted: "true"

  # Internal, encrypted overlay linking Grafana to Prometheus and Loki.
  grafana-frontend-network:
    driver: overlay
    attachable: false
    internal: true
    driver_opts:
      encrypted: "true"

  # Defined outside this file; must exist before the stack is deployed.
  swarm-proxy-network:
    external: true
# Grafana admin credentials, sourced from local files.
# NOTE(review): the secret files added in this change appear to contain
# placeholder values; confirm they are replaced before any real deployment.
secrets:
  grafana-admin-user:
    file: ./secrets/grafana-admin-user.txt
  grafana-admin-password:
    file: ./secrets/grafana-admin-password.txt
# Named volumes holding the persistent state of each backend.
volumes:
  grafana-storage: {}
  prometheus-storage: {}
  loki-storage: {}
# Service definitions for the observability stack.
services:
  # Dashboards UI; routed through Traefik and also published on port 3002.
  grafana:
    image: grafana/grafana:latest
    environment:
      # Admin credentials are read from the mounted secrets via the `__FILE` variables.
      GF_SECURITY_ADMIN_USER__FILE: /run/secrets/grafana-admin-user
      GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/grafana-admin-password
    secrets:
      - grafana-admin-user
      - grafana-admin-password
    volumes:
      - grafana-storage:/var/lib/grafana
      - ./grafana/provisioning/:/etc/grafana/provisioning/:ro
      - ./grafana/grafana.ini:/etc/grafana/grafana.ini:ro
    deploy:
      mode: replicated
      replicas: 1
      restart_policy:
        condition: any
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.grafana.entrypoints=admin"
        # NOTE(review): this rule routes the `/grafana` prefix to Grafana, but
        # the mounted grafana.ini sets `serve_from_sub_path = false` and no
        # strip-prefix middleware is attached; confirm the sub-path setup.
        - "traefik.http.routers.grafana.rule=Host(`admin.localhost`) && PathPrefix(`/grafana`)"
        - "traefik.http.routers.grafana.middlewares=compress-all"
        - "traefik.http.middlewares.compress-all.compress=true"
        - "traefik.http.middlewares.compress-all.compress.encodings=zstd, br, gzip"
        - "traefik.http.routers.grafana.service=grafana"
        - "traefik.http.services.grafana.loadbalancer.server.port=3000"
    ports:
      - target: 3000
        published: 3002
        protocol: tcp
        mode: ingress
    networks:
      - swarm-proxy-network
      - grafana-frontend-network

  # Metrics store; single replica pinned to a manager node, published on port 3001.
  prometheus:
    image: prom/prometheus:latest
    volumes:
      # NOTE(review): the Prometheus Dockerfile in this change copies
      # `prometheus.yaml`; confirm the host file name used here.
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-storage:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
    deploy:
      mode: replicated
      replicas: 1
      restart_policy:
        condition: any
      placement:
        constraints:
          - node.role == manager
    ports:
      - target: 9090
        published: 3001
        protocol: tcp
        mode: ingress
    networks:
      - observability-network
      - grafana-frontend-network
      # Required to reach the `traefik:9090` scrape target declared in the
      # Prometheus configuration; the other two networks are internal to this stack.
      - swarm-proxy-network

  # Log store; single replica pinned to a manager node, published on port 3100.
  loki:
    image: grafana/loki:latest
    command:
      - -config.file=/etc/loki/local-config.yaml
    volumes:
      - loki-storage:/loki
      - ./loki/loki-config.yaml:/etc/loki/local-config.yaml:ro
    deploy:
      mode: replicated
      replicas: 1
      restart_policy:
        condition: any
      placement:
        constraints:
          - node.role == manager
    ports:
      - target: 3100
        published: 3100
        protocol: tcp
        mode: ingress
    networks:
      - observability-network
      - grafana-frontend-network

  # Log collector; one task per node, reading the local Docker socket.
  alloy:
    image: grafana/alloy:latest
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./alloy/config.alloy:/etc/alloy/config.alloy:ro
    deploy:
      mode: global
      # DNS round-robin (no virtual IP) for this global service.
      endpoint_mode: dnsrr
      restart_policy:
        condition: any
    networks:
      - observability-network

  # Container metrics exporter; one task per node with read-only host mounts.
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    volumes:
      - /:/rootfs:ro
      - /run:/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    deploy:
      mode: global
      endpoint_mode: dnsrr
      restart_policy:
        condition: any
    networks:
      - observability-network

  # Host metrics exporter; one task per node, host root mounted at /host.
  node_exporter:
    image: quay.io/prometheus/node-exporter:latest
    # Swarm template: each task takes the hostname of the node it runs on.
    hostname: "{{.Node.Hostname}}"
    command:
      - "--path.rootfs=/host"
    volumes:
      - "/:/host:ro,rslave"
    deploy:
      mode: global
      endpoint_mode: dnsrr
      restart_policy:
        condition: any
    networks:
      - observability-network

View File

@@ -0,0 +1,11 @@
# Grafana image with configuration and provisioning baked in.
FROM grafana/grafana:latest
# Disable Grafana sign up option
ENV GF_USERS_ALLOW_SIGN_UP=false
# Copy local Grafana config and provisioning into the image
COPY grafana.ini /etc/grafana/grafana.ini
COPY provisioning/ /etc/grafana/provisioning/
# Document the HTTP port. No USER, ENTRYPOINT or CMD is set in this file, so
# the base image's values stay in effect.
EXPOSE 3000

View File

@@ -0,0 +1,41 @@
; Grafana server configuration.
; Reference: https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#configuration-options

[server]
http_port = 3000
; Sub-path serving is currently disabled. To enable it, see
; https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#serve_from_sub_path
serve_from_sub_path = false
; root_url = http://grafana.localhost:3030/
enable_gzip = true

[analytics]
reporting_enabled = true
check_for_updates = true
check_for_plugin_updates = true
feedback_links_enabled = false

[security]
disable_gravatar = false

[users]
allow_sign_up = false
allow_org_create = false

[log]
; Accepted values: debug, info, warn, error
level = info

[explore]
enabled = true

[help]
enabled = true

[profile]
enabled = true

[news]
news_feed_enabled = false

[public_dashboards]
enabled = false

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,991 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "Prometheus is used for data collection from cAdvisor",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "10.2.0"
},
{
"type": "panel",
"id": "heatmap",
"name": "Heatmap",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 3,
"panels": [],
"title": "Basic",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 4,
"x": 0,
"y": 1
},
"id": 9,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"editorMode": "code",
"expr": "count(count(container_last_seen) by (name))",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Running Containers",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 1,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 20,
"x": 4,
"y": 1
},
"id": 1,
"options": {
"legend": {
"calcs": [
"max",
"mean"
],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"editorMode": "code",
"expr": "sum(rate(container_cpu_usage_seconds_total{name=~\".+\"}[10m])) by (name) * 100",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "CPU Usage",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 10
},
"id": 4,
"panels": [],
"title": "Memory",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 1,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "decbytes"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 11
},
"id": 2,
"options": {
"legend": {
"calcs": [
"max",
"mean"
],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
  "datasource": {
    "type": "prometheus",
    "uid": "prometheus_uid"
  },
  "editorMode": "code",
  "expr": "sum(container_memory_usage_bytes{name=~\".+\"}) by (name)",
  "instant": false,
  "legendFormat": "__auto",
  "range": true,
  "refId": "A"
}
],
"title": "Memory Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 1,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "decbytes"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 11
},
"id": 7,
"options": {
"legend": {
"calcs": [
"max",
"mean"
],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
  "datasource": {
    "type": "prometheus",
    "uid": "prometheus_uid"
  },
  "editorMode": "code",
  "expr": "sum(container_memory_cache{name=~\".+\"}) by (name)",
  "instant": false,
  "legendFormat": "__auto",
  "range": true,
  "refId": "A"
}
],
"title": "Memory Cached",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 20
},
"id": 12,
"panels": [],
"title": "I/O",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 1,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "binBps"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 21
},
"id": 13,
"options": {
"legend": {
"calcs": [
"max",
"mean"
],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"editorMode": "code",
"expr": "sum(rate(container_fs_reads_bytes_total{name=~\".+\"}[10m])) by (name)",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Reads",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 1,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "binBps"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 21
},
"id": 14,
"options": {
"legend": {
"calcs": [
"max",
"mean"
],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"editorMode": "code",
"expr": "sum(rate(container_fs_writes_bytes_total{name=~\".+\"}[10m])) by (name)",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Writes",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 30
},
"id": 6,
"panels": [],
"title": "Network",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 1,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "binBps"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 31
},
"id": 5,
"options": {
"legend": {
"calcs": [
"max",
"mean"
],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"editorMode": "code",
"expr": "sum(rate(container_network_receive_bytes_total{name=~\".+\"}[10m])) by (name)",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Received Network Traffic",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 1,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "binBps"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 31
},
"id": 8,
"options": {
"legend": {
"calcs": [
"max",
"mean"
],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"editorMode": "code",
"expr": "sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[10m])) by (name)",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Sent Network Traffic",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 40
},
"id": 11,
"panels": [],
"title": "Details",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
},
"fieldMinMax": true
},
"overrides": []
},
"gridPos": {
"h": 13,
"w": 24,
"x": 0,
"y": 41
},
"id": 10,
"options": {
"calculate": false,
"cellGap": 1,
"color": {
"exponent": 0.5,
"fill": "dark-red",
"mode": "opacity",
"reverse": false,
"scale": "exponential",
"scheme": "PuRd",
"steps": 30
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-9
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto",
"value": "Restarts"
},
"tooltip": {
"show": true,
"yHistogram": true
},
"yAxis": {
"axisPlacement": "left",
"reverse": false
}
},
"pluginVersion": "10.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus_uid"
},
"editorMode": "code",
"expr": "count by(name) (count_over_time(container_last_seen{name=~\".+\"}[$__range]))",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Container Restarts",
"type": "heatmap"
}
],
"refresh": "5s",
"schemaVersion": 38,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "cAdvisor Docker Insights",
"uid": "ae3c41d7-cea5-4cca-a918-5708706b4d1a",
"version": 14,
"weekStart": "",
"gnetId": 19908,
"description": "This Grafana dashboard offers a basic overview of key performance metrics for Docker containers in your system."
}

View File

@@ -0,0 +1,27 @@
# Grafana dashboard providers: one file-based provider per bundled dashboard.
# https://grafana.com/docs/grafana/latest/administration/provisioning/#dashboards
apiVersion: 1

providers:
  # https://grafana.com/grafana/dashboards/19908-docker-container-monitoring-with-prometheus-and-cadvisor/
  - name: 'cAdvisor'
    type: file
    disableDeletion: true
    allowUiUpdates: false
    options:
      path: /etc/grafana/provisioning/dashboards/19908_rev1.json

  # https://grafana.com/grafana/dashboards/1860-node-exporter-full/
  - name: 'Node Exporter'
    type: file
    disableDeletion: true
    allowUiUpdates: false
    options:
      path: /etc/grafana/provisioning/dashboards/1860_rev42.json

  # https://grafana.com/grafana/dashboards/17346-traefik-official-standalone-dashboard/
  - name: 'Traefik'
    type: file
    disableDeletion: true
    allowUiUpdates: false
    options:
      path: /etc/grafana/provisioning/dashboards/17346_rev9.json

View File

@@ -0,0 +1,18 @@
# Grafana data sources. The fixed `uid` values are the ones the provisioned
# dashboards reference (e.g. `prometheus_uid`).
apiVersion: 1

datasources:
  # Default data source: metrics.
  - name: Prometheus
    uid: prometheus_uid
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: false

  # Logs.
  - name: Loki
    uid: loki_uid
    type: loki
    access: proxy
    url: http://loki:3100
    isDefault: false
    editable: false

View File

@@ -0,0 +1,7 @@
# Loki image with its configuration baked in.
FROM grafana/loki:latest
# Install the local configuration at the path referenced by CMD below.
COPY loki-config.yaml /etc/loki/local-config.yaml
# Default argument: load the configuration file copied above.
CMD ["-config.file=/etc/loki/local-config.yaml"]
# Document Loki's HTTP port.
EXPOSE 3100

View File

@@ -0,0 +1,51 @@
# Loki configuration for a single-instance (monolithic) deployment with
# filesystem storage under /loki.
auth_enabled: false

# https://grafana.com/docs/loki/latest/configure/#server
server:
  http_listen_port: 3100
  grpc_listen_port: 9096
  # Accepted values: debug, info, warn, error
  log_level: info
  grpc_server_max_concurrent_streams: 1000

# https://grafana.com/docs/loki/latest/configure/#common
common:
  # Loki is composed of multiple microservices that talk to each other over gRPC;
  # `instance_addr` is the address they use for that inter-service traffic.
  # This setup runs in monolithic mode
  # (https://grafana.com/docs/loki/latest/get-started/deployment-modes/#monolithic-mode),
  # so the loopback address keeps the components reachable even when the
  # container IP changes during upgrades.
  instance_addr: 127.0.0.1
  path_prefix: /loki
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules

schema_config:
  configs:
    - from: 2025-01-01
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

frontend:
  encoding: protobuf

# By default, Loki sends anonymous but uniquely-identifiable usage and
# configuration analytics to Grafana Labs (https://stats.grafana.org/).
#
# These statistics help Grafana Labs understand how Loki is used and what
# performance most users see, which informs feature and documentation priorities.
# For details on what is sent, see
# https://github.com/grafana/loki/blob/main/pkg/analytics/stats.go
# (the buildReport method shows what goes into a report).
#
# To disable reporting, uncomment the following lines:
#analytics:
#  reporting_enabled: false

View File

@@ -0,0 +1,13 @@
# Prometheus image with its scrape configuration baked in.
# https://github.com/prometheus/prometheus/releases
FROM prom/prometheus:latest
# Copy Prometheus configuration (note the ".yaml" extension of the destination)
COPY prometheus.yaml /etc/prometheus/prometheus.yaml
# Document the Prometheus HTTP port (no USER instruction is set in this file)
EXPOSE 9090
# Start Prometheus with the provided configuration file.
# NOTE: a `command:` override in a stack file replaces CMD entirely, so it has
# to pass /etc/prometheus/prometheus.yaml for the copied configuration to load.
ENTRYPOINT ["prometheus"]
CMD ["--config.file=/etc/prometheus/prometheus.yaml", "--storage.tsdb.path=/prometheus"]

View File

@@ -0,0 +1,41 @@
# Prometheus main configuration
global:
  scrape_interval: 30s
  evaluation_interval: 30s

scrape_configs:
  # Prometheus itself, discovered through the Swarm `tasks.<service>` DNS name.
  - job_name: prometheus
    metrics_path: /metrics
    dns_sd_configs:
      - names:
          - tasks.prometheus
        type: A
        port: 9090
        refresh_interval: 30s

  # cAdvisor tasks (per-container metrics).
  - job_name: cadvisor
    metrics_path: /metrics
    dns_sd_configs:
      - names:
          - tasks.cadvisor
        type: A
        port: 8080
        refresh_interval: 30s

  # Node Exporter tasks (per-host metrics).
  - job_name: node_exporter
    metrics_path: /metrics
    dns_sd_configs:
      - names:
          - tasks.node_exporter
        type: A
        port: 9100
        refresh_interval: 30s

  # Traefik, addressed by a fixed host:port.
  - job_name: traefik
    metrics_path: /metrics
    static_configs:
      - targets:
          - traefik:9090

View File

@@ -0,0 +1 @@
admin

View File

@@ -0,0 +1 @@
admin

149
observability/stack.yaml Normal file
View File

@@ -0,0 +1,149 @@
# Overlay networks used by the stack.
networks:
  # Internal, encrypted overlay shared by Prometheus, Loki and the per-node
  # agents (cAdvisor, node-exporter, Alloy).
  observability-network:
    driver: overlay
    attachable: false
    internal: true
    driver_opts:
      encrypted: "true"

  # Internal, encrypted overlay linking Grafana to Prometheus and Loki.
  grafana-frontend-network:
    driver: overlay
    attachable: false
    internal: true
    driver_opts:
      encrypted: "true"

  # Defined outside this file; must exist before the stack is deployed.
  proxy-swarm-network:
    external: true
# Grafana admin credentials, sourced from local files.
# NOTE(review): the secret files added in this change appear to contain
# placeholder values; confirm they are replaced before any real deployment.
secrets:
  grafana-admin-user:
    file: ./secrets/grafana-admin-user.txt
  grafana-admin-password:
    file: ./secrets/grafana-admin-password.txt
# Named volumes holding the persistent state of each backend.
volumes:
  grafana-storage: {}
  prometheus-storage: {}
  loki-storage: {}
# Service definitions for the observability stack (pre-built images).
services:
  # Dashboards UI, routed through Traefik on the `admin` entrypoint.
  grafana:
    image: ghcr.io/hyntaria/observability/grafana:v1.0
    environment:
      # Admin credentials are read from the mounted secrets via the `__FILE` variables.
      GF_SECURITY_ADMIN_USER__FILE: /run/secrets/grafana-admin-user
      GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/grafana-admin-password
    secrets:
      - grafana-admin-user
      - grafana-admin-password
    volumes:
      - grafana-storage:/var/lib/grafana
    deploy:
      mode: replicated
      replicas: 1
      restart_policy:
        condition: any
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.grafana.entrypoints=admin"
        - "traefik.http.routers.grafana.rule=Host(`grafana.localhost`)"
        - "traefik.http.routers.grafana.middlewares=compress-grafana"
        - "traefik.http.middlewares.compress-grafana.compress=true"
        - "traefik.http.middlewares.compress-grafana.compress.encodings=zstd, br, gzip"
        - "traefik.http.routers.grafana.service=grafana"
        - "traefik.http.services.grafana.loadbalancer.server.port=3000"
    networks:
      - proxy-swarm-network
      - grafana-frontend-network

  # Metrics store; single replica pinned to a manager node, routed through Traefik.
  prometheus:
    image: ghcr.io/hyntaria/observability/prometheus:v0.2
    command:
      # The image's Dockerfile copies the scrape configuration to
      # /etc/prometheus/prometheus.yaml; pointing at `prometheus.yml` would
      # bypass it.
      - "--config.file=/etc/prometheus/prometheus.yaml"
      - "--storage.tsdb.path=/prometheus"
    volumes:
      - prometheus-storage:/prometheus
    deploy:
      mode: replicated
      replicas: 1
      restart_policy:
        condition: any
      placement:
        constraints:
          - node.role == manager
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.prometheus.entrypoints=admin"
        - "traefik.http.routers.prometheus.rule=Host(`prometheus.localhost`)"
        - "traefik.http.routers.prometheus.middlewares=compress-prometheus"
        - "traefik.http.middlewares.compress-prometheus.compress=true"
        - "traefik.http.middlewares.compress-prometheus.compress.encodings=zstd, br, gzip"
        - "traefik.http.routers.prometheus.service=prometheus"
        - "traefik.http.services.prometheus.loadbalancer.server.port=9090"
    networks:
      - observability-network
      - grafana-frontend-network
      - proxy-swarm-network

  # Log store; single replica pinned to a manager node.
  loki:
    image: ghcr.io/hyntaria/observability/loki:v0.1
    volumes:
      - loki-storage:/loki
    deploy:
      mode: replicated
      replicas: 1
      restart_policy:
        condition: any
      placement:
        constraints:
          - node.role == manager
    networks:
      - observability-network
      - grafana-frontend-network

  # Container metrics exporter; one task per node with read-only host mounts.
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    volumes:
      - /:/rootfs:ro
      - /run:/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    deploy:
      mode: global
      # DNS round-robin (no virtual IP) for this global service.
      endpoint_mode: dnsrr
      restart_policy:
        condition: any
    networks:
      - observability-network

  # Host metrics exporter; one task per node, host root mounted at /host.
  node_exporter:
    image: quay.io/prometheus/node-exporter:latest
    # Swarm template: each task takes the hostname of the node it runs on.
    hostname: "{{.Node.Hostname}}"
    command:
      - "--path.rootfs=/host"
    volumes:
      - "/:/host:ro,rslave"
    deploy:
      mode: global
      endpoint_mode: dnsrr
      restart_policy:
        condition: any
    networks:
      - observability-network

  # Log collector; one task per node, reading the local Docker socket.
  alloy:
    image: ghcr.io/hyntaria/observability/alloy:v0.1
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
    deploy:
      mode: global
      endpoint_mode: dnsrr
      restart_policy:
        condition: any
    networks:
      - observability-network