Add observability stack with Grafana, Prometheus, Loki, and cAdvisor
- Introduced a new Grafana dashboard for monitoring Docker containers using cAdvisor. - Created provisioning files for Grafana dashboards and data sources. - Added Dockerfiles and configuration files for Loki and Prometheus. - Implemented a Docker Compose stack for the observability services. - Configured Traefik as a reverse proxy for the services with appropriate routing. - Added scripts for SSH tunneling to access the telemetry dashboard. - Included secrets management for Grafana admin credentials.
This commit is contained in:
71
.github/workflows/action-build-and-push.yaml
vendored
Normal file
71
.github/workflows/action-build-and-push.yaml
vendored
Normal file
@@ -0,0 +1,71 @@
|
||||
name: Docker Build & Push to GitHub Container Registry (Use git tags with format vX.Y.Z as release version)

# Reusable workflow: it only runs when another workflow calls it through
# `uses:` and supplies the inputs declared below.
on:
  workflow_call:
    inputs:
      image_name:
        type: string
        required: true
        description: >
          Docker image name without registry and owner.
          Example: observability/alloy

      build_context:
        type: string
        required: false
        default: "."
        description: >
          Path to the Docker build context.
          Example: . or ./services/api

      file_path:
        type: string
        required: false
        default: ./Dockerfile
        description: >
          Path to the Dockerfile relative to the repository root.
          Example: ./Dockerfile or ./services/api/Dockerfile

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    # Read the repository, write packages (needed to push to ghcr.io).
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Ensure lowercase username because some registries do not allow uppercase letters
        id: lowercase
        # The owner is handed to the script through an environment variable
        # instead of being expanded by `${{ }}` inside the shell source, so the
        # value is always treated as data and never as shell syntax.
        env:
          OWNER: ${{ github.repository_owner }}
        run: |
          echo "username=$(echo "$OWNER" | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT"

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ steps.lowercase.outputs.username }}/${{ inputs.image_name }}
          # Each pattern captures a different prefix of the vX.Y.Z part of the
          # git ref: vX.Y.Z, vX.Y and vX.
          tags: |
            type=match,pattern=.*(v\d+\.\d+\.\d+),group=1
            type=match,pattern=.*(v\d+\.\d+)\.\d+,group=1
            type=match,pattern=.*(v\d+)\.\d+\.\d+,group=1

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: ${{ inputs.build_context }}
          file: ${{ inputs.file_path }}
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
|
||||
17
.github/workflows/b&p-observability-alloy.yaml
vendored
Normal file
17
.github/workflows/b&p-observability-alloy.yaml
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
name: Build and Push Alloy

# Runs for every pushed git tag matching alloy-v*.*.*
on:
  push:
    tags:
      - 'alloy-v*.*.*'

jobs:
  build-and-push:
    # Token scopes granted to the called workflow.
    permissions:
      packages: write
      contents: read
    # Delegates the actual work to the shared reusable workflow.
    uses: ./.github/workflows/action-build-and-push.yaml
    with:
      file_path: ./observability/alloy/Dockerfile
      build_context: ./observability/alloy
      image_name: observability/alloy
|
||||
17
.github/workflows/b&p-observability-grafana.yaml
vendored
Normal file
17
.github/workflows/b&p-observability-grafana.yaml
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
name: Build and Push Grafana Observability

# Runs for every pushed git tag matching grafana-v*.*.*
on:
  push:
    tags:
      - 'grafana-v*.*.*'

jobs:
  build-and-push:
    # Token scopes granted to the called workflow.
    permissions:
      packages: write
      contents: read
    # Delegates the actual work to the shared reusable workflow.
    uses: ./.github/workflows/action-build-and-push.yaml
    with:
      file_path: ./observability/grafana/Dockerfile
      build_context: ./observability/grafana
      image_name: observability/grafana
|
||||
17
.github/workflows/b&p-observability-loki.yaml
vendored
Normal file
17
.github/workflows/b&p-observability-loki.yaml
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
name: Build and Push Loki Database

# Runs for every pushed git tag matching loki-v*.*.*
on:
  push:
    tags:
      - 'loki-v*.*.*'

jobs:
  build-and-push:
    # Token scopes granted to the called workflow.
    permissions:
      packages: write
      contents: read
    # Delegates the actual work to the shared reusable workflow.
    uses: ./.github/workflows/action-build-and-push.yaml
    with:
      file_path: ./observability/loki/Dockerfile
      build_context: ./observability/loki
      image_name: observability/loki
|
||||
17
.github/workflows/b&p-observability-prometheus.yaml
vendored
Normal file
17
.github/workflows/b&p-observability-prometheus.yaml
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
name: Build and Push Prometheus Observability

# Runs for every pushed git tag matching prometheus-v*.*.*
on:
  push:
    tags:
      - 'prometheus-v*.*.*'

jobs:
  build-and-push:
    # Token scopes granted to the called workflow.
    permissions:
      packages: write
      contents: read
    # Delegates the actual work to the shared reusable workflow.
    uses: ./.github/workflows/action-build-and-push.yaml
    with:
      file_path: ./observability/prometheus/Dockerfile
      build_context: ./observability/prometheus
      image_name: observability/prometheus
|
||||
3
observability/alloy/Dockerfile
Normal file
3
observability/alloy/Dockerfile
Normal file
@@ -0,0 +1,3 @@
|
||||
# Grafana Alloy image with the collector configuration baked in.

# Tag of the upstream base image. Defaults to `latest` (the previous
# hard-coded value); pass `--build-arg ALLOY_VERSION=<tag>` to pin a release
# and get reproducible builds.
ARG ALLOY_VERSION=latest
FROM grafana/alloy:${ALLOY_VERSION}

# Ship the pipeline definition inside the image at /etc/alloy/config.alloy.
COPY config.alloy /etc/alloy/config.alloy
|
||||
29
observability/alloy/config.alloy
Normal file
29
observability/alloy/config.alloy
Normal file
@@ -0,0 +1,29 @@
|
||||
|
||||
// Sink: every log line collected below is pushed to this Loki endpoint.
loki.write "default" {
  endpoint {
    url = "http://loki:3100/loki/api/v1/push"
  }
}

// Container discovery through the Docker socket.
discovery.docker "containers" {
  host = "unix:///var/run/docker.sock"
}

// Used purely as a holder for relabel rules: the target list is empty and
// only the exported `rules` are consumed by loki.source.docker.
discovery.relabel "swarm" {
  targets = []

  // Copy the Docker container name, minus an optional leading "/", into the
  // `container_name` label.
  rule {
    target_label  = "container_name"
    source_labels = ["__meta_docker_container_name"]
    regex         = "^/?(.*)$"
  }
}

// Reads logs of the discovered containers and forwards them to loki.write.
loki.source.docker "docker" {
  host          = "unix:///var/run/docker.sock"
  targets       = discovery.docker.containers.targets
  forward_to    = [loki.write.default.receiver]
  relabel_rules = discovery.relabel.swarm.rules

  // Stable label to query on.
  labels = { job = "docker" }
}
|
||||
159
observability/dev-stack.yaml
Normal file
159
observability/dev-stack.yaml
Normal file
@@ -0,0 +1,159 @@
|
||||
# Observability stack: Grafana, Prometheus, Loki, Alloy, cAdvisor and
# node_exporter.
# NOTE(review): nesting was reconstructed from key order; in particular the
# Traefik `labels` are placed under `deploy` — confirm against the original.

services:
  grafana:
    image: grafana/grafana:latest
    # Admin credentials are read from the mounted secret files.
    environment:
      GF_SECURITY_ADMIN_USER__FILE: /run/secrets/grafana-admin-user
      GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/grafana-admin-password
    secrets:
      - grafana-admin-user
      - grafana-admin-password
    volumes:
      - grafana-storage:/var/lib/grafana
      - ./grafana/provisioning/:/etc/grafana/provisioning/:ro
      - ./grafana/grafana.ini:/etc/grafana/grafana.ini:ro
    networks:
      - swarm-proxy-network
      - grafana-frontend-network
    ports:
      - target: 3000
        published: 3002
        protocol: tcp
        mode: ingress
    deploy:
      mode: replicated
      replicas: 1
      restart_policy:
        condition: any
      labels:
        - 'traefik.enable=true'
        - 'traefik.http.routers.grafana.entrypoints=admin'
        - 'traefik.http.routers.grafana.rule=Host(`admin.localhost`) && PathPrefix(`/grafana`)'

        # Response compression middleware attached to the grafana router.
        - 'traefik.http.routers.grafana.middlewares=compress-all'
        - 'traefik.http.middlewares.compress-all.compress=true'
        - 'traefik.http.middlewares.compress-all.compress.encodings=zstd, br, gzip'

        - 'traefik.http.routers.grafana.service=grafana'
        - 'traefik.http.services.grafana.loadbalancer.server.port=3000'

  prometheus:
    image: prom/prometheus:latest
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-storage:/prometheus
    networks:
      - observability-network
      - grafana-frontend-network
    ports:
      - target: 9090
        published: 3001
        protocol: tcp
        mode: ingress
    deploy:
      mode: replicated
      replicas: 1
      restart_policy:
        condition: any
      # Scheduled on manager nodes only.
      placement:
        constraints:
          - node.role == manager

  loki:
    image: grafana/loki:latest
    command:
      - '-config.file=/etc/loki/local-config.yaml'
    volumes:
      - loki-storage:/loki
      - ./loki/loki-config.yaml:/etc/loki/local-config.yaml:ro
    networks:
      - observability-network
      - grafana-frontend-network
    ports:
      - target: 3100
        published: 3100
        protocol: tcp
        mode: ingress
    deploy:
      mode: replicated
      replicas: 1
      restart_policy:
        condition: any
      # Scheduled on manager nodes only.
      placement:
        constraints:
          - node.role == manager

  # The three services below run in `global` mode.
  alloy:
    image: grafana/alloy:latest
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./alloy/config.alloy:/etc/alloy/config.alloy:ro
    networks:
      - observability-network
    deploy:
      mode: global
      endpoint_mode: dnsrr
      restart_policy:
        condition: any

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    # Read-only views of host paths.
    volumes:
      - /:/rootfs:ro
      - /run:/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    networks:
      - observability-network
    deploy:
      mode: global
      endpoint_mode: dnsrr
      restart_policy:
        condition: any

  node_exporter:
    image: quay.io/prometheus/node-exporter:latest
    # Go-template placeholder, kept quoted so YAML reads it as a string.
    hostname: '{{.Node.Hostname}}'
    command:
      - '--path.rootfs=/host'
    volumes:
      - '/:/host:ro,rslave'
    networks:
      - observability-network
    deploy:
      mode: global
      endpoint_mode: dnsrr
      restart_policy:
        condition: any

networks:
  # Internal, encrypted overlay used by the collectors, Prometheus and Loki.
  observability-network:
    driver: overlay
    internal: true
    attachable: false
    driver_opts:
      encrypted: 'true'

  # Internal, encrypted overlay shared by Grafana, Prometheus and Loki.
  grafana-frontend-network:
    driver: overlay
    internal: true
    attachable: false
    driver_opts:
      encrypted: 'true'

  # Defined outside this stack; only Grafana attaches to it.
  swarm-proxy-network:
    external: true

volumes:
  grafana-storage: {}
  prometheus-storage: {}
  loki-storage: {}

secrets:
  grafana-admin-user:
    file: ./secrets/grafana-admin-user.txt
  grafana-admin-password:
    file: ./secrets/grafana-admin-password.txt
|
||||
11
observability/grafana/Dockerfile
Normal file
11
observability/grafana/Dockerfile
Normal file
@@ -0,0 +1,11 @@
|
||||
# Grafana image with the local configuration and provisioning baked in.

# Tag of the upstream base image. Defaults to `latest` (the previous
# hard-coded value); pass `--build-arg GRAFANA_VERSION=<tag>` to pin a release
# and get reproducible builds.
ARG GRAFANA_VERSION=latest
FROM grafana/grafana:${GRAFANA_VERSION}

# Disable Grafana sign up option
ENV GF_USERS_ALLOW_SIGN_UP=false

# Copy local Grafana config and provisioning into the image
COPY grafana.ini /etc/grafana/grafana.ini
COPY provisioning/ /etc/grafana/provisioning/

# No USER, ENTRYPOINT or CMD instruction is set in this file, so the values of
# the base image are inherited unchanged. EXPOSE only documents the HTTP port.
EXPOSE 3000
|
||||
41
observability/grafana/grafana.ini
Normal file
41
observability/grafana/grafana.ini
Normal file
@@ -0,0 +1,41 @@
|
||||
; https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#configuration-options
[server]
http_port = 3000

; https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#serve_from_sub_path
; Serve Grafana from the /grafana sub path. The Traefik router defined in
; dev-stack.yaml matches PathPrefix(`/grafana`) and has no StripPrefix
; middleware, so requests reach Grafana with the /grafana prefix still in the
; path; root_url and serve_from_sub_path must both reflect that prefix.
root_url = %(protocol)s://%(domain)s:%(http_port)s/grafana/
serve_from_sub_path = true
; Previous, disabled alternative kept for reference:
; root_url = http://grafana.localhost:3030/

enable_gzip = true

[analytics]
reporting_enabled = true
check_for_updates = true
check_for_plugin_updates = true
feedback_links_enabled = false

[security]
disable_gravatar = false

[users]
allow_sign_up = false
allow_org_create = false

[log]
; Accepted values: debug, info, warn, error
level = info

[explore]
enabled = true

[help]
enabled = true

[profile]
enabled = true

[news]
news_feed_enabled = false

[public_dashboards]
enabled = false
|
||||
|
||||
|
||||
|
||||
1570
observability/grafana/provisioning/dashboards/17346_rev9.json
Normal file
1570
observability/grafana/provisioning/dashboards/17346_rev9.json
Normal file
File diff suppressed because it is too large
Load Diff
15766
observability/grafana/provisioning/dashboards/1860_rev42.json
Normal file
15766
observability/grafana/provisioning/dashboards/1860_rev42.json
Normal file
File diff suppressed because it is too large
Load Diff
991
observability/grafana/provisioning/dashboards/19908_rev1.json
Normal file
991
observability/grafana/provisioning/dashboards/19908_rev1.json
Normal file
@@ -0,0 +1,991 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_PROMETHEUS",
|
||||
"label": "Prometheus",
|
||||
"description": "Prometheus is used for data collection from cAdvisor",
|
||||
"type": "datasource",
|
||||
"pluginId": "prometheus",
|
||||
"pluginName": "Prometheus"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "10.2.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "heatmap",
|
||||
"name": "Heatmap",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "prometheus",
|
||||
"name": "Prometheus",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "stat",
|
||||
"name": "Stat",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "timeseries",
|
||||
"name": "Time series",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"panels": [],
|
||||
"title": "Basic",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "count(count(container_last_seen) by (name))",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Running Containers",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineStyle": {
|
||||
"fill": "solid"
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"pointSize": 1,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 20,
|
||||
"x": 4,
|
||||
"y": 1
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"sortBy": "Mean",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(container_cpu_usage_seconds_total{name=~\".+\"}[10m])) by (name) * 100",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "CPU Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 4,
|
||||
"panels": [],
|
||||
"title": "Memory",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 1,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 11
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"sortBy": "Mean",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
  "datasource": {
    "type": "prometheus",
    "uid": "prometheus_uid"
  },
  "editorMode": "code",
  "expr": "sum(container_memory_usage_bytes{name=~\".+\"}) by (name)",
  "instant": false,
  "legendFormat": "__auto",
  "range": true,
  "refId": "A"
}
|
||||
],
|
||||
"title": "Memory Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 1,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 11
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"sortBy": "Mean",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
  "datasource": {
    "type": "prometheus",
    "uid": "prometheus_uid"
  },
  "editorMode": "code",
  "expr": "sum(container_memory_cache{name=~\".+\"}) by (name)",
  "instant": false,
  "legendFormat": "__auto",
  "range": true,
  "refId": "A"
}
|
||||
],
|
||||
"title": "Memory Cached",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 20
|
||||
},
|
||||
"id": 12,
|
||||
"panels": [],
|
||||
"title": "I/O",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineStyle": {
|
||||
"fill": "solid"
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"pointSize": 1,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "binBps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 13,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"sortBy": "Mean",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(container_fs_reads_bytes_total{name=~\".+\"}[10m])) by (name)",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Reads",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineStyle": {
|
||||
"fill": "solid"
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"pointSize": 1,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "binBps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 21
|
||||
},
|
||||
"id": 14,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"sortBy": "Mean",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(container_fs_writes_bytes_total{name=~\".+\"}[10m])) by (name)",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Writes",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 30
|
||||
},
|
||||
"id": 6,
|
||||
"panels": [],
|
||||
"title": "Network",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineStyle": {
|
||||
"fill": "solid"
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"pointSize": 1,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "binBps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 31
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"sortBy": "Mean",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(container_network_receive_bytes_total{name=~\".+\"}[10m])) by (name)",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Received Network Traffic",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineStyle": {
|
||||
"fill": "solid"
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"pointSize": 1,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "binBps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 31
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"sortBy": "Mean",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[10m])) by (name)",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Sent Network Traffic",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 40
|
||||
},
|
||||
"id": 11,
|
||||
"panels": [],
|
||||
"title": "Details",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
}
|
||||
},
|
||||
"fieldMinMax": true
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 13,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 41
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"calculate": false,
|
||||
"cellGap": 1,
|
||||
"color": {
|
||||
"exponent": 0.5,
|
||||
"fill": "dark-red",
|
||||
"mode": "opacity",
|
||||
"reverse": false,
|
||||
"scale": "exponential",
|
||||
"scheme": "PuRd",
|
||||
"steps": 30
|
||||
},
|
||||
"exemplars": {
|
||||
"color": "rgba(255,0,255,0.7)"
|
||||
},
|
||||
"filterValues": {
|
||||
"le": 1e-9
|
||||
},
|
||||
"legend": {
|
||||
"show": true
|
||||
},
|
||||
"rowsFrame": {
|
||||
"layout": "auto",
|
||||
"value": "Restarts"
|
||||
},
|
||||
"tooltip": {
|
||||
"show": true,
|
||||
"yHistogram": true
|
||||
},
|
||||
"yAxis": {
|
||||
"axisPlacement": "left",
|
||||
"reverse": false
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus_uid"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "count by(name) (count_over_time(container_last_seen{name=~\".+\"}[$__range]))",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Container Restarts",
|
||||
"type": "heatmap"
|
||||
}
|
||||
],
|
||||
"refresh": "5s",
|
||||
"schemaVersion": 38,
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-3h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "cAdvisor Docker Insights",
|
||||
"uid": "ae3c41d7-cea5-4cca-a918-5708706b4d1a",
|
||||
"version": 14,
|
||||
"weekStart": "",
|
||||
"gnetId": 19908,
|
||||
"description": "This Grafana dashboard offers a basic overview of key performance metrics for Docker containers in your system."
|
||||
}
|
||||
27
observability/grafana/provisioning/dashboards/default.yml
Normal file
27
observability/grafana/provisioning/dashboards/default.yml
Normal file
@@ -0,0 +1,27 @@
|
||||
apiVersion: 1
|
||||
|
||||
# https://grafana.com/docs/grafana/latest/administration/provisioning/#dashboards
|
||||
providers:
|
||||
# https://grafana.com/grafana/dashboards/19908-docker-container-monitoring-with-prometheus-and-cadvisor/
|
||||
- name: cAdvisor
|
||||
type: file
|
||||
disableDeletion: true
|
||||
allowUiUpdates: false
|
||||
options:
|
||||
path: /etc/grafana/provisioning/dashboards/19908_rev1.json
|
||||
|
||||
# https://grafana.com/grafana/dashboards/1860-node-exporter-full/
|
||||
- name: Node Exporter
|
||||
type: file
|
||||
disableDeletion: true
|
||||
allowUiUpdates: false
|
||||
options:
|
||||
path: /etc/grafana/provisioning/dashboards/1860_rev42.json
|
||||
|
||||
# https://grafana.com/grafana/dashboards/17346-traefik-official-standalone-dashboard/
|
||||
- name: Traefik
|
||||
type: file
|
||||
disableDeletion: true
|
||||
allowUiUpdates: false
|
||||
options:
|
||||
path: /etc/grafana/provisioning/dashboards/17346_rev9.json
|
||||
@@ -0,0 +1,18 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
uid: prometheus_uid
|
||||
editable: false
|
||||
isDefault: true
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
|
||||
- name: Loki
|
||||
type: loki
|
||||
uid: loki_uid
|
||||
editable: false
|
||||
isDefault: false
|
||||
access: proxy
|
||||
url: http://loki:3100
|
||||
7
observability/loki/Dockerfile
Normal file
7
observability/loki/Dockerfile
Normal file
@@ -0,0 +1,7 @@
|
||||
FROM grafana/loki:latest
|
||||
|
||||
COPY loki-config.yaml /etc/loki/local-config.yaml
|
||||
|
||||
CMD ["-config.file=/etc/loki/local-config.yaml"]
|
||||
|
||||
EXPOSE 3100
|
||||
51
observability/loki/loki-config.yaml
Normal file
51
observability/loki/loki-config.yaml
Normal file
@@ -0,0 +1,51 @@
|
||||
auth_enabled: false
|
||||
|
||||
# https://grafana.com/docs/loki/latest/configure/#server
|
||||
server:
|
||||
http_listen_port: 3100
|
||||
grpc_listen_port: 9096
|
||||
log_level: info # options: debug, info, warn, error
|
||||
grpc_server_max_concurrent_streams: 1000
|
||||
|
||||
# https://grafana.com/docs/loki/latest/configure/#common
|
||||
common:
|
||||
# Loki is composed of multiple microservices that communicate with each other (gRPC).
|
||||
# The `instance_addr` is the address each microservice uses for inter-service communication.
|
||||
# We use the [monolithic mode](https://grafana.com/docs/loki/latest/get-started/deployment-modes/#monolithic-mode) for simplicity.
|
||||
# Therefore, we set the address to 127.0.0.1 so services can communicate with each other,
|
||||
# even when the container IP changes during upgrades.
|
||||
instance_addr: 127.0.0.1
|
||||
path_prefix: /loki
|
||||
replication_factor: 1
|
||||
ring:
|
||||
kvstore:
|
||||
store: inmemory
|
||||
storage:
|
||||
filesystem:
|
||||
chunks_directory: /loki/chunks
|
||||
rules_directory: /loki/rules
|
||||
|
||||
schema_config:
|
||||
configs:
|
||||
- from: 2025-01-01
|
||||
store: tsdb
|
||||
object_store: filesystem
|
||||
schema: v13
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
|
||||
frontend:
|
||||
encoding: protobuf
|
||||
# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration
|
||||
# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/
|
||||
#
|
||||
# Statistics help us better understand how Loki is used, and they show us performance
|
||||
# levels for most users. This helps us prioritize features and documentation.
|
||||
# For more information on what's sent, look at
|
||||
# https://github.com/grafana/loki/blob/main/pkg/analytics/stats.go
|
||||
# Refer to the buildReport method to see what goes into a report.
|
||||
#
|
||||
# If you would like to disable reporting, uncomment the following lines:
|
||||
#analytics:
|
||||
# reporting_enabled: false
|
||||
13
observability/prometheus/Dockerfile
Normal file
13
observability/prometheus/Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
# https://github.com/prometheus/prometheus/releases
|
||||
FROM prom/prometheus:latest
|
||||
|
||||
# Copy Prometheus configuration
|
||||
COPY prometheus.yaml /etc/prometheus/prometheus.yaml
|
||||
|
||||
# Expose the Prometheus HTTP port
|
||||
EXPOSE 9090
|
||||
|
||||
# Start Prometheus with the provided configuration file
|
||||
ENTRYPOINT ["prometheus"]
|
||||
CMD ["--config.file=/etc/prometheus/prometheus.yaml", "--storage.tsdb.path=/prometheus"]
|
||||
41
observability/prometheus/prometheus.yaml
Normal file
41
observability/prometheus/prometheus.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
# Prometheus main configuration
|
||||
|
||||
global:
|
||||
scrape_interval: 30s
|
||||
evaluation_interval: 30s
|
||||
|
||||
|
||||
|
||||
scrape_configs:
|
||||
# Scrape Prometheus itself
|
||||
- job_name: "prometheus"
|
||||
metrics_path: /metrics
|
||||
dns_sd_configs:
|
||||
- names: ["tasks.prometheus"]
|
||||
type: A
|
||||
refresh_interval: "30s"
|
||||
port: 9090
|
||||
|
||||
# Scrape cAdvisor
|
||||
- job_name: "cadvisor"
|
||||
metrics_path: /metrics
|
||||
dns_sd_configs:
|
||||
- names: ["tasks.cadvisor"]
|
||||
type: A
|
||||
refresh_interval: "30s"
|
||||
port: 8080
|
||||
|
||||
# Scrape Node Exporter
|
||||
- job_name: "node_exporter"
|
||||
metrics_path: /metrics
|
||||
dns_sd_configs:
|
||||
- names: ["tasks.node_exporter"]
|
||||
type: A
|
||||
refresh_interval: "30s"
|
||||
port: 9100
|
||||
|
||||
# Scrape Traefik
|
||||
- job_name: "traefik"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["traefik:9090"]
|
||||
1
observability/secrets/grafana-admin-password.txt
Normal file
1
observability/secrets/grafana-admin-password.txt
Normal file
@@ -0,0 +1 @@
|
||||
admin
|
||||
1
observability/secrets/grafana-admin-user.txt
Normal file
1
observability/secrets/grafana-admin-user.txt
Normal file
@@ -0,0 +1 @@
|
||||
admin
|
||||
149
observability/stack.yaml
Normal file
149
observability/stack.yaml
Normal file
@@ -0,0 +1,149 @@
|
||||
networks:
|
||||
observability-network:
|
||||
driver: overlay
|
||||
attachable: false
|
||||
internal: true
|
||||
driver_opts:
|
||||
encrypted: "true"
|
||||
|
||||
grafana-frontend-network:
|
||||
driver: overlay
|
||||
attachable: false
|
||||
internal: true
|
||||
driver_opts:
|
||||
encrypted: "true"
|
||||
|
||||
proxy-swarm-network:
|
||||
external: true
|
||||
|
||||
secrets:
|
||||
grafana-admin-user:
|
||||
file: ./secrets/grafana-admin-user.txt
|
||||
grafana-admin-password:
|
||||
file: ./secrets/grafana-admin-password.txt
|
||||
|
||||
volumes:
|
||||
grafana-storage: {}
|
||||
prometheus-storage: {}
|
||||
loki-storage: {}
|
||||
|
||||
services:
|
||||
grafana:
|
||||
image: ghcr.io/hyntaria/observability/grafana:v1.0
|
||||
environment:
|
||||
GF_SECURITY_ADMIN_USER__FILE: /run/secrets/grafana-admin-user
|
||||
GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/grafana-admin-password
|
||||
secrets:
|
||||
- grafana-admin-user
|
||||
- grafana-admin-password
|
||||
volumes:
|
||||
- grafana-storage:/var/lib/grafana
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: any
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.grafana.entrypoints=admin"
|
||||
- "traefik.http.routers.grafana.rule=Host(`grafana.localhost`)"
|
||||
|
||||
- "traefik.http.routers.grafana.middlewares=compress-grafana"
|
||||
- "traefik.http.middlewares.compress-grafana.compress=true"
|
||||
- "traefik.http.middlewares.compress-grafana.compress.encodings=zstd, br, gzip"
|
||||
|
||||
- "traefik.http.routers.grafana.service=grafana"
|
||||
- "traefik.http.services.grafana.loadbalancer.server.port=3000"
|
||||
networks:
|
||||
- proxy-swarm-network
|
||||
- grafana-frontend-network
|
||||
|
||||
prometheus:
|
||||
image: ghcr.io/hyntaria/observability/prometheus:v0.2
|
||||
command:
  # The config path must match the location the Prometheus image's Dockerfile
  # copies the configuration to (/etc/prometheus/prometheus.yaml). Pointing at
  # prometheus.yml would not load that file, so its scrape jobs would be ignored.
  - "--config.file=/etc/prometheus/prometheus.yaml"
  - "--storage.tsdb.path=/prometheus"
|
||||
volumes:
|
||||
- prometheus-storage:/prometheus
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: any
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == manager
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.prometheus.entrypoints=admin"
|
||||
- "traefik.http.routers.prometheus.rule=Host(`prometheus.localhost`)"
|
||||
|
||||
- "traefik.http.routers.prometheus.middlewares=compress-prometheus"
|
||||
- "traefik.http.middlewares.compress-prometheus.compress=true"
|
||||
- "traefik.http.middlewares.compress-prometheus.compress.encodings=zstd, br, gzip"
|
||||
|
||||
- "traefik.http.routers.prometheus.service=prometheus"
|
||||
- "traefik.http.services.prometheus.loadbalancer.server.port=9090"
|
||||
networks:
|
||||
- observability-network
|
||||
- grafana-frontend-network
|
||||
- proxy-swarm-network
|
||||
|
||||
loki:
|
||||
image: ghcr.io/hyntaria/observability/loki:v0.1
|
||||
volumes:
|
||||
- loki-storage:/loki
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: any
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == manager
|
||||
networks:
|
||||
- observability-network
|
||||
- grafana-frontend-network
|
||||
|
||||
cadvisor:
|
||||
image: gcr.io/cadvisor/cadvisor:latest
|
||||
volumes:
|
||||
- /:/rootfs:ro
|
||||
- /run:/run:ro
|
||||
- /sys:/sys:ro
|
||||
- /var/lib/docker/:/var/lib/docker:ro
|
||||
- /dev/disk/:/dev/disk:ro
|
||||
deploy:
|
||||
mode: global
|
||||
endpoint_mode: dnsrr
|
||||
restart_policy:
|
||||
condition: any
|
||||
networks:
|
||||
- observability-network
|
||||
|
||||
node_exporter:
|
||||
image: quay.io/prometheus/node-exporter:latest
|
||||
hostname: "{{.Node.Hostname}}"
|
||||
command:
|
||||
- "--path.rootfs=/host"
|
||||
volumes:
|
||||
- "/:/host:ro,rslave"
|
||||
deploy:
|
||||
mode: global
|
||||
endpoint_mode: dnsrr
|
||||
restart_policy:
|
||||
condition: any
|
||||
networks:
|
||||
- observability-network
|
||||
|
||||
alloy:
|
||||
image: ghcr.io/hyntaria/observability/alloy:v0.1
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
deploy:
|
||||
mode: global
|
||||
endpoint_mode: dnsrr
|
||||
restart_policy:
|
||||
condition: any
|
||||
networks:
|
||||
- observability-network
|
||||
18
scripts/bind-telemetry.sh
Executable file
18
scripts/bind-telemetry.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
#
# Opens an SSH local port-forward from this machine to a port bound on the
# remote server's loopback interface, so the telemetry dashboard can be
# reached from a local browser. Runs in the foreground until interrupted.

# Local port on your machine
LOCAL_PORT=3030

# Remote port on the server
REMOTE_PORT=3030

# SSH host alias defined in ~/.ssh/config
SSH_ALIAS="hyntaria"

echo "🔐 Establishing SSH tunnel: localhost:${LOCAL_PORT} → ${SSH_ALIAS}:localhost:${REMOTE_PORT}"
echo "🌐 Access the telemetry dashboard at http://grafana.localhost:${LOCAL_PORT}"
echo "Press Ctrl+C to close the tunnel."

# -N: do not run a remote command, only forward ports.
# ExitOnForwardFailure=yes: exit immediately if the forward cannot be set up
#   (e.g. the local port is already in use) instead of idling with no tunnel.
# Expansions are quoted so the values are never word-split or glob-expanded.
ssh -N -o ExitOnForwardFailure=yes -L "${LOCAL_PORT}:127.0.0.1:${REMOTE_PORT}" "${SSH_ALIAS}"
|
||||
|
||||
|
||||
104
traefik/compose.yaml
Normal file
104
traefik/compose.yaml
Normal file
@@ -0,0 +1,104 @@
|
||||
networks:
|
||||
|
||||
# External network created outside of this compose file with:
|
||||
# docker network create proxy-swarm-network --attachable --driver overlay --opt encrypted
|
||||
proxy-swarm-network:
|
||||
external: true
|
||||
|
||||
# External network created outside of this compose file with:
|
||||
# docker network create proxy-docker-network
|
||||
proxy-docker-network:
|
||||
external: true
|
||||
|
||||
volumes:
|
||||
traefik-acme: {}
|
||||
|
||||
services:
|
||||
traefik:
|
||||
image: traefik:v3.6.7
|
||||
container_name: traefik
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- "--global.checknewversion=true"
|
||||
- "--global.sendanonymoususage=true"
|
||||
|
||||
# Log configuration
|
||||
- "--accesslog=true" # Enable Access Logs
|
||||
- "--log.level=INFO" # Set the Log Level e.g INFO, DEBUG
|
||||
# - "--log.format=json"
|
||||
|
||||
# Prometheus Metrics configuration
|
||||
- "--metrics.prometheus=true"
|
||||
- "--metrics.prometheus.entrypoint=metrics"
|
||||
- "--metrics.prometheus.addrouterslabels=true"
|
||||
- "--metrics.prometheus.addserviceslabels=true"
|
||||
|
||||
# Dashboard and API configuration
|
||||
- "--api.dashboard=true"
|
||||
- "--api.insecure=false"
|
||||
- "--api.basepath=/"
|
||||
|
||||
# Docker Provider configuration
|
||||
- "--providers.docker.endpoint=unix:///var/run/docker.sock"
|
||||
- "--providers.docker.watch=true"
|
||||
- "--providers.docker.exposedbydefault=false"
|
||||
- "--providers.docker.network=proxy-docker-network"
|
||||
|
||||
# Swarm Provider configuration
|
||||
- "--providers.swarm.endpoint=unix:///var/run/docker.sock"
|
||||
- "--providers.swarm.watch=true"
|
||||
- "--providers.swarm.exposedbydefault=false"
|
||||
- "--providers.swarm.network=proxy-swarm-network"
|
||||
|
||||
# EntryPoints configuration
|
||||
- "--entrypoints.web.address=:80"
|
||||
- "--entrypoints.web.http.redirections.entrypoint.to=websecure"
|
||||
- "--entrypoints.web.http.redirections.entrypoint.scheme=https"
|
||||
- "--entrypoints.web.http.redirections.entrypoint.permanent=true"
|
||||
|
||||
- "--entrypoints.websecure.address=:443"
|
||||
- "--entrypoints.websecure.http.tls=true"
|
||||
|
||||
- "--entrypoints.admin.address=:3030"
|
||||
- "--entrypoints.metrics.address=:9090"
|
||||
|
||||
# Certificates Resolver configuration
|
||||
- "--certificatesresolvers.default-resolver.acme.email=mart1.guillemot@gmail.com"
|
||||
- "--certificatesresolvers.default-resolver.acme.storage=/etc/traefik/acme/default-resolver-acme.json"
|
||||
- "--certificatesresolvers.default-resolver.acme.tlschallenge=true"
|
||||
|
||||
- "--certificatesresolvers.default-resolver.acme.caserver=https://acme-v02.api.letsencrypt.org/directory"
|
||||
# - "--certificatesresolvers.default-resolver.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory"
|
||||
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.traefik.entrypoints=admin"
|
||||
- "traefik.http.routers.traefik.rule=Host(`traefik.localhost`)"
|
||||
- "traefik.http.routers.traefik.service=api@internal"
|
||||
|
||||
# Add a compression middleware which can be used by other services
|
||||
- "traefik.http.routers.traefik.middlewares=compress-all"
|
||||
- "traefik.http.middlewares.compress-all.compress=true"
|
||||
- "traefik.http.middlewares.compress-all.compress.encodings=zstd, br, gzip"
|
||||
|
||||
# environment:
|
||||
# - CLOUDFLARE_DNS_API_TOKEN=${CLOUDFLARE_DNS_API_TOKEN}
|
||||
|
||||
# - OVH_ENDPOINT=ovh-eu
|
||||
# - OVH_APPLICATION_KEY=${OVH_APPLICATION_KEY}
|
||||
# - OVH_APPLICATION_SECRET=${OVH_APPLICATION_SECRET}
|
||||
# - OVH_CONSUMER_KEY=${OVH_CONSUMER_KEY}
|
||||
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
- "127.0.0.1:3030:3030"
|
||||
networks:
|
||||
- proxy-docker-network
|
||||
- proxy-swarm-network
|
||||
|
||||
volumes:
|
||||
# So that Traefik can listen to the Docker events
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
# So that Traefik can access the acme.json file
|
||||
- traefik-acme:/etc/traefik/acme/:rw
|
||||
Reference in New Issue
Block a user