master
greglebreton 2 years ago
commit ad12256bdc
  1. 51
      README.md
  2. 42
      docker-compose-server.yml
  3. 11
      docker-compose-telegraf-agent.yml
  4. 373
      grafana/dashboards/performance.json
  5. 11
      grafana/provisioning/dashboards/all.yml
  6. 46
      grafana/provisioning/datasources/influxdb.yml
  7. 212
      telegraf/telegraf.conf

@ -0,0 +1,51 @@
# Telegraf-influxDB-grafana
![LOGO](docs/logo.png)
## Config
- Grafana: grafana/provisioning/datasources/influxdb.yml
- InfluxDB:
### Production use
![PUSE](docs/production.png)
## Usage
- start stack
```bash
docker-compose up -d
```
- see logs
```bash
docker-compose logs -f
```
## Dashboards
- AMD64:
- ARM64:
---------------------
## Services and Ports
### Grafana
- URL: http://localhost:3000
- User: admin
- Password: admin
### Telegraf
- Port: 8125 UDP (StatsD input)
### InfluxDB
- Port: 8086 (HTTP API)
- User: admin
- Password: admin
- Database: influx

@ -0,0 +1,42 @@
version: '3.6'
services:
influxdb:
image: influxdb:1.8-alpine
env_file: .env
ports:
- '8086:8086'
volumes:
- influxdb_data:/var/lib/influxdb
- ./influxdb/imports:/imports
grafana:
image: grafana/grafana:8.0.2
depends_on:
- influxdb
env_file: .env
links:
- influxdb
ports:
- '3000:3000'
volumes:
- grafana_data:/var/lib/grafana
- ./grafana/provisioning/:/etc/grafana/provisioning/
- ./grafana/dashboards/:/var/lib/grafana/dashboards/
telegraf:
image: telegraf:1.18-alpine
# image: telegraf:latest #(for amd64)
volumes:
- ./telegraf/telegraf.conf:/etc/telegraf/telegraf.conf:ro
depends_on:
- influxdb
links:
- influxdb
ports:
- '8125:8125/udp'
volumes:
grafana_data: {}
influxdb_data: {}

@ -0,0 +1,11 @@
version: "3.6"
services:
telegraf:
image: telegraf:1.18-alpine
# image: telegraf:latest #(for amd64)
volumes:
- ./telegraf/telegraf.conf:/etc/telegraf/telegraf.conf:ro
ports:
- '8125:8125/udp'

@ -0,0 +1,373 @@
{
"annotations": {
"list": [
{
"$$hashKey": "object:7",
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "InfluxDB",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 0
},
"hiddenSeries": false,
"id": 4,
"interval": "",
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"alias": "$tag_type",
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"type"
],
"type": "tag"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "performance_request_successful_time",
"orderByTime": "ASC",
"policy": "default",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"90_percentile"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Request Time",
"tooltip": {
"shared": true,
"sort": 1,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:237",
"decimals": null,
"format": "ms",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:238",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "InfluxDB",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 11,
"w": 24,
"x": 0,
"y": 9
},
"hiddenSeries": false,
"id": 2,
"legend": {
"alignAsTable": true,
"avg": true,
"current": false,
"hideEmpty": false,
"hideZero": false,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"total": true,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"alias": "Request Type: $tag_type",
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"type"
],
"type": "tag"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"hide": false,
"measurement": "performance_request_successful_count",
"orderByTime": "ASC",
"policy": "default",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
},
{
"params": [
" / $__interval_ms*1000"
],
"type": "math"
}
]
],
"tags": []
},
{
"alias": "All Types",
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
}
],
"hide": false,
"measurement": "performance_request_successful_count",
"orderByTime": "ASC",
"policy": "default",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
},
{
"params": [
" / $__interval_ms*1000"
],
"type": "math"
}
]
],
"tags": []
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Requests per Second",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:126",
"format": "reqps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:127",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "5s",
"schemaVersion": 22,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Performance",
"uid": "1Mar-DTiz",
"variables": {
"list": []
},
"version": 1
}

@ -0,0 +1,11 @@
apiVersion: 1
providers:
- name: 'default'
orgId: 1
folder: ''
type: file
disableDeletion: false
updateIntervalSeconds: 3 #how often Grafana will scan for changed dashboards
options:
path: /var/lib/grafana/dashboards

@ -0,0 +1,46 @@
# config file version
apiVersion: 1
# list of datasources that should be deleted from the database
deleteDatasources:
- name: Influxdb
orgId: 1
# list of datasources to insert/update depending
# whats available in the database
datasources:
# <string, required> name of the datasource. Required
- name: InfluxDB
# <string, required> datasource type. Required
type: influxdb
# <string, required> access mode. direct or proxy. Required
access: proxy
# <int> org id. will default to orgId 1 if not specified
orgId: 1
# <string> url
url: http://influxdb:8086
# <string> database password, if used
password: "admin"
# <string> database user, if used
user: "admin"
# <string> database name, if used
database: "influx"
# <bool> enable/disable basic auth
basicAuth: false
# withCredentials:
# <bool> mark as default datasource. Max one per org
isDefault: true
# <map> fields that will be converted to json and stored in json_data
jsonData:
timeInterval: "5s"
# graphiteVersion: "1.1"
# tlsAuth: false
# tlsAuthWithCACert: false
# # <string> json object of data that will be encrypted.
# secureJsonData:
# tlsCACert: "..."
# tlsClientCert: "..."
# tlsClientKey: "..."
version: 1
# <bool> allow users to edit datasources from the UI.
editable: false

@ -0,0 +1,212 @@
# Telegraf configuration
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared inputs, and sent to the declared outputs.
# Plugins must be declared in here to be active.
# To deactivate a plugin, comment out the name and any variables.
# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
# file would generate.
# Global tags can be specified here in key="value" format.
[global_tags]
# dc = "us-east-1" # will tag all metrics with dc=us-east-1
# rack = "1a"
# Configuration for telegraf agent
[agent]
## Default data collection interval for all inputs
interval = "5s"
## Rounds collection interval to 'interval'
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true
## Telegraf will cache metric_buffer_limit metrics for each output, and will
## flush this buffer on a successful write.
metric_buffer_limit = 10000
## Flush the buffer whenever full, regardless of flush_interval.
flush_buffer_when_full = true
## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
## This can be used to avoid many plugins querying things like sysfs at the
## same time, which can have a measurable effect on the system.
collection_jitter = "0s"
## Default flushing interval for all outputs. You shouldn't set this below
## interval. Maximum flush_interval will be flush_interval + flush_jitter
flush_interval = "1s"
## Jitter the flush interval by a random amount. This is primarily to avoid
## large write spikes for users running a large number of telegraf instances.
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"
## Run telegraf in debug mode
debug = false
## Run telegraf in quiet mode
quiet = false
## Override default hostname, if empty use os.Hostname()
hostname = "lenovo"
###############################################################################
# OUTPUTS #
###############################################################################
# Configuration for influxdb server to send metrics to
[[outputs.influxdb]]
# The full HTTP or UDP endpoint URL for your InfluxDB instance.
# Multiple urls can be specified but it is assumed that they are part of the same
# cluster, this means that only ONE of the urls will be written to each interval.
# urls = ["udp://localhost:8089"] # UDP endpoint example
urls = ["http://influxdb:8086"] # required
# The target database for metrics (telegraf will create it if not exists)
database = "influx" # required
# Precision of writes, valid values are "ns", "us" (or "µs"), "ms", "s", "m", "h".
# note: using second precision greatly helps InfluxDB compression
precision = "s"
## Write timeout (for the InfluxDB client), formatted as a string.
## If not provided, will default to 5s. 0s means no timeout (not recommended).
timeout = "5s"
# username = "telegraf"
# password = "metricsmetricsmetricsmetrics"
# Set the user agent for HTTP POSTs (can be useful for log differentiation)
# user_agent = "telegraf"
# Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes)
# udp_payload = 512
###############################################################################
# INPUTS #
###############################################################################
# Statsd Server
[[inputs.statsd]]
## Protocol, must be "tcp", "udp4", "udp6" or "udp" (default=udp)
protocol = "udp"
## MaxTCPConnection - applicable when protocol is set to tcp (default=250)
max_tcp_connections = 250
## Enable TCP keep alive probes (default=false)
tcp_keep_alive = false
## Specifies the keep-alive period for an active network connection.
## Only applies to TCP sockets and will be ignored if tcp_keep_alive is false.
## Defaults to the OS configuration.
# tcp_keep_alive_period = "2h"
## Address and port to host UDP listener on
service_address = ":8125"
## The following configuration options control when telegraf clears it's cache
## of previous values. If set to false, then telegraf will only clear it's
## cache when the daemon is restarted.
## Reset gauges every interval (default=true)
delete_gauges = true
## Reset counters every interval (default=true)
delete_counters = true
## Reset sets every interval (default=true)
delete_sets = true
## Reset timings & histograms every interval (default=true)
delete_timings = true
## Percentiles to calculate for timing & histogram stats
percentiles = [90]
## separator to use between elements of a statsd metric
metric_separator = "_"
## Parses tags in the datadog statsd format
## http://docs.datadoghq.com/guides/dogstatsd/
parse_data_dog_tags = false
## Statsd data translation templates, more info can be read here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#graphite
# templates = [
# "cpu.* measurement*"
# ]
## Number of UDP messages allowed to queue up, once filled,
## the statsd server will start dropping packets
allowed_pending_messages = 10000
## Number of timing/histogram values to track per-measurement in the
## calculation of percentiles. Raising this limit increases the accuracy
## of percentiles but also increases the memory usage and cpu time.
percentile_limit = 1000
## Maximum socket buffer size in bytes, once the buffer fills up, metrics
## will start dropping. Defaults to the OS default.
# read_buffer_size = 65535
# Read metrics about cpu usage
[[inputs.cpu]]
## Whether to report per-cpu stats or not
percpu = true
## Whether to report total system cpu stats or not
totalcpu = true
## Comment this line if you want the raw CPU time metrics
fielddrop = ["time_*"]
# Read metrics about disk usage by mount point
[[inputs.disk]]
## By default, telegraf gather stats for all mountpoints.
## Setting mountpoints will restrict the stats to the specified mountpoints.
# mount_points = ["/"]
## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
## present on /run, /var/run, /dev/shm or /dev).
ignore_fs = ["tmpfs", "devtmpfs"]
# Read metrics about disk IO by device
[[inputs.diskio]]
## By default, telegraf will gather stats for all devices including
## disk partitions.
## Setting devices will restrict the stats to the specified devices.
# devices = ["sda", "sdb"]
## Uncomment the following line if you need disk serial numbers.
# skip_serial_number = false
# Get kernel statistics from /proc/stat
[[inputs.kernel]]
# no configuration
# Read metrics about memory usage
[[inputs.mem]]
# no configuration
# Get the number of processes and group them by status
[[inputs.processes]]
# no configuration
# Read metrics about swap memory usage
[[inputs.swap]]
# no configuration
# Read metrics about system load & uptime
[[inputs.system]]
# no configuration
# Read metrics about network interface usage
[[inputs.net]]
# collect data only about specific interfaces
# interfaces = ["eth0"]
[[inputs.netstat]]
# no configuration
[[inputs.interrupts]]
# no configuration
[[inputs.linux_sysctl_fs]]
# no configuration
Loading…
Cancel
Save