Skip to content
Merged
46 changes: 43 additions & 3 deletions plugins/inputs/system/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,14 @@ plugin ordering. See [CONFIGURATION.md][CONFIGURATION.md] for more details.
## legacy_cpus - legacy layout of CPU counts; see README for details
## uptime - system uptime
## legacy_uptime - legacy layout of system uptime; see README for details
## os - operating system release and uname information
# include = ["load", "users", "legacy_cpus", "legacy_uptime"]

## How long to cache the result of the "os" group between gathers.
## Set higher to reduce the number of os-release/uname reads, lower to
## surface distro upgrades and kexec'd kernels faster. Set to zero to
## re-read the data on every gather.
# os_cache_ttl = "8h"
```

> [!NOTE]
Expand All @@ -52,12 +59,21 @@ The `n_unique_users` shows the count of unique usernames logged in. This way if
a user has multiple sessions open/started they would only get counted once. The
same requirements for `n_users` apply.

The `os` group reads `/etc/os-release` on Linux (typically world-readable) and
calls the `uname` syscall on POSIX systems. On platforms where gopsutil cannot
provide a particular value (e.g. parts of FreeBSD/OpenBSD/Solaris) the
corresponding field is left empty; if no field can be gathered, the
`system_os` metric is skipped entirely. Results are cached between gathers;
Comment thread
bilkoua marked this conversation as resolved.
Outdated
see `os_cache_ttl` above.

## Metrics

### `system`
The `include` option controls which measurements and fields are gathered.
The `load`, `users`, `cpus` / `legacy_cpus` and `uptime` / `legacy_uptime`
groups populate the `system` measurement, while the `os` group emits a
separate `system_os` measurement.

All fields below belong to the `system` measurement. The `include` option
controls which groups are gathered.
### `system`

| Field | Include option | Type | Description |
|-------------------|----------------------------|---------|---------------------------------------------|
Expand All @@ -73,6 +89,22 @@ controls which groups are gathered.
| `uptime` | `legacy_uptime` | integer | System uptime in seconds (separate counter) |
| `uptime_format` | `legacy_uptime` | string | Human-readable uptime (deprecated) |

### `system_os`

Emitted only when `os` is included. The values reflect operating system
release information together with `uname`-style kernel data. Fields are
reported as strings; on platforms where a particular value cannot be
determined, the corresponding field is empty.

| Field | Type | Description |
|--------------------|--------|----------------------------------------------------------------------|
| `os` | string | Operating system family as reported by Go's runtime (e.g. `linux`) |
| `arch` | string | Architecture as reported by Go's runtime (e.g. `amd64`) |
| `platform` | string | OS distribution / platform identifier (e.g. `ubuntu`, `centos`) |
| `platform_family` | string | Platform family (e.g. `debian`, `rhel`) |
| `platform_version` | string | Platform / distribution version (e.g. `26.04`) |
| `kernel_version` | string | Kernel release as returned by `uname -r` (e.g. `7.0.0-7-generic`) |

## Example Output

### Default configuration
Expand All @@ -94,3 +126,11 @@ in a single metric with the new field names:
```text
system,host=worker-01 load1=3.72,load5=2.4,load15=2.1,n_users=3i,n_unique_users=2i,n_virtual_cpus=4i,n_physical_cpus=2i,uptime=1249632i 1748000000000000000
```

### OS information

With `include = ["os"]`, a separate `system_os` measurement is emitted:

```text
system_os,host=worker-01 os="linux",arch="amd64",platform="ubuntu",platform_family="debian",platform_version="26.04",kernel_version="7.0.0-7-generic" 1748000000000000000
```
7 changes: 7 additions & 0 deletions plugins/inputs/system/sample.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,11 @@
## legacy_cpus - legacy layout of CPU counts; see README for details
## uptime - system uptime
## legacy_uptime - legacy layout of system uptime; see README for details
## os - operating system release and uname information
# include = ["load", "users", "legacy_cpus", "legacy_uptime"]

## How long to cache the result of the "os" group between gathers.
## Set higher to reduce the number of os-release/uname reads, lower to
## surface distro upgrades and kexec'd kernels faster. Set to zero to
## re-read the data on every gather.
# os_cache_ttl = "8h"
54 changes: 50 additions & 4 deletions plugins/inputs/system/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"os"
"runtime"
"strings"
"time"

Expand All @@ -24,8 +25,12 @@ import (
var sampleConfig string

type System struct {
Include []string `toml:"include"`
Log telegraf.Logger `toml:"-"`
Include []string `toml:"include"`
OSCacheTTL config.Duration `toml:"os_cache_ttl"`
Log telegraf.Logger `toml:"-"`

osFields map[string]interface{}
Comment thread
bilkoua marked this conversation as resolved.
Outdated
osCachedAt time.Time
}

func (*System) SampleConfig() string {
Expand All @@ -46,7 +51,7 @@ func (s *System) Init() error {
continue
}
switch incl {
case "load", "users", "cpus", "uptime":
case "load", "users", "cpus", "uptime", "os":
case "legacy_cpus":
if userSupplied {
config.PrintOptionValueDeprecationNotice(
Expand Down Expand Up @@ -97,6 +102,19 @@ func (s *System) Gather(acc telegraf.Accumulator) error {

for _, incl := range s.Include {
switch incl {
case "os":
if time.Since(s.osCachedAt) > time.Duration(s.OSCacheTTL) {
osFields, err := gatherOS()
if err != nil {
acc.AddError(err)
} else {
s.osFields = osFields
s.osCachedAt = now
}
}
if len(s.osFields) > 0 {
acc.AddFields("system_os", s.osFields, nil, now)
}
case "load":
loadavg, err := load.Avg()
if err != nil {
Expand Down Expand Up @@ -166,6 +184,32 @@ func (s *System) Gather(acc telegraf.Accumulator) error {
return nil
}

// gatherOS reads OS release and uname information via gopsutil, skipping
// host.Info() to avoid the unrelated virtualization, boot-time and
// process-count probes.
func gatherOS() (map[string]interface{}, error) {
platform, family, version, err := host.PlatformInformation()
if err != nil && !strings.Contains(err.Error(), "not implemented") {
return nil, fmt.Errorf("reading platform information: %w", err)
}
kernelVersion, err := host.KernelVersion()
if err != nil && !strings.Contains(err.Error(), "not implemented") {
return nil, fmt.Errorf("reading kernel version: %w", err)
}

if platform == "" && family == "" && version == "" && kernelVersion == "" {
return nil, nil
}
return map[string]interface{}{
"os": runtime.GOOS,
"arch": runtime.GOARCH,
"platform": platform,
"platform_family": family,
"platform_version": version,
"kernel_version": kernelVersion,
}, nil
Comment thread
bilkoua marked this conversation as resolved.
Outdated
}

func findUniqueUsers(userStats []host.UserStat) int {
uniqueUsers := make(map[string]bool)
for _, userstat := range userStats {
Expand Down Expand Up @@ -201,6 +245,8 @@ func formatUptime(uptime uint64) string {

func init() {
inputs.Add("system", func() telegraf.Input {
return &System{}
return &System{
OSCacheTTL: config.Duration(8 * time.Hour),
}
})
}
51 changes: 51 additions & 0 deletions plugins/inputs/system/system_os_linux_test.go
Comment thread
bilkoua marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
//go:build linux

package system

import (
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/require"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
)

// TestGatherOSValuesLinux runs a single Gather with only the "os" group
// enabled and checks that one "system_os" metric with the expected field
// set is produced.
func TestGatherOSValuesLinux(t *testing.T) {
	// Point HOST_ETC at the fixture directory that holds the test
	// os-release file.
	// NOTE(review): presumably gopsutil honours HOST_ETC when locating
	// os-release - confirm against the gopsutil version in use.
	fixtureDir, err := filepath.Abs(filepath.Join("testdata", "os-release"))
	require.NoError(t, err)
	t.Setenv("HOST_ETC", fixtureDir)

	plugin := &System{
		Include:    []string{"os"},
		OSCacheTTL: config.Duration(8 * time.Hour),
		Log:        &testutil.Logger{},
	}
	require.NoError(t, plugin.Init())

	var acc testutil.Accumulator
	require.NoError(t, plugin.Gather(&acc))

	// "platform" and "platform_version" mirror ID and VERSION_ID of the
	// os-release fixture.
	wantFields := map[string]interface{}{
		"os":               "linux",
		"arch":             "amd64",
		"platform":         "telegraftest",
		"platform_family":  "",
		"platform_version": "1.0",
		"kernel_version":   "",
	}
	want := []telegraf.Metric{
		metric.New("system_os", map[string]string{}, wantFields, time.Unix(0, 0), telegraf.Untyped),
	}

	// NOTE(review): judging by its name, RequireMetricsStructureEqual compares
	// metric structure rather than field values - confirm, since "arch" and
	// "kernel_version" depend on the machine running the test.
	testutil.RequireMetricsStructureEqual(t, want, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
}
4 changes: 2 additions & 2 deletions plugins/inputs/system/system_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ func TestInitAllValidOptions(t *testing.T) {
name string
include []string
}{
{"new", []string{"load", "users", "cpus", "uptime"}},
{"legacy", []string{"load", "users", "legacy_cpus", "legacy_uptime"}},
{"new", []string{"load", "users", "cpus", "uptime", "os"}},
{"legacy", []string{"load", "users", "legacy_cpus", "legacy_uptime", "os"}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
4 changes: 4 additions & 0 deletions plugins/inputs/system/testdata/os-release/os-release
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
NAME="Telegraf Test OS"
ID=telegraftest
VERSION_ID="1.0"
PRETTY_NAME="Telegraf Test OS 1.0"
Loading