Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 68 additions & 15 deletions plugins/inputs/system/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ plugin ordering. See [CONFIGURATION.md][CONFIGURATION.md] for more details.
```toml @sample.conf
# Read metrics about system load & uptime
[[inputs.system]]
# no configuration
## Metric groups to collect.
## Available options:
## load - system gauge metrics (load averages, cpu counts, user counts)
## uptime - system uptime
## By default all groups are collected.
# collect = ["load", "uptime"]
```

### Permissions
Expand All @@ -35,22 +40,70 @@ same requirements for `n_users` apply.

## Metrics

- system
- fields:
- load1 (float)
- load15 (float)
- load5 (float)
- n_users (integer)
- n_unique_users (integer)
- n_cpus (integer)
- n_physical_cpus (integer)
- uptime (integer, seconds)
- uptime_format (string, deprecated in 1.10, use `uptime` field)
### `system`

All fields below belong to the `system` measurement. The `collect` option
controls which groups are gathered.

| Field | Group | Type | Description |
|-------------------|----------|---------|------------------------------------------------|
| `load1` | `load` | float | 1-minute load average |
| `load5` | `load` | float | 5-minute load average |
| `load15` | `load` | float | 15-minute load average |
| `n_users` | `load` | integer | Number of logged-in user sessions |
| `n_unique_users` | `load` | integer | Number of unique logged-in usernames |
| `n_cpus` | `load` | integer | Number of logical CPUs |
| `n_physical_cpus` | `load` | integer | Number of physical CPUs |
| `uptime` | `uptime` | integer | System uptime in seconds |
| `uptime_format`   | `uptime` | string  | Human-readable form (deprecated, use `uptime`) |

## Example Output

```text
system,host=tyrion load1=3.72,load5=2.4,load15=2.1,n_users=3i,n_cpus=4i,n_physical_cpus=2i 1483964144000000000
system,host=tyrion uptime=1249632i 1483964144000000000
system,host=tyrion uptime_format="14 days, 11:07" 1483964144000000000
system,host=worker-01 load1=3.72,load5=2.4,load15=2.1,n_users=3i,n_unique_users=2i,n_cpus=4i,n_physical_cpus=2i 1748000000000000000
system,host=worker-01 uptime=1249632i 1748000000000000000
system,host=worker-01 uptime_format="14 days, 11:07" 1748000000000000000
```

## Example Output (Prometheus)
Comment thread
bilkoua marked this conversation as resolved.
Outdated

When using the [Prometheus client output plugin][prom-client], Telegraf
converts each field into its own Prometheus metric, named by joining the
measurement name and the field name with an underscore (e.g. `system_load1`).

[prom-client]: ../../../plugins/outputs/prometheus_client/README.md

```text
# HELP system_load1 Telegraf collected metric
# TYPE system_load1 gauge
system_load1{host="worker-01"} 3.72

# HELP system_load15 Telegraf collected metric
# TYPE system_load15 gauge
system_load15{host="worker-01"} 2.1

# HELP system_load5 Telegraf collected metric
# TYPE system_load5 gauge
system_load5{host="worker-01"} 2.4

# HELP system_n_cpus Telegraf collected metric
# TYPE system_n_cpus gauge
system_n_cpus{host="worker-01"} 4

# HELP system_n_physical_cpus Telegraf collected metric
# TYPE system_n_physical_cpus gauge
system_n_physical_cpus{host="worker-01"} 2

# HELP system_n_unique_users Telegraf collected metric
# TYPE system_n_unique_users gauge
system_n_unique_users{host="worker-01"} 2

# HELP system_n_users Telegraf collected metric
# TYPE system_n_users gauge
system_n_users{host="worker-01"} 3

# HELP system_uptime Telegraf collected metric
# TYPE system_uptime counter
system_uptime{host="worker-01"} 1249632
```
7 changes: 6 additions & 1 deletion plugins/inputs/system/sample.conf
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Read metrics about system load & uptime
[[inputs.system]]
# no configuration
## Metric groups to collect.
## Available options:
## load - system gauge metrics (load averages, cpu counts, user counts)
## uptime - system uptime
## By default all groups are collected.
# collect = ["load", "uptime"]
107 changes: 66 additions & 41 deletions plugins/inputs/system/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,68 +15,95 @@ import (
"github.com/shirou/gopsutil/v4/load"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/choice"
"github.com/influxdata/telegraf/plugins/inputs"
)

//go:embed sample.conf
var sampleConfig string

// availableCollectors lists the metric group names accepted by the `collect`
// option. Init uses it both as the default selection when `collect` is empty
// and as the set of allowed values when validating the configuration.
var availableCollectors = []string{"load", "uptime"}

// System is the input plugin that reports the `system` measurement. The
// metric groups it gathers ("load", "uptime") are selected via Collect.
type System struct {
// Collect holds the metric groups named by the `collect` TOML option.
Collect []string `toml:"collect"`

// Log is the logger used for debug messages; it is excluded from TOML.
Log telegraf.Logger `toml:"-"`

// collectLoad and collectUptime record whether "load" and "uptime" are
// present in Collect. Init sets them and Gather checks them.
collectLoad bool
collectUptime bool
}

// SampleConfig returns the contents of sample.conf, which are embedded into
// the package-level sampleConfig variable via go:embed.
func (*System) SampleConfig() string {
return sampleConfig
}

func (s *System) Gather(acc telegraf.Accumulator) error {
loadavg, err := load.Avg()
if err != nil && !strings.Contains(err.Error(), "not implemented") {
return err
func (s *System) Init() error {
if len(s.Collect) == 0 {
s.Collect = availableCollectors
}

numLogicalCPUs, err := cpu.Counts(true)
if err != nil {
return err
if err := choice.CheckSlice(s.Collect, availableCollectors); err != nil {
return fmt.Errorf("config option 'collect': %w", err)
}

numPhysicalCPUs, err := cpu.Counts(false)
if err != nil {
return err
}
s.collectLoad = choice.Contains("load", s.Collect)
s.collectUptime = choice.Contains("uptime", s.Collect)

fields := map[string]interface{}{
"load1": loadavg.Load1,
"load5": loadavg.Load5,
"load15": loadavg.Load15,
"n_cpus": numLogicalCPUs,
"n_physical_cpus": numPhysicalCPUs,
}

users, err := host.Users()
if err == nil {
fields["n_users"] = len(users)
fields["n_unique_users"] = findUniqueUsers(users)
} else if os.IsNotExist(err) {
s.Log.Debugf("Reading users: %s", err.Error())
} else if os.IsPermission(err) {
s.Log.Debug(err.Error())
}
return nil
}

func (s *System) Gather(acc telegraf.Accumulator) error {
now := time.Now()
acc.AddGauge("system", fields, nil, now)

uptime, err := host.Uptime()
if err != nil {
return err
if s.collectLoad {
fields := make(map[string]interface{})

loadavg, err := load.Avg()
if err != nil {
if !strings.Contains(err.Error(), "not implemented") {
return err
}
} else {
fields["load1"] = loadavg.Load1
fields["load5"] = loadavg.Load5
fields["load15"] = loadavg.Load15
}

numLogicalCPUs, err := cpu.Counts(true)
if err != nil {
return err
}
numPhysicalCPUs, err := cpu.Counts(false)
if err != nil {
return err
}
fields["n_cpus"] = numLogicalCPUs
fields["n_physical_cpus"] = numPhysicalCPUs
Comment thread
srebhan marked this conversation as resolved.
Outdated

users, err := host.Users()
if err == nil {
fields["n_users"] = len(users)
fields["n_unique_users"] = findUniqueUsers(users)
} else if os.IsNotExist(err) {
s.Log.Debugf("Reading users: %s", err.Error())
} else if os.IsPermission(err) {
s.Log.Debug(err.Error())
}
Comment thread
bilkoua marked this conversation as resolved.
Outdated

acc.AddGauge("system", fields, nil, now)
}

acc.AddCounter("system", map[string]interface{}{
"uptime": uptime,
}, nil, now)
acc.AddFields("system", map[string]interface{}{
"uptime_format": formatUptime(uptime),
}, nil, now)
if s.collectUptime {
uptime, err := host.Uptime()
if err != nil {
return err
}
acc.AddCounter("system", map[string]interface{}{
"uptime": uptime,
}, nil, now)
acc.AddFields("system", map[string]interface{}{
"uptime_format": formatUptime(uptime),
}, nil, now)
}

return nil
}
Expand All @@ -88,7 +115,6 @@ func findUniqueUsers(userStats []host.UserStat) int {
uniqueUsers[userstat.User] = true
}
}

return len(uniqueUsers)
}

Expand All @@ -97,7 +123,6 @@ func formatUptime(uptime uint64) string {
w := bufio.NewWriter(buf)

days := uptime / (60 * 60 * 24)

if days != 0 {
s := ""
if days > 1 {
Expand Down
Loading