Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/happy-books-boil.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"chainlink": minor
---

Add Telemetry.PrometheusBridge to TOML config to support forwarding Prometheus metrics through OpenTelemetry
10 changes: 10 additions & 0 deletions core/cmd/shell.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/urfave/cli"
prombridge "go.opentelemetry.io/contrib/bridges/prometheus"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
Expand All @@ -38,6 +39,7 @@ import (
"github.com/smartcontractkit/chainlink-common/pkg/beholder"
clhttp "github.com/smartcontractkit/chainlink-common/pkg/http"
"github.com/smartcontractkit/chainlink-common/pkg/loop"
"github.com/smartcontractkit/chainlink-common/pkg/promutil"
"github.com/smartcontractkit/chainlink-common/pkg/sqlutil"
"github.com/smartcontractkit/chainlink-data-streams/llo/retirement"
"github.com/smartcontractkit/chainlink-data-streams/mercury/wsrpc"
Expand Down Expand Up @@ -176,6 +178,14 @@ func newBeholderClient(
return nil, err
}
}
if pmCfg := cfgTelemetry.PrometheusBridge(); pmCfg.Enabled() {
var bridgeOpts []prombridge.Option
if prefixes := pmCfg.Prefixes(); len(prefixes) > 0 {
bridgeOpts = append(bridgeOpts, prombridge.WithGatherer(promutil.NewPrefixGatherer(prometheus.DefaultGatherer, prefixes)))
}
clientCfg.MetricProducers = append(clientCfg.MetricProducers, prombridge.NewMetricProducer(bridgeOpts...))
}

beholderClient, err := beholder.NewClient(clientCfg)
if err != nil {
return nil, err
Expand Down
10 changes: 9 additions & 1 deletion core/config/docs/core.toml
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,6 @@ ChipIngressInsecureConnection = false # Default
ChipIngressBatchEmitterEnabled = true # Default
# DurableEmitterEnabled enables persisting outbound CHIP events to Postgres for at-least-once delivery.
DurableEmitterEnabled = false # Default

# HeartbeatInterval is the interval at which the application heartbeat is sent to telemetry backends.
HeartbeatInterval = '1s' # Default
# LogLevel sets the log level for telemetry streaming (debug, info, warn, error, crit, panic, fatal)
Expand All @@ -902,6 +901,15 @@ LogMaxQueueSize = 2048 # Default
# foo is an example resource attribute
foo = "bar" # Example

# The Prometheus bridge automatically forwards metrics through OpenTelemetry.
[Telemetry.PrometheusBridge]
# **ADVANCED**
# Enabled enables the Prometheus bridge.
Enabled = false # Default
# Prefixes is a set of filters to restrict which Prometheus metrics are forwarded based on prefix matching.
# By default, only the Go runtime metrics are forwarded. Empty means forward everything.
Prefixes = ["go_"] # Default

[CRE.Streams]
# WsURL is the websockets url for the streams sdk config
WsURL = "streams.url" # Example
Expand Down
6 changes: 6 additions & 0 deletions core/config/telemetry_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,10 @@ type Telemetry interface {
LogExportMaxBatchSize() int
LogExportInterval() time.Duration
LogMaxQueueSize() int
PrometheusBridge() PrometheusBridge
}

// PrometheusBridge exposes the Telemetry.PrometheusBridge settings, which control
// forwarding of Prometheus metrics through OpenTelemetry.
type PrometheusBridge interface {
// Enabled reports whether the Prometheus bridge is turned on.
Enabled() bool
// Prefixes returns the metric-name prefixes used to restrict which Prometheus
// metrics are forwarded. An empty result means everything is forwarded.
Prefixes() []string
}
17 changes: 17 additions & 0 deletions core/config/toml/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2873,6 +2873,8 @@ type Telemetry struct {
LogExportMaxBatchSize *int
LogExportInterval *commonconfig.Duration
LogMaxQueueSize *int

PrometheusBridge PrometheusBridge `toml:",omitempty"`
}

func (b *Telemetry) setFrom(f *Telemetry) {
Expand Down Expand Up @@ -2939,6 +2941,7 @@ func (b *Telemetry) setFrom(f *Telemetry) {
if v := f.LogMaxQueueSize; v != nil {
b.LogMaxQueueSize = v
}
b.PrometheusBridge.setFrom(&f.PrometheusBridge)
}

func (b *Telemetry) ValidateConfig() (err error) {
Expand All @@ -2960,6 +2963,20 @@ func (b *Telemetry) ValidateConfig() (err error) {
return err
}

// PrometheusBridge holds the Telemetry.PrometheusBridge TOML settings.
type PrometheusBridge struct {
// Enabled toggles the Prometheus bridge. A nil value is skipped by setFrom,
// so it never overrides a value that is already present.
Enabled *bool
// Prefixes restricts forwarding to metrics whose names match one of these
// prefixes. A nil slice is skipped by setFrom.
Prefixes []string
}

// setFrom overlays f onto b: each field that is set (non-nil) in f replaces
// the corresponding field in b, and unset fields leave b untouched.
func (b *PrometheusBridge) setFrom(f *PrometheusBridge) {
	if f.Enabled != nil {
		b.Enabled = f.Enabled
	}
	// An empty but non-nil slice still counts as set and replaces b.Prefixes.
	if f.Prefixes != nil {
		b.Prefixes = f.Prefixes
	}
}

var hostnameRegex = regexp.MustCompile(`^[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*$`)

// Validates uri is valid external or local URI
Expand Down
16 changes: 16 additions & 0 deletions core/services/chainlink/config_telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,19 @@ func (b *telemetryConfig) LogMaxQueueSize() int {
}
return *b.s.LogMaxQueueSize
}

// PrometheusBridge returns a config.PrometheusBridge view over the
// Telemetry.PrometheusBridge TOML settings held by this telemetry config.
func (b *telemetryConfig) PrometheusBridge() config.PrometheusBridge {
	bridge := prometheusBridgeConfig{s: b.s.PrometheusBridge}
	return &bridge
}

// prometheusBridgeConfig adapts the toml.PrometheusBridge settings to the
// config.PrometheusBridge interface.
type prometheusBridgeConfig struct {
s toml.PrometheusBridge
}

// Enabled reports whether the Prometheus bridge is turned on.
//
// An unset (nil) value is treated as disabled, which matches the documented
// default of false, rather than panicking on a nil pointer dereference.
func (p *prometheusBridgeConfig) Enabled() bool {
	if p.s.Enabled == nil {
		return false
	}
	return *p.s.Enabled
}

// Prefixes returns the configured metric-name prefixes exactly as stored
// (the slice is not copied).
func (p *prometheusBridgeConfig) Prefixes() []string {
return p.s.Prefixes
}
43 changes: 24 additions & 19 deletions core/services/chainlink/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -470,9 +470,9 @@ func TestConfig_Marshal(t *testing.T) {
Enabled: ptr(false),
DiskMonitorEnabled: ptr(false),
IdleEviction: ptr(true),
IdleTimeout: commoncfg.MustNewDuration(10 * time.Minute),
MaxLoaded: ptr(200),
CacheDir: ptr(""),
IdleTimeout: commoncfg.MustNewDuration(10 * time.Minute),
MaxLoaded: ptr(200),
CacheDir: ptr(""),
},
AdditionalSourcesConfig: []toml.AdditionalWorkflowSource{
{
Expand Down Expand Up @@ -550,27 +550,32 @@ func TestConfig_Marshal(t *testing.T) {
Release: ptr("v1.2.3"),
}
full.Telemetry = toml.Telemetry{
Enabled: ptr(true),
CACertFile: ptr("cert-file"),
Endpoint: ptr("example.com/collector"),
InsecureConnection: ptr(true),
ResourceAttributes: map[string]string{"Baz": "test", "Foo": "bar"},
TraceSampleRatio: ptr(0.01),
EmitterBatchProcessor: ptr(true),
EmitterExportTimeout: commoncfg.MustNewDuration(1 * time.Second),
AuthHeadersTTL: commoncfg.MustNewDuration(0 * time.Second),
Enabled: ptr(true),
CACertFile: ptr("cert-file"),
Endpoint: ptr("example.com/collector"),
InsecureConnection: ptr(true),
ResourceAttributes: map[string]string{"Baz": "test", "Foo": "bar"},
TraceSampleRatio: ptr(0.01),
EmitterBatchProcessor: ptr(true),
EmitterExportTimeout: commoncfg.MustNewDuration(1 * time.Second),
AuthHeadersTTL: commoncfg.MustNewDuration(0 * time.Second),
ChipIngressEndpoint: ptr("example.com/chip-ingress"),
ChipIngressInsecureConnection: ptr(false),
ChipIngressBatchEmitterEnabled: ptr(true),
DurableEmitterEnabled: ptr(false),
HeartbeatInterval: commoncfg.MustNewDuration(1 * time.Second),
LogStreamingEnabled: ptr(false),
LogLevel: ptr("info"),
LogBatchProcessor: ptr(true),
LogExportTimeout: commoncfg.MustNewDuration(1 * time.Second),
LogExportMaxBatchSize: ptr[int](512),
LogExportInterval: ptrDuration(1 * time.Second),
LogMaxQueueSize: ptrInt(2048),
LogStreamingEnabled: ptr(false),
LogLevel: ptr("info"),
LogBatchProcessor: ptr(true),
LogExportTimeout: commoncfg.MustNewDuration(1 * time.Second),
LogExportMaxBatchSize: ptr[int](512),
LogExportInterval: ptrDuration(1 * time.Second),
LogMaxQueueSize: ptrInt(2048),

PrometheusBridge: toml.PrometheusBridge{
Enabled: ptr(true),
Prefixes: []string{"ocr_"},
},
}
full.CRE = toml.CreConfig{
UseLocalTimeProvider: ptr(true),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,10 @@ LogExportMaxBatchSize = 512
LogExportInterval = '1s'
LogMaxQueueSize = 2048

[Telemetry.PrometheusBridge]
Enabled = false
Prefixes = ['go_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
4 changes: 4 additions & 0 deletions core/services/chainlink/testdata/config-full.toml
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,10 @@ LogMaxQueueSize = 2048
Baz = 'test'
Foo = 'bar'

[Telemetry.PrometheusBridge]
Enabled = true
Prefixes = ['ocr_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,10 @@ LogExportMaxBatchSize = 512
LogExportInterval = '1s'
LogMaxQueueSize = 2048

[Telemetry.PrometheusBridge]
Enabled = false
Prefixes = ['go_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
4 changes: 4 additions & 0 deletions core/web/resolver/testdata/config-empty-effective.toml
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,10 @@ LogExportMaxBatchSize = 512
LogExportInterval = '1s'
LogMaxQueueSize = 2048

[Telemetry.PrometheusBridge]
Enabled = false
Prefixes = ['go_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
4 changes: 4 additions & 0 deletions core/web/resolver/testdata/config-full.toml
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,10 @@ LogMaxQueueSize = 2048
Baz = 'test'
Foo = 'bar'

[Telemetry.PrometheusBridge]
Enabled = true
Prefixes = ['ocr_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,10 @@ LogExportMaxBatchSize = 512
LogExportInterval = '1s'
LogMaxQueueSize = 2048

[Telemetry.PrometheusBridge]
Enabled = false
Prefixes = ['go_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
22 changes: 22 additions & 0 deletions docs/CONFIG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2522,6 +2522,28 @@ foo = "bar" # Example
```
foo is an example resource attribute

## Telemetry.PrometheusBridge
```toml
[Telemetry.PrometheusBridge]
Enabled = false # Default
Prefixes = ["go_"] # Default
```
The Prometheus bridge automatically forwards metrics through OpenTelemetry.

### Enabled
:warning: **_ADVANCED_**: _Do not change this setting unless you know what you are doing._
```toml
Enabled = false # Default
```
Enabled enables the Prometheus bridge.

### Prefixes
```toml
Prefixes = ["go_"] # Default
```
Prefixes is a set of filters to restrict which Prometheus metrics are forwarded based on prefix matching.
By default, only the Go runtime metrics are forwarded. Empty means forward everything.

## CRE.Streams
```toml
[CRE.Streams]
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ require (
github.com/unrolled/secure v1.13.0
github.com/urfave/cli v1.22.14
go.dedis.ch/kyber/v3 v3.1.0
go.opentelemetry.io/contrib/bridges/prometheus v0.68.0
go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin v0.49.0
go.opentelemetry.io/otel v1.43.0
go.opentelemetry.io/otel/log v0.19.0
Expand Down Expand Up @@ -391,7 +392,6 @@ require (
go.etcd.io/bbolt v1.4.2 // indirect
go.mongodb.org/mongo-driver v1.17.9 // indirect
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/contrib/bridges/prometheus v0.68.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.19.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions plugins/loop_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ func (m *LoopRegistry) Register(id string) (*RegisteredLoop, error) {
envCfg.TelemetryLogExportMaxBatchSize = m.cfgTelemetry.LogExportMaxBatchSize()
envCfg.TelemetryLogExportInterval = m.cfgTelemetry.LogExportInterval()
envCfg.TelemetryLogMaxQueueSize = m.cfgTelemetry.LogMaxQueueSize()
envCfg.TelemetryPrometheusBridgeEnabled = m.cfgTelemetry.PrometheusBridge().Enabled()
envCfg.TelemetryPrometheusBridgePrefixes = m.cfgTelemetry.PrometheusBridge().Prefixes()
}
m.lggr.Debugf("Registered loopp %q with port %d", id, envCfg.PrometheusPort)

Expand Down
10 changes: 10 additions & 0 deletions plugins/loop_registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ func (m mockCfgTelemetry) LogExportMaxBatchSize() int { return 512 }
func (m mockCfgTelemetry) LogExportInterval() time.Duration { return 5 * time.Second }
func (m mockCfgTelemetry) LogMaxQueueSize() int { return 2048 }

// PrometheusBridge returns a fixed mock bridge configuration for tests.
func (m mockCfgTelemetry) PrometheusBridge() config.PrometheusBridge {
return mockPrometheusBridge{}
}

// mockPrometheusBridge is a stateless test double for config.PrometheusBridge.
type mockPrometheusBridge struct{}

// Enabled always reports the bridge as enabled.
func (m mockPrometheusBridge) Enabled() bool { return true }

// Prefixes always returns nil, i.e. no prefix filter is configured.
func (m mockPrometheusBridge) Prefixes() []string { return nil }

type mockCfgDatabase struct{}

func (m mockCfgDatabase) Backup() config.Backup { panic("unimplemented") }
Expand Down
4 changes: 4 additions & 0 deletions testdata/scripts/config/merge_raw_configs.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,10 @@ LogExportMaxBatchSize = 512
LogExportInterval = '1s'
LogMaxQueueSize = 2048

[Telemetry.PrometheusBridge]
Enabled = false
Prefixes = ['go_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
4 changes: 4 additions & 0 deletions testdata/scripts/node/validate/default.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,10 @@ LogExportMaxBatchSize = 512
LogExportInterval = '1s'
LogMaxQueueSize = 2048

[Telemetry.PrometheusBridge]
Enabled = false
Prefixes = ['go_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
4 changes: 4 additions & 0 deletions testdata/scripts/node/validate/defaults-override.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,10 @@ LogExportMaxBatchSize = 512
LogExportInterval = '1s'
LogMaxQueueSize = 2048

[Telemetry.PrometheusBridge]
Enabled = false
Prefixes = ['go_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,10 @@ LogExportMaxBatchSize = 512
LogExportInterval = '1s'
LogMaxQueueSize = 2048

[Telemetry.PrometheusBridge]
Enabled = false
Prefixes = ['go_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,10 @@ LogExportMaxBatchSize = 512
LogExportInterval = '1s'
LogMaxQueueSize = 2048

[Telemetry.PrometheusBridge]
Enabled = false
Prefixes = ['go_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
4 changes: 4 additions & 0 deletions testdata/scripts/node/validate/disk-based-logging.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,10 @@ LogExportMaxBatchSize = 512
LogExportInterval = '1s'
LogMaxQueueSize = 2048

[Telemetry.PrometheusBridge]
Enabled = false
Prefixes = ['go_']

[Workflows]
[Workflows.Limits]
Global = 200
Expand Down
Loading
Loading