Skip to content

Commit 42a3805

Browse files
Merge pull request #1329 from hongkailiu/OTA-1813
OTA-1813: Populate risks from alerts
2 parents f7e9723 + dfa20d6 commit 42a3805

14 files changed

Lines changed: 1314 additions & 17 deletions

File tree

.openshift-tests-extension/openshift_payload_cluster-version-operator.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,18 @@
11
[
2+
{
3+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator should work with risks from alerts",
4+
"labels": {
5+
"Local": {},
6+
"OTA-1813": {},
7+
"Serial": {}
8+
},
9+
"resources": {
10+
"isolation": {}
11+
},
12+
"source": "openshift:payload:cluster-version-operator",
13+
"lifecycle": "blocking",
14+
"environmentSelector": {}
15+
},
216
{
317
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator should work with accept risks",
418
"labels": {

cmd/cluster-version-operator-tests/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ func main() {
2424
Name: "openshift/cluster-version-operator/conformance/parallel",
2525
Parents: []string{"openshift/conformance/parallel"},
2626
Qualifiers: []string{
27-
`!(name.contains("[Serial]") || "Serial" in labels || name.contains("[Slow]"))`,
27+
`!(name.contains("[Serial]") || "Serial" in labels || name.contains("[Slow]") || "Local" in labels)`,
2828
},
2929
})
3030

@@ -33,7 +33,7 @@ func main() {
3333
Name: "openshift/cluster-version-operator/conformance/serial",
3434
Parents: []string{"openshift/conformance/serial"},
3535
Qualifiers: []string{
36-
`name.contains("[Serial]") || "Serial" in labels`,
36+
`(name.contains("[Serial]") || "Serial" in labels) && !("Local" in labels)`,
3737
},
3838
})
3939

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ require (
1717
github.com/operator-framework/api v0.17.1
1818
github.com/operator-framework/operator-lifecycle-manager v0.22.0
1919
github.com/pkg/errors v0.9.1
20+
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.86.0
2021
github.com/prometheus-operator/prometheus-operator/pkg/client v0.86.0
2122
github.com/prometheus/client_golang v1.23.2
2223
github.com/prometheus/client_model v0.6.2
@@ -72,7 +73,6 @@ require (
7273
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
7374
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
7475
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
75-
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.86.0 // indirect
7676
github.com/prometheus/procfs v0.16.1 // indirect
7777
github.com/robfig/cron v1.2.0 // indirect
7878
github.com/sirupsen/logrus v1.9.3 // indirect
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package promql
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"sync"
7+
8+
"github.com/prometheus/client_golang/api"
9+
prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1"
10+
"github.com/prometheus/common/config"
11+
12+
"k8s.io/klog/v2"
13+
14+
"github.com/openshift/cluster-version-operator/pkg/clusterconditions"
15+
)
16+
17+
type Getter interface {
18+
Get(ctx context.Context) prometheusv1.AlertsResult
19+
}
20+
21+
func NewAlertGetter(promQLTarget clusterconditions.PromQLTarget) Getter {
22+
p := NewPromQL(promQLTarget)
23+
condition := p.Condition
24+
v, ok := condition.(*PromQL)
25+
if !ok {
26+
panic("invalid condition type")
27+
}
28+
return &ocAlertGetter{promQL: v}
29+
}
30+
31+
type ocAlertGetter struct {
32+
promQL *PromQL
33+
34+
mutex sync.Mutex
35+
cached prometheusv1.AlertsResult
36+
}
37+
38+
func (o *ocAlertGetter) Get(ctx context.Context) prometheusv1.AlertsResult {
39+
if err := o.refresh(ctx); err != nil {
40+
klog.Errorf("Failed to refresh alerts, using stale cache instead: %v", err)
41+
}
42+
return o.cached
43+
}
44+
45+
func (o *ocAlertGetter) refresh(ctx context.Context) error {
46+
o.mutex.Lock()
47+
defer o.mutex.Unlock()
48+
49+
klog.Info("refresh alerts ...")
50+
p := o.promQL
51+
host, err := p.Host(ctx)
52+
if err != nil {
53+
return fmt.Errorf("failure determine thanos IP: %w", err)
54+
}
55+
p.url.Host = host
56+
clientConfig := api.Config{Address: p.url.String()}
57+
58+
if roundTripper, err := config.NewRoundTripperFromConfig(p.HTTPClientConfig, "cluster-conditions"); err == nil {
59+
clientConfig.RoundTripper = roundTripper
60+
} else {
61+
return fmt.Errorf("creating PromQL round-tripper: %w", err)
62+
}
63+
64+
promqlClient, err := api.NewClient(clientConfig)
65+
if err != nil {
66+
return fmt.Errorf("creating PromQL client: %w", err)
67+
}
68+
69+
client := &statusCodeNotImplementedForPostClient{
70+
client: promqlClient,
71+
}
72+
73+
v1api := prometheusv1.NewAPI(client)
74+
75+
queryContext := ctx
76+
if p.QueryTimeout > 0 {
77+
var cancel context.CancelFunc
78+
queryContext, cancel = context.WithTimeout(ctx, p.QueryTimeout)
79+
defer cancel()
80+
}
81+
82+
r, err := v1api.Alerts(queryContext)
83+
if err != nil {
84+
return fmt.Errorf("failed to get alerts: %w", err)
85+
}
86+
o.cached = r
87+
klog.Infof("refreshed: %d alerts", len(o.cached.Alerts))
88+
return nil
89+
}

0 commit comments

Comments
 (0)