Skip to content
8 changes: 8 additions & 0 deletions src/App/Program.fs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type MissionOptions
ingressInternalDomain: string,
ingressExternalHost: string option,
ingressExternalPort: int,
metricsViaClusterDns: bool,
exportToPrometheus: bool,
probeTimeout: int,
missions: string seq,
Expand Down Expand Up @@ -178,6 +179,12 @@ type MissionOptions
Default = 80)>]
member self.IngressExternalPort = ingressExternalPort

// Command-line switch --metrics-via-cluster-dns. Optional, defaulting to
// false; exposes the metricsViaClusterDns constructor argument.
[<Option("metrics-via-cluster-dns",
         Required = false,
         Default = false,
         HelpText = "Scrape post-mission metrics directly from the per-pod svc.cluster.local DNS name instead of through the ingress hostname. Use when the ingress hostname (e.g. <nonce>.local) does not resolve from where SSC runs but cluster DNS does.")>]
member self.MetricsViaClusterDns : bool = metricsViaClusterDns

// Command-line switch --export-to-prometheus; exposes the
// exportToPrometheus constructor argument.
[<Option("export-to-prometheus",
         HelpText = "Whether to export core metrics to prometheus")>]
member self.ExportToPrometheus : bool = exportToPrometheus

Expand Down Expand Up @@ -779,6 +786,7 @@ let main argv =
ingressInternalDomain = mission.IngressInternalDomain
ingressExternalHost = mission.IngressExternalHost
ingressExternalPort = mission.IngressExternalPort
metricsViaClusterDns = mission.MetricsViaClusterDns
exportToPrometheus = mission.ExportToPrometheus
probeTimeout = mission.ProbeTimeout
coreResources = SmallTestResources
Expand Down
1 change: 1 addition & 0 deletions src/FSLibrary.Tests/Tests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ let ctx : MissionContext =
ingressInternalDomain = "local"
ingressExternalHost = None
ingressExternalPort = 80
metricsViaClusterDns = false
exportToPrometheus = false
probeTimeout = 10
coreResources = SmallTestResources
Expand Down
22 changes: 21 additions & 1 deletion src/FSLibrary/StellarCoreHTTP.fs
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,17 @@ type Peer with
let url = self.URL path
Http.RequestString(url, headers = self.Headers)

// Builds "http://<dns-name>:<http-port>/<path>" for this peer, addressing
// stellar-core's admin HTTP endpoint through the per-pod svc.cluster.local
// DNS name rather than through the ingress.
//
// Why: the ingress hostname (<nonce>.<ingress-internal-domain>, e.g.
// <nonce>.local) does not resolve in every cluster-DNS environment (non-SDF
// k3s clusters, for example), while the per-pod service DNS name resolves
// wherever cluster DNS is reachable.
// See https://github.com/stellar/supercluster/issues/399.
//
// `path` is appended verbatim after the single "/" separator.
member self.ClusterDnsURL(path: string) : string =
    let host = self.DnsName.StringName
    let port = StellarCoreCfg.CfgVal.httpPort
    sprintf "http://%s:%d/%s" host port path

// Requests `path` from the URL produced by ClusterDnsURL and returns the
// response body as a string. No custom headers are passed on this request.
member self.fetchFromClusterDns(path: string) : string =
    let url = self.ClusterDnsURL path
    Http.RequestString(url)

// Returns the State field of the value reported by GetInfo().
member self.GetState() =
    let info = self.GetInfo()
    info.State

member self.GetStatusOrState() : string =
Expand All @@ -401,7 +412,16 @@ type Peer with
// Fetches the "metrics" endpoint via self.fetch, parses the body with
// Metrics.Parse and returns its Metrics member. The fetch-and-parse step
// runs under WebExceptionRetry with the DefaultRetry setting.
member self.GetMetrics() : Metrics.Metrics =
    WebExceptionRetry
        DefaultRetry
        (fun _ ->
            let body = self.fetch "metrics"
            Metrics.Parse(body).Metrics)

// Returns the unparsed body of the "metrics" endpoint obtained via
// self.fetch, run under WebExceptionRetry with the DefaultRetry setting.
member self.GetRawMetrics() =
    WebExceptionRetry
        DefaultRetry
        (fun _ -> self.fetch "metrics")
// Returns the unparsed body of the "metrics" endpoint, used for the
// post-mission metrics scrape.
//
// By default the request goes through the ingress (self.fetch). When
// --metrics-via-cluster-dns is set, it goes straight to the per-pod
// svc.cluster.local DNS name instead (self.fetchFromClusterDns), so the
// teardown dump still succeeds in environments where the ingress hostname
// (e.g. <nonce>.local) does not resolve from where SSC runs but cluster DNS
// does. Either way the request runs under WebExceptionRetry with the
// DefaultRetry setting.
member self.GetRawMetrics() =
    // Read the flag once, up front; the retried lambda only picks the route.
    let viaClusterDns = self.networkCfg.missionContext.metricsViaClusterDns

    WebExceptionRetry
        DefaultRetry
        (fun _ ->
            if viaClusterDns then
                self.fetchFromClusterDns "metrics"
            else
                self.fetch "metrics")

member self.GetInfo() : Info.Info =
WebExceptionRetry
Expand Down
1 change: 1 addition & 0 deletions src/FSLibrary/StellarMissionContext.fs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ type MissionContext =
ingressInternalDomain: string
ingressExternalHost: string option
ingressExternalPort: int
metricsViaClusterDns: bool
exportToPrometheus: bool
probeTimeout: int
coreResources: CoreResources
Expand Down
Loading