From 1678c0d9c5834f8c1b95e886940760460dd035ad Mon Sep 17 00:00:00 2001 From: RebeccaMahany Date: Mon, 20 Apr 2026 10:24:03 -0400 Subject: [PATCH 1/8] Move empty variable check to config.toml --- ee/tables/secretscan/config.toml | 8 ++- ee/tables/secretscan/table.go | 48 +------------ ee/tables/secretscan/table_test.go | 112 ++++++++--------------------- 3 files changed, 35 insertions(+), 133 deletions(-) diff --git a/ee/tables/secretscan/config.toml b/ee/tables/secretscan/config.toml index 616bc1030..48d951857 100644 --- a/ee/tables/secretscan/config.toml +++ b/ee/tables/secretscan/config.toml @@ -5,11 +5,15 @@ title = "Kolide secretscan config" [extend] useDefault = true -# Ignore K8s Sealed Secrets -# https://github.com/gitleaks/gitleaks/issues/1728 [[rules]] id = "generic-api-key" [[rules.allowlists]] +description = "Ignore K8s Sealed Secrets (https://github.com/gitleaks/gitleaks/issues/1728)" condition = "AND" regexes = ['''Ag[a-zA-Z0-9+/]{500,}={0,2}'''] paths = ['''(?i).*\.ya?ml$'''] +[[rules.allowlists]] +description = "Ignore variable names with empty values (https://github.com/gitleaks/gitleaks/issues/1828)" +condition = "AND" +regex_target = "match" +regexes = ['''^\s*\w[\w-]*=$'''] diff --git a/ee/tables/secretscan/table.go b/ee/tables/secretscan/table.go index 7539da7fe..37e359997 100644 --- a/ee/tables/secretscan/table.go +++ b/ee/tables/secretscan/table.go @@ -240,7 +240,6 @@ func (t *Table) findingsToRows(ctx context.Context, argon2idSalts []string, find // Just for logging purposes -- we're curious how frequently we detect false positives encryptedJwtFalsePositiveCount := 0 - emptyVariableFalsePositiveCount := 0 for idx, f := range findings { // We sometimes see false positives under the "generic-api-key" rule. // Check for these. @@ -249,10 +248,6 @@ func (t *Table) findingsToRows(ctx context.Context, argon2idSalts []string, find encryptedJwtFalsePositiveCount += 1 continue } - if isEmptyVariable(f) { - emptyVariableFalsePositiveCount += 1 - continue - } } // Get the hash of this secret. If there's an error, log it, and allow the rest of the data to be returned. @@ -287,11 +282,10 @@ func (t *Table) findingsToRows(ctx context.Context, argon2idSalts []string, find results = append(results, row) } - if encryptedJwtFalsePositiveCount > 0 || emptyVariableFalsePositiveCount > 0 { + if encryptedJwtFalsePositiveCount > 0 { t.slogger.Log(ctx, slog.LevelInfo, "detected and skipped false positive generic-api-key findings", "jwt_family_count", encryptedJwtFalsePositiveCount, - "empty_variable", emptyVariableFalsePositiveCount, ) } @@ -340,46 +334,6 @@ func isEncryptedJWTFamilyValue(finding report.Finding) bool { return false } -// emptyVariableRegexp matches strings that start with a word char, -// contain only word chars and underscores or hyphens, and end with a -// singular equal sign -- for example, `MY_ENV_VAR=`. -var emptyVariableRegexp = regexp.MustCompile(`^\w[\w-]*=$`) - -// isEmptyVariable inspects the given finding to determine if it is actually -// an empty variable name instead. -func isEmptyVariable(finding report.Finding) bool { - // This type of false positive typically has an entropy score around 3, - // so we exclude higher-entropy values right off the bat. - if finding.Entropy >= 4 { - return false - } - - // Next, check for our regex match. - if !emptyVariableRegexp.MatchString(finding.Secret) { - return false - } - - // We expect that this "secret" would be at the start of a line, with either nothing - // or whitespace in front of it. However, sometimes our finding.Line will contain - // multiple lines -- in this case, it looks like "\nMY_ENV_VAR1=\nMY_ENV_VAR2=". - // So first we isolate the actual line we're looking at, then check to see if there's - // anything besides whitespace in front of it. - lines := strings.Split(strings.ReplaceAll(finding.Line, "\r\n", "\n"), "\n") - var lineWithSecret string - for _, line := range lines { - if strings.Contains(line, finding.Secret) { - lineWithSecret = line - break - } - } - if lineWithSecret == "" { - return false - } - before, _, _ := strings.Cut(lineWithSecret, finding.Secret) - beforeTrimmed := strings.TrimSpace(before) - return beforeTrimmed == "" -} - // findingsToKeyNames attempts to extract the key names (eg: in an .env file) to help understand the context // of the discovered secret. Because of the multitude of possible ways people can stash secrets, and the myriad of // secret types, this is very hard to get right. So instead, we aim to solve the simple case, and ignore the rest. diff --git a/ee/tables/secretscan/table_test.go b/ee/tables/secretscan/table_test.go index b5e54766c..4e3bb3677 100644 --- a/ee/tables/secretscan/table_test.go +++ b/ee/tables/secretscan/table_test.go @@ -414,90 +414,6 @@ func Test_isEncryptedJWTFamilyValue(t *testing.T) { } } -func Test_isEmptyVariable(t *testing.T) { - t.Parallel() - - // Make sure config exists - newConfigOnce() - require.NoError(t, configErr) - - for _, tt := range []struct { - testCaseName string - rawData string - expectedReturn bool - }{ - { - testCaseName: "underscore", - rawData: ` -123_S3_CREDS= -123_S3_IP_REGION= -`, - expectedReturn: true, - }, - { - testCaseName: "hyphen", - rawData: ` -123-S3-CREDS= -123-S3-IP-REGION= -`, - expectedReturn: true, - }, - { - testCaseName: "alphanumeric", - rawData: ` -123S3CREDS= -123S3IPREGION= -`, - expectedReturn: true, - }, - { - testCaseName: "tab before empty variable", - rawData: ` - 123_S3_CREDS= - 123_S3_IP_REGION= -`, - expectedReturn: true, - }, - { - testCaseName: "non-empty", - rawData: ` -123_S3_CREDS=9b065cc5-cf2e-4b3f-9a20-3422e060807a -123_S3_IP_REGION=52b22b1e-2178-4a1e-bbba-50d0160ffab3 -`, - expectedReturn: false, - }, - { - testCaseName: "high entropy", // 4.19 entropy - rawData: ` -375E6860-39D4-11F1-B4AC-0800200C9A66-375E6861-39D4-11F1-B4AC-0800200C9A66_123_S3_CREDS= -4DE613D1-39D4-11F1-B4AC-0800200C9A66_123_S3_IP_REGION_4DE613D0-39D4-11F1-B4AC-0800200C9A66= -`, - expectedReturn: false, - }, - } { - t.Run(tt.testCaseName, func(t *testing.T) { - t.Parallel() - - detector := detect.NewDetector(*kolideConfig) - fileSource := &sources.File{ - Content: strings.NewReader(tt.rawData), - Config: &detector.Config, - } - - findings, err := detector.DetectSource(t.Context(), fileSource) - require.NoError(t, err) - require.Greater(t, len(findings), 0) - - for _, finding := range findings { - // Make sure the test finding we generated is the type we expected - require.Equal(t, "generic-api-key", finding.RuleID) - // Confirm that isEmptyVariable classifies the finding appropriately - require.Equal(t, tt.expectedReturn, isEmptyVariable(finding)) - } - }) - } -} - // Test_kolideConfig confirms that our overrides in config.toml work as expected func Test_kolideConfig(t *testing.T) { t.Parallel() @@ -530,6 +446,34 @@ spec: creationTimestamp: null name: basic-auth namespace: default +`, + }, + { + testCaseName: "empty variable, with underscore", + rawData: ` +123_S3_CREDS= +123_S3_IP_REGION= +`, + }, + { + testCaseName: "empty variable, with hyphen", + rawData: ` +123-S3-CREDS= +123-S3-IP-REGION= +`, + }, + { + testCaseName: "empty variable, with alphanumeric", + rawData: ` +123S3CREDS= +123S3IPREGION= +`, + }, + { + testCaseName: "empty variable, with tab before empty variable", + rawData: ` + 123_S3_CREDS= + 123_S3_IP_REGION= `, }, } { From 22f4f61b66a8ca4458c4a4f13b8219f9b8e0fb50 Mon Sep 17 00:00:00 2001 From: RebeccaMahany Date: Mon, 20 Apr 2026 10:32:47 -0400 Subject: [PATCH 2/8] Restrict exception to all-uppercase --- ee/tables/secretscan/config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/tables/secretscan/config.toml b/ee/tables/secretscan/config.toml index 48d951857..7139c848f 100644 --- a/ee/tables/secretscan/config.toml +++ b/ee/tables/secretscan/config.toml @@ -16,4 +16,4 @@ paths = ['''(?i).*\.ya?ml$'''] description = "Ignore variable names with empty values (https://github.com/gitleaks/gitleaks/issues/1828)" condition = "AND" regex_target = "match" -regexes = ['''^\s*\w[\w-]*=$'''] +regexes = ['''^\s*[A-Z\d][A-Z\d_-]*=$'''] From ec62066a2df7b4aeba49a910b1d5a60ac3abef3e Mon Sep 17 00:00:00 2001 From: RebeccaMahany Date: Mon, 20 Apr 2026 10:35:50 -0400 Subject: [PATCH 3/8] Remove match restriction, not needed --- ee/tables/secretscan/config.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/ee/tables/secretscan/config.toml b/ee/tables/secretscan/config.toml index 7139c848f..430535224 100644 --- a/ee/tables/secretscan/config.toml +++ b/ee/tables/secretscan/config.toml @@ -15,5 +15,4 @@ paths = ['''(?i).*\.ya?ml$'''] [[rules.allowlists]] description = "Ignore variable names with empty values (https://github.com/gitleaks/gitleaks/issues/1828)" condition = "AND" -regex_target = "match" regexes = ['''^\s*[A-Z\d][A-Z\d_-]*=$'''] From 0b622acfd3e2fe26fd93a29f324241e7946ba37b Mon Sep 17 00:00:00 2001 From: RebeccaMahany Date: Mon, 20 Apr 2026 11:41:50 -0400 Subject: [PATCH 4/8] Add back test cases for true positives --- ee/tables/secretscan/table_test.go | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/ee/tables/secretscan/table_test.go b/ee/tables/secretscan/table_test.go index 4e3bb3677..8411bc598 100644 --- a/ee/tables/secretscan/table_test.go +++ b/ee/tables/secretscan/table_test.go @@ -423,9 +423,10 @@ func Test_kolideConfig(t *testing.T) { require.NoError(t, configErr) for _, tt := range []struct { - testCaseName string - pathName string - rawData string + testCaseName string + pathName string + rawData string + expectedFinding bool }{ { testCaseName: "K8s sealed secrets", @@ -447,6 +448,7 @@ spec: name: basic-auth namespace: default `, + expectedFinding: false, }, { testCaseName: "empty variable, with underscore", @@ -454,6 +456,7 @@ spec: 123_S3_CREDS= 123_S3_IP_REGION= `, + expectedFinding: false, }, { testCaseName: "empty variable, with hyphen", @@ -461,6 +464,7 @@ spec: 123-S3-CREDS= 123-S3-IP-REGION= `, + expectedFinding: false, }, { testCaseName: "empty variable, with alphanumeric", @@ -468,6 +472,7 @@ spec: 123S3CREDS= 123S3IPREGION= `, + expectedFinding: false, }, { testCaseName: "empty variable, with tab before empty variable", @@ -475,6 +480,15 @@ spec: 123_S3_CREDS= 123_S3_IP_REGION= `, + expectedFinding: false, + }, + { + testCaseName: "empty variable (true positive)", + rawData: ` +123_S3_CREDS=9b065cc5-cf2e-4b3f-9a20-3422e060807a +123_S3_IP_REGION=52b22b1e-2178-4a1e-bbba-50d0160ffab3 +`, + expectedFinding: true, }, } { t.Run(tt.testCaseName, func(t *testing.T) { @@ -489,7 +503,11 @@ spec: findings, err := detector.DetectSource(t.Context(), fileSource) require.NoError(t, err) - require.Equal(t, 0, len(findings)) + if tt.expectedFinding { + require.Less(t, 0, len(findings)) + } else { + require.Equal(t, 0, len(findings)) + } }) } } From a1632473c7545322cd8db54192dca92d15121387 Mon Sep 17 00:00:00 2001 From: RebeccaMahany Date: Mon, 20 Apr 2026 11:57:35 -0400 Subject: [PATCH 5/8] Add length restriction to regex as a proxy for filtering on entropy --- ee/tables/secretscan/config.toml | 4 +++- ee/tables/secretscan/table_test.go | 10 +++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/ee/tables/secretscan/config.toml b/ee/tables/secretscan/config.toml index 430535224..83d943980 100644 --- a/ee/tables/secretscan/config.toml +++ b/ee/tables/secretscan/config.toml @@ -15,4 +15,6 @@ paths = ['''(?i).*\.ya?ml$'''] [[rules.allowlists]] description = "Ignore variable names with empty values (https://github.com/gitleaks/gitleaks/issues/1828)" condition = "AND" -regexes = ['''^\s*[A-Z\d][A-Z\d_-]*=$'''] +# We limit length to 35 as a proxy for not allowlisting high-entropy values here. +# Variable names are probably usually under that length. +regexes = ['''^\s*[A-Z\d][A-Z\d_-]{0,35}=$'''] diff --git a/ee/tables/secretscan/table_test.go b/ee/tables/secretscan/table_test.go index 8411bc598..1411e156b 100644 --- a/ee/tables/secretscan/table_test.go +++ b/ee/tables/secretscan/table_test.go @@ -483,10 +483,18 @@ spec: expectedFinding: false, }, { - testCaseName: "empty variable (true positive)", + testCaseName: "empty variable (true positive, variable is not empty)", rawData: ` 123_S3_CREDS=9b065cc5-cf2e-4b3f-9a20-3422e060807a 123_S3_IP_REGION=52b22b1e-2178-4a1e-bbba-50d0160ffab3 +`, + expectedFinding: true, + }, + { + testCaseName: "empty variable (true positive, long variable with high entropy)", + rawData: ` +375E6860-39D4-11F1-B4AC-0800200C9A66-375E6861-39D4-11F1-B4AC-0800200C9A66_123_S3_CREDS= +4DE613D1-39D4-11F1-B4AC-0800200C9A66_123_S3_IP_REGION_4DE613D0-39D4-11F1-B4AC-0800200C9A66= `, expectedFinding: true, }, From fc262ec7daedc72fc69ec53d0250120acaa6b58c Mon Sep 17 00:00:00 2001 From: RebeccaMahany Date: Mon, 20 Apr 2026 16:01:33 -0400 Subject: [PATCH 6/8] Make exception for .env files only --- ee/tables/secretscan/config.toml | 1 + ee/tables/secretscan/table_test.go | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/ee/tables/secretscan/config.toml b/ee/tables/secretscan/config.toml index 83d943980..f7dea9b00 100644 --- a/ee/tables/secretscan/config.toml +++ b/ee/tables/secretscan/config.toml @@ -18,3 +18,4 @@ condition = "AND" # We limit length to 35 as a proxy for not allowlisting high-entropy values here. # Variable names are probably usually under that length. regexes = ['''^\s*[A-Z\d][A-Z\d_-]{0,35}=$'''] +paths = ['''(?i).*\.env$'''] diff --git a/ee/tables/secretscan/table_test.go b/ee/tables/secretscan/table_test.go index 1411e156b..0444305ef 100644 --- a/ee/tables/secretscan/table_test.go +++ b/ee/tables/secretscan/table_test.go @@ -452,6 +452,7 @@ spec: }, { testCaseName: "empty variable, with underscore", + pathName: ".env", rawData: ` 123_S3_CREDS= 123_S3_IP_REGION= @@ -460,6 +461,7 @@ spec: }, { testCaseName: "empty variable, with hyphen", + pathName: ".env", rawData: ` 123-S3-CREDS= 123-S3-IP-REGION= @@ -468,6 +470,7 @@ spec: }, { testCaseName: "empty variable, with alphanumeric", + pathName: ".env", rawData: ` 123S3CREDS= 123S3IPREGION= @@ -476,6 +479,7 @@ spec: }, { testCaseName: "empty variable, with tab before empty variable", + pathName: ".env", rawData: ` 123_S3_CREDS= 123_S3_IP_REGION= @@ -484,6 +488,7 @@ spec: }, { testCaseName: "empty variable (true positive, variable is not empty)", + pathName: ".env", rawData: ` 123_S3_CREDS=9b065cc5-cf2e-4b3f-9a20-3422e060807a 123_S3_IP_REGION=52b22b1e-2178-4a1e-bbba-50d0160ffab3 @@ -492,6 +497,7 @@ spec: }, { testCaseName: "empty variable (true positive, long variable with high entropy)", + pathName: ".env", rawData: ` 375E6860-39D4-11F1-B4AC-0800200C9A66-375E6861-39D4-11F1-B4AC-0800200C9A66_123_S3_CREDS= 4DE613D1-39D4-11F1-B4AC-0800200C9A66_123_S3_IP_REGION_4DE613D0-39D4-11F1-B4AC-0800200C9A66= From 426877411c1b0ccd80532ac284d40d0e32525aa7 Mon Sep 17 00:00:00 2001 From: RebeccaMahany Date: Tue, 21 Apr 2026 08:56:21 -0400 Subject: [PATCH 7/8] Split up all-caps and all-lowercase empty var allowlists --- ee/tables/secretscan/config.toml | 13 +++++++++++-- ee/tables/secretscan/table_test.go | 9 +++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/ee/tables/secretscan/config.toml b/ee/tables/secretscan/config.toml index f7dea9b00..96bc7c1bc 100644 --- a/ee/tables/secretscan/config.toml +++ b/ee/tables/secretscan/config.toml @@ -13,9 +13,18 @@ condition = "AND" regexes = ['''Ag[a-zA-Z0-9+/]{500,}={0,2}'''] paths = ['''(?i).*\.ya?ml$'''] [[rules.allowlists]] -description = "Ignore variable names with empty values (https://github.com/gitleaks/gitleaks/issues/1828)" +description = "Ignore all-caps variable names with empty values (https://github.com/gitleaks/gitleaks/issues/1828)" condition = "AND" # We limit length to 35 as a proxy for not allowlisting high-entropy values here. -# Variable names are probably usually under that length. +# Variable names are probably usually under that length. We restrict to all-caps +# for the same reason. regexes = ['''^\s*[A-Z\d][A-Z\d_-]{0,35}=$'''] paths = ['''(?i).*\.env$'''] +[[rules.allowlists]] +description = "Ignore all-lowercase variable names with empty values (https://github.com/gitleaks/gitleaks/issues/1828)" +condition = "AND" +# We limit length to 35 as a proxy for not allowlisting high-entropy values here. +# Variable names are probably usually under that length. We restrict to all-lowercase +# for the same reason. +regexes = ['''^\s*[a-z\d][a-z\d_-]{0,35}=$'''] +paths = ['''(?i).*\.env$'''] diff --git a/ee/tables/secretscan/table_test.go b/ee/tables/secretscan/table_test.go index 0444305ef..55c0ebd27 100644 --- a/ee/tables/secretscan/table_test.go +++ b/ee/tables/secretscan/table_test.go @@ -483,6 +483,15 @@ spec: rawData: ` 123_S3_CREDS= 123_S3_IP_REGION= +`, + expectedFinding: false, + }, + { + testCaseName: "empty variable, all lowercase", + pathName: ".env", + rawData: ` +123_s3_creds= +123_s3_ip_region= `, expectedFinding: false, }, From 53c967ec8f97add9e54070c1eee2d2e232eb3936 Mon Sep 17 00:00:00 2001 From: RebeccaMahany Date: Tue, 21 Apr 2026 09:02:00 -0400 Subject: [PATCH 8/8] Make path regex for .env consistent --- ee/tables/secretscan/config.toml | 4 ++-- ee/tables/secretscan/table_test.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ee/tables/secretscan/config.toml b/ee/tables/secretscan/config.toml index 96bc7c1bc..c51a344a3 100644 --- a/ee/tables/secretscan/config.toml +++ b/ee/tables/secretscan/config.toml @@ -19,7 +19,7 @@ condition = "AND" # Variable names are probably usually under that length. We restrict to all-caps # for the same reason. regexes = ['''^\s*[A-Z\d][A-Z\d_-]{0,35}=$'''] -paths = ['''(?i).*\.env$'''] +paths = ['''.*\.env(\..+)?$'''] [[rules.allowlists]] description = "Ignore all-lowercase variable names with empty values (https://github.com/gitleaks/gitleaks/issues/1828)" condition = "AND" @@ -27,4 +27,4 @@ condition = "AND" # Variable names are probably usually under that length. We restrict to all-lowercase # for the same reason. regexes = ['''^\s*[a-z\d][a-z\d_-]{0,35}=$'''] -paths = ['''(?i).*\.env$'''] +paths = ['''.*\.env(\..+)?$'''] diff --git a/ee/tables/secretscan/table_test.go b/ee/tables/secretscan/table_test.go index 55c0ebd27..fb924a028 100644 --- a/ee/tables/secretscan/table_test.go +++ b/ee/tables/secretscan/table_test.go @@ -470,7 +470,7 @@ spec: }, { testCaseName: "empty variable, with alphanumeric", - pathName: ".env", + pathName: ".env.local", rawData: ` 123S3CREDS= 123S3IPREGION= @@ -479,7 +479,7 @@ spec: }, { testCaseName: "empty variable, with tab before empty variable", - pathName: ".env", + pathName: "aws.env", rawData: ` 123_S3_CREDS= 123_S3_IP_REGION=