diff --git a/pkg/detectors/hashicorpvaultbatchtoken/hashicorpvaultbatchtoken.go b/pkg/detectors/hashicorpvaultbatchtoken/hashicorpvaultbatchtoken.go new file mode 100644 index 000000000000..58ad20c0d733 --- /dev/null +++ b/pkg/detectors/hashicorpvaultbatchtoken/hashicorpvaultbatchtoken.go @@ -0,0 +1,181 @@ +package hashicorpbatchtoken + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + + regexp "github.com/wasilibs/go-re2" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +type Scanner struct { + client *http.Client + detectors.DefaultMultiPartCredentialProvider + detectors.EndpointSetter +} + +var _ detectors.Detector = (*Scanner)(nil) +var _ detectors.EndpointCustomizer = (*Scanner)(nil) + +var ( + defaultClient = detectors.DetectorHttpClientWithNoLocalAddresses + + // Batch tokens: hvb.<50-300 chars> + batchTokenPat = regexp.MustCompile( + `\b(hvb\.[A-Za-z0-9_.-]{50,300})\b`, + ) + + vaultUrlPat = regexp.MustCompile(`(https?:\/\/[^\s\/]*\.hashicorp\.cloud(?::\d+)?)(?:\/[^\s]*)?`) +) + +func (s Scanner) Keywords() []string { + return []string{"hvb."} +} + +func (Scanner) CloudEndpoint() string { return "" } + +func (s Scanner) Description() string { + return "This detector detects and verifies HashiCorp Vault batch tokens" +} + +func (s Scanner) getClient() *http.Client { + if s.client != nil { + return s.client + } + return defaultClient +} + +func (s Scanner) FromData( + ctx context.Context, + verify bool, + data []byte, +) (results []detectors.Result, err error) { + + dataStr := string(data) + + uniqueTokens := make(map[string]struct{}) + for _, match := range batchTokenPat.FindAllStringSubmatch(dataStr, -1) { + uniqueTokens[match[1]] = struct{}{} + } + + var uniqueVaultUrls = make(map[string]struct{}) + for _, match := range vaultUrlPat.FindAllStringSubmatch(dataStr, -1) { + url := strings.TrimSpace(match[1]) + uniqueVaultUrls[url] = 
// lookupResponse is the subset of the JSON body of a
// /v1/auth/token/lookup-self response that this detector reads.
type lookupResponse struct {
	// Data is the token metadata object nested under the "data" key.
	Data struct {
		DisplayName string   `json:"display_name"`
		EntityId    string   `json:"entity_id"`
		ExpireTime  string   `json:"expire_time"`
		Orphan      bool     `json:"orphan"`
		Policies    []string `json:"policies"`
		Renewable   bool     `json:"renewable"`
		Type        string   `json:"type"`
	} `json:"data"`
}

// verifyVaultToken checks token against the Vault server at baseUrl by
// issuing GET <baseUrl>/v1/auth/token/lookup-self with the token in the
// X-Vault-Token header.
//
// Results:
//   - (true, resp, nil) on HTTP 200 whose body decodes as JSON.
//   - (false, nil, nil) on HTTP 401 or 403: the server rejected the token.
//   - (false, nil, err) when the URL cannot be built, the request cannot be
//     created or sent, the 200 body is not valid JSON, or the server answers
//     with any other status. Underlying errors are wrapped with %w so
//     errors.Is / errors.As keep working for callers.
//
// NOTE(review): any 200 response with a decodable JSON body is treated as a
// verified token; the payload shape itself is not validated.
func verifyVaultToken(
	ctx context.Context,
	client *http.Client,
	baseUrl string,
	token string,
) (bool, *lookupResponse, error) {
	// Named lookupURL (not url) so the net/url package is not shadowed.
	lookupURL, err := url.JoinPath(baseUrl, "/v1/auth/token/lookup-self")
	if err != nil {
		return false, nil, fmt.Errorf("building lookup-self URL from %q: %w", baseUrl, err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, lookupURL, http.NoBody)
	if err != nil {
		return false, nil, fmt.Errorf("creating lookup-self request: %w", err)
	}
	req.Header.Set("X-Vault-Token", token)

	res, err := client.Do(req)
	if err != nil {
		return false, nil, fmt.Errorf("sending lookup-self request: %w", err)
	}
	defer func() {
		// Drain whatever is left before closing so the transport can reuse
		// the connection; failures here are best-effort and ignored.
		_, _ = io.Copy(io.Discard, res.Body)
		_ = res.Body.Close()
	}()

	switch res.StatusCode {
	case http.StatusOK:
		var resp lookupResponse
		if err := json.NewDecoder(res.Body).Decode(&resp); err != nil {
			return false, nil, fmt.Errorf("decoding lookup-self response: %w", err)
		}
		return true, &resp, nil

	case http.StatusForbidden, http.StatusUnauthorized:
		// Definitive rejection: unverified, but not a verification error.
		return false, nil, nil

	default:
		return false, nil, fmt.Errorf(
			"unexpected HTTP response status %d",
			res.StatusCode,
		)
	}
}
+ // To regenerate the token run this command in vault web cli: + // write auth/token/create type=batch policies="test-policy" ttl="768h" no_parent=true + batchToken := testSecrets.MustGetField("HASHICORPVAULT_BATCH_TOKEN") + + fakeToken := "hvb.fakeinvalidtokenaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + tests := []struct { + name string + input string + verify bool + wantTokens []string + wantVerified bool + wantVerificationErr bool + }{ + { + name: "valid batch token with URL, verify", + input: fmt.Sprintf("%s\n%s", batchToken, vaultURL), + verify: true, + wantTokens: []string{ + batchToken + vaultURL, + }, + wantVerified: true, + wantVerificationErr: false, + }, + { + name: "invalid batch token with URL, verify", + input: fmt.Sprintf("%s\n%s", fakeToken, vaultURL), + verify: true, + wantTokens: []string{ + fakeToken + vaultURL, + }, + wantVerified: false, + wantVerificationErr: false, + }, + { + name: "valid batch token with URL, no verify", + input: fmt.Sprintf("%s\n%s", batchToken, vaultURL), + verify: false, + wantTokens: []string{ + batchToken + vaultURL, + }, + wantVerified: false, + wantVerificationErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + scanner := Scanner{} + scanner.UseFoundEndpoints(true) + scanner.UseCloudEndpoint(true) + + results, err := scanner.FromData(ctx, tt.verify, []byte(tt.input)) + require.NoError(t, err) + + if len(results) != len(tt.wantTokens) { + t.Fatalf("expected %d results, got %d", len(tt.wantTokens), len(results)) + } + + for i, r := range results { + if string(r.RawV2) != tt.wantTokens[i] && string(r.Raw) != tt.wantTokens[i] { + t.Errorf("expected token %s, got %s", tt.wantTokens[i], string(r.Raw)) + } + + if r.Verified != tt.wantVerified { + t.Errorf("expected verified=%v, got %v", tt.wantVerified, r.Verified) + } + + if (r.VerificationError() != nil) != tt.wantVerificationErr { + t.Errorf("expected verification error=%v, got %v", tt.wantVerificationErr, 
r.VerificationError()) + } + } + }) + } +} + +func BenchmarkFromData(b *testing.B) { + ctx := context.Background() + s := Scanner{} + + for name, data := range detectors.MustGetBenchmarkData() { + b.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/hashicorpvaultbatchtoken/hashicorpvaultbatchtoken_test.go b/pkg/detectors/hashicorpvaultbatchtoken/hashicorpvaultbatchtoken_test.go new file mode 100644 index 000000000000..309d79366386 --- /dev/null +++ b/pkg/detectors/hashicorpvaultbatchtoken/hashicorpvaultbatchtoken_test.go @@ -0,0 +1,112 @@ +package hashicorpbatchtoken + +import ( + "context" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/require" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) + +func TestBatchToken_PatternWithURL(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) + + tests := []struct { + name string + input string + want []string + }{ + { + name: "valid hvb token with vault url", + input: ` + X-Vault-Token=hvb.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + https://vault-cluster-abc123.hashicorp.cloud:8200 + `, + want: []string{ + "hvb.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaahttps://vault-cluster-abc123.hashicorp.cloud:8200", + }, + }, + { + name: "valid hvb token with longer length", + input: ` + hvb.bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + https://vault-cluster-xyz.hashicorp.cloud + `, + want: []string{ + "hvb.bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbhttps://vault-cluster-xyz.hashicorp.cloud", + }, + }, + { + name: "token only, no URL", + input: ` + hvb.cccccccccccccccccccccccccccccccccccccccccccccccccc + `, + want: nil, + }, + { + name: "URL 
only, no token", + input: ` + https://vault-cluster-abc123.hashicorp.cloud:8200 + `, + want: nil, + }, + { + name: "invalid token - too short", + input: ` + hvb.shorttoken + https://vault-cluster-abc123.hashicorp.cloud + `, + want: nil, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && len(test.want) > 0 { + t.Errorf( + "test %q failed: expected keywords %v to be found in the input", + test.name, + d.Keywords(), + ) + return + } + + d.UseFoundEndpoints(true) + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + require.NoError(t, err) + + if len(results) != len(test.want) { + t.Errorf( + "mismatch in result count: expected %d, got %d", + len(test.want), + len(results), + ) + return + } + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} + } + } + + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } + }) + } +} diff --git a/pkg/engine/defaults/defaults.go b/pkg/engine/defaults/defaults.go index c3e76f3caed5..096514d813f4 100644 --- a/pkg/engine/defaults/defaults.go +++ b/pkg/engine/defaults/defaults.go @@ -355,6 +355,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/harness" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/harvest" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/hashicorpvaultauth" + hashicorpbatchtoken "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/hashicorpvaultbatchtoken" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/hasura" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/hellosign" 
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/helpcrunch" @@ -1230,6 +1231,7 @@ func buildDetectorList() []detectors.Detector { &harness.Scanner{}, &harvest.Scanner{}, &hashicorpvaultauth.Scanner{}, + &hashicorpbatchtoken.Scanner{}, &hasura.Scanner{}, &hellosign.Scanner{}, &helpcrunch.Scanner{}, diff --git a/pkg/engine/engine_test.go b/pkg/engine/engine_test.go index c6d4bdec57a5..9c4a5acdb68f 100644 --- a/pkg/engine/engine_test.go +++ b/pkg/engine/engine_test.go @@ -1377,6 +1377,7 @@ func TestEngineInitializesCloudProviderDetectors(t *testing.T) { detectorspb.DetectorType_ArtifactoryAccessToken: {}, detectorspb.DetectorType_ArtifactoryReferenceToken: {}, detectorspb.DetectorType_TableauPersonalAccessToken: {}, + detectorspb.DetectorType_HashiCorpVaultBatchToken: {}, // these do not have any cloud endpoint } diff --git a/pkg/pb/detectorspb/detectors.pb.go b/pkg/pb/detectorspb/detectors.pb.go index 9dc30b110d3b..4148dcbf7913 100644 --- a/pkg/pb/detectorspb/detectors.pb.go +++ b/pkg/pb/detectorspb/detectors.pb.go @@ -1150,6 +1150,7 @@ const ( DetectorType_GoogleGeminiAPIKey DetectorType = 1041 DetectorType_ArtifactoryReferenceToken DetectorType = 1042 DetectorType_DatadogApikey DetectorType = 1043 + DetectorType_HashiCorpVaultBatchToken DetectorType = 1044 ) // Enum value maps for DetectorType. 
@@ -2195,6 +2196,7 @@ var ( 1041: "GoogleGeminiAPIKey", 1042: "ArtifactoryReferenceToken", 1043: "DatadogApikey", + 1044: "HashiCorpVaultBatchToken", } DetectorType_value = map[string]int32{ "Alibaba": 0, @@ -3237,6 +3239,7 @@ var ( "GoogleGeminiAPIKey": 1041, "ArtifactoryReferenceToken": 1042, "DatadogApikey": 1043, + "HashiCorpVaultBatchToken": 1044, } ) @@ -3690,7 +3693,7 @@ var file_detectors_proto_rawDesc = []byte{ 0x4c, 0x41, 0x49, 0x4e, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x42, 0x41, 0x53, 0x45, 0x36, 0x34, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x54, 0x46, 0x31, 0x36, 0x10, 0x03, 0x12, 0x13, 0x0a, 0x0f, 0x45, 0x53, 0x43, 0x41, 0x50, 0x45, 0x44, 0x5f, 0x55, 0x4e, 0x49, 0x43, 0x4f, 0x44, 0x45, - 0x10, 0x04, 0x2a, 0xa4, 0x87, 0x01, 0x0a, 0x0c, 0x44, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x10, 0x04, 0x2a, 0xc3, 0x87, 0x01, 0x0a, 0x0c, 0x44, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x6c, 0x69, 0x62, 0x61, 0x62, 0x61, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x41, 0x4d, 0x51, 0x50, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x41, 0x57, 0x53, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x41, 0x7a, 0x75, 0x72, 0x65, 0x10, 0x03, 0x12, @@ -4772,12 +4775,13 @@ var file_detectors_proto_rawDesc = []byte{ 0x4b, 0x65, 0x79, 0x10, 0x91, 0x08, 0x12, 0x1e, 0x0a, 0x19, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x10, 0x92, 0x08, 0x12, 0x12, 0x0a, 0x0d, 0x44, 0x61, 0x74, 0x61, 0x64, 0x6f, - 0x67, 0x41, 0x70, 0x69, 0x6b, 0x65, 0x79, 0x10, 0x93, 0x08, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, - 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, - 0x73, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, 0x79, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, - 0x68, 0x6f, 0x67, 0x2f, 0x76, 0x33, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x70, 0x62, 0x2f, 0x64, 0x65, - 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x70, 0x62, 
0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x33, + 0x67, 0x41, 0x70, 0x69, 0x6b, 0x65, 0x79, 0x10, 0x93, 0x08, 0x12, 0x1d, 0x0a, 0x18, 0x48, 0x61, + 0x73, 0x68, 0x69, 0x43, 0x6f, 0x72, 0x70, 0x56, 0x61, 0x75, 0x6c, 0x74, 0x42, 0x61, 0x74, 0x63, + 0x68, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x10, 0x94, 0x08, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, 0x74, + 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x73, + 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, 0x79, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x68, + 0x6f, 0x67, 0x2f, 0x76, 0x33, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x70, 0x62, 0x2f, 0x64, 0x65, 0x74, + 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/proto/detectors.proto b/proto/detectors.proto index bb7bbe69dcff..0657c218675c 100644 --- a/proto/detectors.proto +++ b/proto/detectors.proto @@ -1053,6 +1053,7 @@ enum DetectorType { GoogleGeminiAPIKey = 1041; ArtifactoryReferenceToken = 1042; DatadogApikey = 1043; + HashiCorpVaultBatchToken = 1044; } message Result {