trufflesecurity · dipto-truffle · Mar 27, 2026 · Mar 31, 2026 · shahzadhaider1 · Apr 2, 2026
@@ -3,8 +3,10 @@ package v2
 import (
 	"context"
 	"errors"
+	"maps"
 	"net/http"
 	"regexp"
+	"slices"
 	"strings"
 
 	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
@@ -74,21 +76,31 @@ func ProcessData(ctx context.Context, clientSecrets, clientIds, tenantIds map[st
 	logCtx := logContext.AddLogger(ctx)
 	invalidClientsForTenant := make(map[string]map[string]struct{})
 
+	// Clone maps so verification-driven deletions don't mutate the caller's
+	// data or produce non-deterministic results across scanner runs.
+	activeClients := maps.Clone(clientIds)
+	activeTenants := maps.Clone(tenantIds)
+
 SecretLoop:
-	for clientSecret := range clientSecrets {
+	for _, clientSecret := range slices.Sorted(maps.Keys(clientSecrets)) {
 		var (
 			r        *detectors.Result
 			clientId string
 			tenantId string
 		)
 
 	ClientLoop:
-		for cId := range clientIds {
+		for _, cId := range slices.Sorted(maps.Keys(activeClients)) {
+			if _, ok := activeClients[cId]; !ok {
+				continue
+			}
 			clientId = cId
-			for tId := range tenantIds {
+			for _, tId := range slices.Sorted(maps.Keys(activeTenants)) {
+				if _, ok := activeTenants[tId]; !ok {
+					continue
+				}
 				tenantId = tId
 
-				// Skip known invalid tenants.
 				invalidClients := invalidClientsForTenant[tenantId]
 				if invalidClients == nil {
 					invalidClients = map[string]struct{}{}
@@ -100,15 +112,12 @@ SecretLoop:
 
 				if verify {
 					if !azure_entra.TenantExists(logCtx, client, tenantId) {
-						// Tenant doesn't exist
-						delete(tenantIds, tenantId)
+						delete(activeTenants, tenantId)
 						continue
 					}
-					// Tenant exists, ensure this isn't attempted as a clientId.
-					delete(clientIds, tenantId)
+					delete(activeClients, tenantId)
 
 					isVerified, extraData, verificationErr := serviceprincipal.VerifyCredentials(ctx, client, tenantId, clientId, clientSecret)
-					// Handle errors.
 					if verificationErr != nil {
 						switch {
 						case errors.Is(verificationErr, serviceprincipal.ErrConditionalAccessPolicy):
@@ -118,18 +127,15 @@ SecretLoop:
 						case errors.Is(verificationErr, serviceprincipal.ErrSecretExpired):
 							continue SecretLoop
 						case errors.Is(verificationErr, serviceprincipal.ErrTenantNotFound):
-							// Tenant doesn't exist. This shouldn't happen with the check above.
-							delete(tenantIds, tenantId)
+							delete(activeTenants, tenantId)
 							continue
 						case errors.Is(verificationErr, serviceprincipal.ErrClientNotFoundInTenant):
-							// Tenant is valid but the ClientID doesn't exist.
 							invalidClients[clientId] = struct{}{}
 							continue
 						}
 					}
 
-					// The result is verified or there's only one associated client and tenant.
-					if isVerified || (len(clientIds) == 1 && len(tenantIds) == 1) {
+					if isVerified || (len(activeClients) == 1 && len(activeTenants) == 1) {
 						r = createResult(tenantId, clientId, clientSecret, isVerified, extraData, verificationErr)
 						break ClientLoop
 					}
@@ -138,11 +144,10 @@ SecretLoop:
 		}
 
 		if r == nil {
-			// Only include the clientId and tenantId if we're confident which one it is.
-			if len(clientIds) != 1 {
+			if len(activeClients) != 1 {
 				clientId = ""
 			}
-			if len(tenantIds) != 1 {
+			if len(activeTenants) != 1 {
 				tenantId = ""
 			}
 			r = createResult(tenantId, clientId, clientSecret, false, nil, nil)

@@ -0,0 +1,173 @@
+package v2
+
+import (
+	"context"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestProcessData_RawV2DependsOnIDCount shows that the same Azure client
+// secret produces different RawV2 values depending on how many candidate
+// client/tenant IDs appear in the chunk.
+//
+// With verify=false, ProcessData populates RawV2 only when there is exactly
+// one client ID and exactly one tenant ID. If either set is ambiguous (>1),
+// the IDs are cleared and RawV2 is nil.
+//
+// This is the root cause of CSM-1857's secondary issue: the same logical
+// secret can get different hash_v2 values across scans if the surrounding
+// chunk context changes, producing duplicate secret rows in the database.
+func TestProcessData_RawV2DependsOnIDCount(t *testing.T) {
+	const (
+		clientSecret = "abc4Q~fake-secret-that-is-long-enough1234567"
+		clientID1    = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
+		clientID2    = "f9e8d7c6-b5a4-3210-fedc-ba9876543210"
+		tenantID1    = "11111111-2222-3333-4444-555566667777"
+		tenantID2    = "aaaaaaaa-bbbb-cccc-dddd-eeeeffff0000"
+	)
+
+	tests := []struct {
+		name      string
+		clientIDs map[string]struct{}
+		tenantIDs map[string]struct{}
+		wantRawV2 bool
+	}{
+		{
+			name:      "single client and tenant",
+			clientIDs: map[string]struct{}{clientID1: {}},
+			tenantIDs: map[string]struct{}{tenantID1: {}},
+			wantRawV2: true,
+		},
+		{
+			name:      "multiple clients clears IDs",
+			clientIDs: map[string]struct{}{clientID1: {}, clientID2: {}},
+			tenantIDs: map[string]struct{}{tenantID1: {}},
+			wantRawV2: false,
+		},
+		{
+			name:      "multiple tenants clears IDs",
+			clientIDs: map[string]struct{}{clientID1: {}},
+			tenantIDs: map[string]struct{}{tenantID1: {}, tenantID2: {}},
+			wantRawV2: false,
+		},
+		{
+			name:      "multiple clients and tenants clears IDs",
+			clientIDs: map[string]struct{}{clientID1: {}, clientID2: {}},
+			tenantIDs: map[string]struct{}{tenantID1: {}, tenantID2: {}},
+			wantRawV2: false,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			secrets := map[string]struct{}{clientSecret: {}}
+			results := ProcessData(context.Background(), secrets, tc.clientIDs, tc.tenantIDs, false, nil)
+
+			require.Len(t, results, 1)
+			assert.Equal(t, []byte(clientSecret), results[0].Raw, "Raw should always be the client secret")
+
+			if tc.wantRawV2 {
+				assert.NotNil(t, results[0].RawV2, "RawV2 should be populated with unambiguous IDs")
+			} else {
+				assert.Nil(t, results[0].RawV2, "RawV2 should be nil when IDs are ambiguous")
+			}
+		})
+	}
+}
+
+// TestProcessData_DeterministicRawV2 verifies that ProcessData produces
+// identical RawV2 on repeated calls with the same inputs. Before the sorted
+// iteration fix, Go map randomization could cause different (clientId, tenantId)
+// pairings across runs.
+func TestProcessData_DeterministicRawV2(t *testing.T) {
+	const (
+		clientSecret = "abc4Q~fake-secret-that-is-long-enough1234567"
+		clientID     = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
+		tenantID     = "11111111-2222-3333-4444-555566667777"
+	)
+
+	var firstRawV2 []byte
+	for i := 0; i < 50; i++ {
+		results := ProcessData(
+			context.Background(),
+			map[string]struct{}{clientSecret: {}},
+			map[string]struct{}{clientID: {}},
+			map[string]struct{}{tenantID: {}},
+			false, nil,
+		)
+		require.Len(t, results, 1)
+		if i == 0 {
+			firstRawV2 = results[0].RawV2
+			require.NotNil(t, firstRawV2)
+		} else {
+			assert.Equal(t, firstRawV2, results[0].RawV2,
+				"RawV2 must be identical across repeated calls (iteration %d)", i)
+		}
+	}
+}
+
+// TestProcessData_DoesNotMutateCallerMaps verifies that ProcessData does not
+// modify the maps passed by the caller.
+func TestProcessData_DoesNotMutateCallerMaps(t *testing.T) {
+	const clientSecret = "abc4Q~fake-secret-that-is-long-enough1234567"
+
+	secrets := map[string]struct{}{clientSecret: {}}
+	clientIDs := map[string]struct{}{
+		"a1b2c3d4-e5f6-7890-abcd-ef1234567890": {},
+		"f9e8d7c6-b5a4-3210-fedc-ba9876543210": {},
+	}
+	tenantIDs := map[string]struct{}{
+		"11111111-2222-3333-4444-555566667777": {},
+		"aaaaaaaa-bbbb-cccc-dddd-eeeeffff0000": {},
+	}
+
+	origSecretLen := len(secrets)
+	origClientLen := len(clientIDs)
+	origTenantLen := len(tenantIDs)
+
+	_ = ProcessData(context.Background(), secrets, clientIDs, tenantIDs, false, nil)
+
+	assert.Len(t, secrets, origSecretLen, "caller's secrets map must not be mutated")
+	assert.Len(t, clientIDs, origClientLen, "caller's clientIDs map must not be mutated")
+	assert.Len(t, tenantIDs, origTenantLen, "caller's tenantIDs map must not be mutated")
+}
+
+// TestProcessData_SameSecretDifferentRawV2 demonstrates the chain:
+// the same client secret scanned with different chunk contexts produces
+// different RawV2 bytes depending on whether IDs are ambiguous.
+func TestProcessData_SameSecretDifferentRawV2(t *testing.T) {
+	const (
+		clientSecret = "abc4Q~fake-secret-that-is-long-enough1234567"
+		clientID     = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
+		tenantID     = "11111111-2222-3333-4444-555566667777"
+	)
+
+	// Scan 1: chunk has exactly one client ID and one tenant ID.
+	results1 := ProcessData(
+		context.Background(),
+		map[string]struct{}{clientSecret: {}},
+		map[string]struct{}{clientID: {}},
+		map[string]struct{}{tenantID: {}},
+		false, nil,
+	)
+	require.Len(t, results1, 1)
+	rawV2Populated := results1[0].RawV2
+	require.NotNil(t, rawV2Populated, "scan 1: RawV2 should be populated")
+
+	// Scan 2: same secret, but chunk now contains an extra client-like UUID.
+	results2 := ProcessData(
+		context.Background(),
+		map[string]struct{}{clientSecret: {}},
+		map[string]struct{}{clientID: {}, "f9e8d7c6-b5a4-3210-fedc-ba9876543210": {}},
+		map[string]struct{}{tenantID: {}},
+		false, nil,
+	)
+	require.Len(t, results2, 1)
+	rawV2Nil := results2[0].RawV2
+	assert.Nil(t, rawV2Nil, "scan 2: RawV2 should be nil due to ambiguous client IDs")
+
+	assert.NotEqual(t, rawV2Populated, rawV2Nil,
+		"same logical secret produces different RawV2 depending on chunk context")
+}