-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Expand file tree
/
Copy pathcustom_detectors.go
More file actions
424 lines (366 loc) · 12 KB
/
custom_detectors.go
File metadata and controls
424 lines (366 loc) · 12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
package custom_detectors
import (
"bytes"
"context"
"encoding/json"
"io"
"maps"
"net/http"
"regexp"
"slices"
"strings"
"golang.org/x/sync/errgroup"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/custom_detectorspb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detector_typepb"
)
// maxTotalMatches is the maximum number of match permutations produced from
// one chunk. productIndices caps its output at this value, which protects the
// scanner from doing too much work for poorly defined regexps. FromData also
// uses it as the buffer size of its results channel.
const maxTotalMatches = 100
// CustomRegexWebhook is a CustomRegex with webhook validation that is
// guaranteed to be valid (assuming the data is not changed after
// initialization). Use NewWebhookCustomRegex to construct one so that the
// embedded configuration has been validated.
type CustomRegexWebhook struct {
// The embedded configuration; its generated getters are promoted onto
// CustomRegexWebhook.
*custom_detectorspb.CustomRegex
}
// Compile-time assertions that *CustomRegexWebhook satisfies the detector
// interfaces it is expected to implement.
var _ detectors.Detector = (*CustomRegexWebhook)(nil)
var _ detectors.CustomFalsePositiveChecker = (*CustomRegexWebhook)(nil)
var _ detectors.MaxSecretSizeProvider = (*CustomRegexWebhook)(nil)
// NewWebhookCustomRegex validates pb and wraps it in a CustomRegexWebhook.
//
// Validation runs in a fixed order (keywords, regexes, both exclude-regex
// lists, the primary regex name, then every verify endpoint followed by its
// headers) and stops at the first failure, which is returned with a nil
// detector. On success a missing primary regex name is filled in on pb, and
// pb itself (not a copy) is stored in the returned value.
func NewWebhookCustomRegex(pb *custom_detectorspb.CustomRegex) (*CustomRegexWebhook, error) {
	// TODO: Return all validation errors.
	// Each step is a closure so later validators only run when every earlier
	// one has passed.
	steps := []func() error{
		func() error { return ValidateKeywords(pb.Keywords) },
		func() error { return ValidateRegex(pb.Regex) },
		func() error { return ValidateRegexSlice(pb.ExcludeRegexesCapture) },
		func() error { return ValidateRegexSlice(pb.ExcludeRegexesMatch) },
		func() error { return ValidatePrimaryRegexName(pb.PrimaryRegexName, pb.Regex) },
	}
	for _, step := range steps {
		if err := step(); err != nil {
			return nil, err
		}
	}

	for _, verifier := range pb.Verify {
		if err := ValidateVerifyEndpoint(verifier.Endpoint, verifier.Unsafe); err != nil {
			return nil, err
		}
		if err := ValidateVerifyHeaders(verifier.Headers); err != nil {
			return nil, err
		}
	}

	// Default the primary regex name when the configuration left it empty.
	ensurePrimaryRegexNameSet(pb)

	// TODO: Copy only necessary data out of pb.
	return &CustomRegexWebhook{pb}, nil
}
// httpClient is the package-wide HTTP client used by createResults to send
// webhook verification requests.
var httpClient = common.SaneHttpClient()
// FromData runs every configured regex over data, combines the per-regex
// matches into permutations (one match per regex name), drops permutations
// rejected by the configured filters, and emits one result per survivor.
//
// For each regex match in a permutation the "secret" is the first capture
// group, or the whole match when the regex has no capture group. The
// permutation is dropped when any of its matches:
//   - has a secret whose Shannon entropy is below the configured entropy
//     (only when the configured entropy is greater than zero),
//   - has a secret containing one of the exclude words (case-insensitive),
//   - has a whole match that matches a match-targeted exclude regex,
//   - has a secret that matches a capture-targeted exclude regex,
//   - has a secret failing a validation rule enabled for that regex name.
//
// When verify is true each surviving permutation is additionally verified by
// createResults. A non-nil error is returned only when a configured pattern
// fails to compile; errors from building individual results are ignored so
// that as many results as possible are returned.
func (c *CustomRegexWebhook) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
	dataStr := string(data)
	regexMatches := make(map[string][][]string, len(c.GetRegex()))

	// compileAll compiles patterns in order, failing on the first invalid one.
	// A failure will only happen if a regex is invalid.
	compileAll := func(patterns []string) ([]*regexp.Regexp, error) {
		compiled := make([]*regexp.Regexp, 0, len(patterns))
		for _, pattern := range patterns {
			re, err := regexp.Compile(pattern)
			if err != nil {
				return nil, err
			}
			compiled = append(compiled, re)
		}
		return compiled, nil
	}

	// Exclude regexes targeting the capture group.
	excludeRegexesCapture, err := compileAll(c.GetExcludeRegexesCapture())
	if err != nil {
		return nil, err
	}
	// Exclude regexes targeting the entire match.
	excludeRegexes, err := compileAll(c.GetExcludeRegexesMatch())
	if err != nil {
		return nil, err
	}

	// Find all submatches for each regex.
	for name, pattern := range c.GetRegex() {
		re, err := regexp.Compile(pattern)
		if err != nil {
			// This will only happen if the regex is invalid.
			return nil, err
		}
		regexMatches[name] = re.FindAllStringSubmatch(dataStr, -1)
	}

	// Lowercase the exclude words once. The secret is lowercased before the
	// comparison, so a word that kept an uppercase letter could never match.
	excludeWords := make([]string, 0, len(c.GetExcludeWords()))
	for _, word := range c.GetExcludeWords() {
		excludeWords = append(excludeWords, strings.ToLower(word))
	}

	entropy := c.GetEntropy()
	validations := c.GetValidations()

	// Permutate each individual match.
	// {
	//    "foo": [["match1"]]
	//    "bar": [["match2"], ["match3"]]
	// }
	// becomes
	// [
	//    {"foo": ["match1"], "bar": ["match2"]},
	//    {"foo": ["match1"], "bar": ["match3"]},
	// ]
	matches := permutateMatches(regexMatches)

	g := new(errgroup.Group)

	// Create result object and test for verification. The buffer is sized with
	// maxTotalMatches so the sends in createResults have room while this
	// function is still waiting on the group.
	resultsCh := make(chan detectors.Result, maxTotalMatches)

MatchLoop:
	for _, match := range matches {
		for key, values := range match {
			// Attempt to use the capture group; values[0] is the entire match.
			secret := values[0]
			if len(values) > 1 {
				secret = values[1]
			}

			// Check entropy.
			if entropy > 0.0 && detectors.StringShannonEntropy(secret) < float64(entropy) {
				continue MatchLoop
			}

			// Check for exclude words (case-insensitive).
			if len(excludeWords) > 0 {
				lowerSecret := strings.ToLower(secret)
				for _, excludeWord := range excludeWords {
					if strings.Contains(lowerSecret, excludeWord) {
						continue MatchLoop
					}
				}
			}

			// Exclude checks against the entire match.
			for _, excludeMatch := range excludeRegexes {
				if excludeMatch.MatchString(values[0]) {
					continue MatchLoop
				}
			}

			// Exclude secret (capture group), or if no capture group is set,
			// check against entire match.
			for _, excludeSecret := range excludeRegexesCapture {
				if excludeSecret.MatchString(secret) {
					continue MatchLoop
				}
			}

			if validations != nil {
				validationRules := []struct {
					enabled   bool
					validator func(string) bool
				}{
					{validations[key].GetContainsDigit(), ContainsDigit},
					{validations[key].GetContainsLowercase(), ContainsLowercase},
					{validations[key].GetContainsUppercase(), ContainsUppercase},
					{validations[key].GetContainsSpecialChar(), ContainsSpecialChar},
				}

				for _, rule := range validationRules {
					if rule.enabled && !rule.validator(secret) {
						// Skip this match if a validation rule is enabled but
						// missing from the secret.
						continue MatchLoop
					}
				}
			}
		}

		g.Go(func() error {
			return c.createResults(ctx, match, verify, resultsCh)
		})
	}

	// Ignore any errors and collect as many of the results as we can.
	_ = g.Wait()
	close(resultsCh)

	for result := range resultsCh {
		if result.ExtraData != nil {
			result.ExtraData["name"] = c.GetName()
		}
		results = append(results, result)
	}

	return results, nil
}
// IsFalsePositive always reports that the result is not a false positive and
// gives an empty reason; the result argument is ignored.
func (c *CustomRegexWebhook) IsFalsePositive(_ detectors.Result) (bool, string) {
	const noReason = ""
	return false, noReason
}
// MaxSecretSize returns the maximum secret size used for custom detectors,
// which is fixed at 1000.
func (c *CustomRegexWebhook) MaxSecretSize() int64 {
	const customMaxSecretSize int64 = 1000
	return customMaxSecretSize
}
// createResults builds the result for one match permutation and sends it on
// results, first verifying it against the configured webhooks when verify is
// true.
//
// Result.Raw is the concatenation, in sorted regex-name order, of each regex's
// secret (the first capture group, or the whole match when there is none).
//
// When verifying, the permutation is POSTed as JSON ({detector name: match})
// to each configured endpoint in turn. A 200 response marks the result as
// verified, and the first 200 response whose body can be read stops the loop
// and stores up to 200 bytes of that body in ExtraData["response"].
//
// It returns ctx.Err() when ctx is done before the result could be sent, and
// nil otherwise. If the JSON body cannot be encoded it returns nil without
// sending a result.
func (c *CustomRegexWebhook) createResults(ctx context.Context, match map[string][]string, verify bool, results chan<- detectors.Result) error {
	if common.IsDone(ctx) {
		// TODO: Log we're possibly leaving out results.
		return ctx.Err()
	}

	result := detectors.Result{
		DetectorType: detector_typepb.DetectorType_CustomRegex,
		DetectorName: c.GetName(),
		ExtraData:    map[string]string{},
	}

	var raw strings.Builder
	for _, key := range slices.Sorted(maps.Keys(match)) {
		values := match[key]
		// values[0] contains the entire regex match.
		fullMatch := values[0]
		secret := fullMatch
		if len(values) > 1 {
			secret = values[1]
		}
		raw.WriteString(secret)

		// We set the full regex match as the primary secret value.
		// Reasoning:
		// The engine calculates the line number using the match. When a primary
		// secret is set, it uses that value instead of the raw secret. While the
		// secret itself is sufficient to calculate the line number, the same
		// group match could appear elsewhere in the data. To avoid ambiguity we
		// store the full regex match, which is used only for identifying the
		// exact line number and nowhere else.
		// Example:
		// Full regex match: secret = ABC123
		// Secret (raw): ABC123
		// The primary secret value stores the full string `secret = ABC123`,
		// letting the engine pinpoint the exact location and avoid matching
		// other occurrences of `ABC123` in the data.
		if c.PrimaryRegexName == key {
			result.SetPrimarySecretValue(fullMatch)
		}
	}
	result.Raw = []byte(raw.String())

	if !verify {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case results <- result:
			return nil
		}
	}

	// Verify via webhook.
	jsonBody, err := json.Marshal(map[string]map[string][]string{
		c.GetName(): match,
	})
	if err != nil {
		// This should never happen, but if it does, return nil to not
		// disrupt other verification.
		return nil
	}

	// Try each config until we successfully verify.
	for _, verifyConfig := range c.GetVerify() {
		if common.IsDone(ctx) {
			// TODO: Log we're possibly leaving out results.
			return ctx.Err()
		}
		verified, response, gotResponse := postVerificationRequest(ctx, verifyConfig.GetEndpoint(), verifyConfig.GetHeaders(), jsonBody)
		if verified {
			// Mark the result as verified.
			result.Verified = true
		}
		if gotResponse {
			// Store the processed response in ExtraData.
			result.ExtraData["response"] = response
			break
		}
	}

	select {
	case <-ctx.Done():
		return ctx.Err()
	case results <- result:
		return nil
	}
}

// postVerificationRequest POSTs jsonBody to a single verification endpoint
// using the shared httpClient, adding each "Key: Value" entry of headers to
// the request.
//
// verified is true when the endpoint answered with HTTP 200. gotResponse is
// true when, in addition, the response body was read successfully; response
// then holds at most the first 200 bytes of that body. Any failure to build or
// send the request yields (false, "", false).
//
// Keeping the request in its own function means the deferred drain-and-close
// runs after every attempt rather than piling up until the caller returns.
func postVerificationRequest(ctx context.Context, endpoint string, headers []string, jsonBody []byte) (verified bool, response string, gotResponse bool) {
	// Only this many bytes of the response are kept, so only this many are read
	// into memory; the remainder is discarded when the body is drained.
	const maxResponseLen = 200

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(jsonBody))
	if err != nil {
		return false, "", false
	}
	for _, header := range headers {
		key, value, found := strings.Cut(header, ":")
		if !found {
			// Should be unreachable due to validation.
			continue
		}
		req.Header.Add(key, strings.TrimLeft(value, "\t\n\v\f\r "))
	}

	resp, err := httpClient.Do(req)
	if err != nil {
		return false, "", false
	}
	defer func() {
		// Drain before closing so the connection can be reused.
		_, _ = io.Copy(io.Discard, resp.Body)
		_ = resp.Body.Close()
	}()

	if resp.StatusCode != http.StatusOK {
		return false, "", false
	}

	// TODO: handle different content-type responses separately when
	// implementing custom detector configurations.
	body, err := io.ReadAll(io.LimitReader(resp.Body, maxResponseLen))
	if err != nil {
		// The endpoint accepted the secret but its body is unusable; report
		// the verification and let the caller try the next endpoint.
		return true, "", false
	}
	return true, string(body), true
}
// Keywords returns the keywords configured on the embedded CustomRegex.
func (c *CustomRegexWebhook) Keywords() []string {
	keywords := c.CustomRegex.GetKeywords()
	return keywords
}
// productIndices produces the index tuples of the Cartesian product of ranges
// [0, l) for each l in lengths, with the first position varying fastest.
// Example:
// productIndices(3, 2) -> [[0 0] [1 0] [2 0] [0 1] [1 1] [2 1]].
//
// It returns a slice of length no larger than maxTotalMatches; when the full
// product is larger, only the first maxTotalMatches tuples are returned. It
// returns nil if any length is zero or negative. With no lengths at all it
// returns a single empty tuple.
func productIndices(lengths ...int) [][]int {
	// Compute min(product of lengths, maxTotalMatches) without ever forming
	// the full product, which can overflow int for large match counts.
	count := 1
	for _, l := range lengths {
		if l <= 0 {
			return nil
		}
		if l > maxTotalMatches/count {
			count = maxTotalMatches
		} else {
			count *= l
		}
	}

	results := make([][]int, count)
	for i := range results {
		// Decompose i as a mixed-radix number whose digits are the indices.
		// Dividing the remainder step by step avoids keeping a running
		// product of the lengths, which could overflow.
		rem := i
		indices := make([]int, 0, len(lengths))
		for _, l := range lengths {
			indices = append(indices, rem%l)
			rem /= l
		}
		results[i] = indices
	}
	return results
}
// permutateMatches converts the list of all regex matches into all possible
// permutations selecting one from each named entry in the map. For example:
// {"foo": [matchA, matchB], "bar": [matchC]} becomes
//
//	[{"foo": matchA, "bar": matchC}, {"foo": matchB, "bar": matchC}]
//
// Names are processed in sorted order, so the order of the returned
// permutations (and therefore which ones survive the cap applied by
// productIndices) is the same on every call. It returns nil when there are no
// permutations, e.g. when any name has no matches.
func permutateMatches(regexMatches map[string][][]string) []map[string][]string {
	// Get a consistent order for names and their matching lengths.
	// The lengths are used in calculating the permutation so order matters,
	// and map iteration order is random, hence the sort.
	names := slices.Sorted(maps.Keys(regexMatches))
	lengths := make([]int, 0, len(names))
	for _, name := range names {
		lengths = append(lengths, len(regexMatches[name]))
	}

	// Permutate all the indices for each match. For example, if "foo" has
	// [matchA, matchB] and "bar" has [matchC], we will get one index pair per
	// combination of a "foo" match and a "bar" match.
	permutationIndices := productIndices(lengths...)
	if len(permutationIndices) == 0 {
		return nil
	}

	// Build {"foo": matchA, "bar": matchC} and {"foo": matchB, "bar": matchC}
	// from the indices.
	matches := make([]map[string][]string, 0, len(permutationIndices))
	for _, permutation := range permutationIndices {
		// Each candidate holds exactly one entry per name.
		candidate := make(map[string][]string, len(names))
		for i, name := range names {
			candidate[name] = regexMatches[name][permutation[i]]
		}
		matches = append(matches, candidate)
	}
	return matches
}
// Type reports the detector type of this detector, which is always
// DetectorType_CustomRegex.
func (c *CustomRegexWebhook) Type() detector_typepb.DetectorType {
	const detectorType = detector_typepb.DetectorType_CustomRegex
	return detectorType
}
// defaultDescription is what Description returns when the detector
// configuration has an empty description.
const defaultDescription = "This is a user-defined detector with no description provided."
// Description returns the configured description of the detector, falling
// back to defaultDescription when none is configured.
func (c *CustomRegexWebhook) Description() string {
	if configured := c.GetDescription(); configured != "" {
		return configured
	}
	return defaultDescription
}
// ensurePrimaryRegexNameSet fills in pb.PrimaryRegexName when it is empty,
// using the regex name that comes first in sorted order so the choice is
// deterministic. It leaves pb untouched when a name is already set or when pb
// has no regexes.
func ensurePrimaryRegexNameSet(pb *custom_detectorspb.CustomRegex) {
	if pb.PrimaryRegexName != "" {
		return
	}
	names := slices.Sorted(maps.Keys(pb.Regex))
	if len(names) == 0 {
		return
	}
	pb.PrimaryRegexName = names[0]
}