diff --git a/pkg/decoders/decoders.go b/pkg/decoders/decoders.go index c49eee403ff2..3cac3c36c833 100644 --- a/pkg/decoders/decoders.go +++ b/pkg/decoders/decoders.go @@ -12,6 +12,7 @@ func DefaultDecoders() []Decoder { &Base64{}, &UTF16{}, &EscapedUnicode{}, + &HTML{}, } } diff --git a/pkg/decoders/html.go b/pkg/decoders/html.go new file mode 100644 index 000000000000..93e7366bd1c9 --- /dev/null +++ b/pkg/decoders/html.go @@ -0,0 +1,251 @@ +package decoders + +import ( + "bytes" + "net/url" + "regexp" + "strings" + + "golang.org/x/net/html" + + "github.com/trufflesecurity/trufflehog/v3/pkg/feature" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/sources" +) + +// HTML is a decoder that extracts textual content from HTML documents. +// It produces a normalized view containing visible text, attribute values, +// script/style content, and HTML comments with entities and URL-encoding decoded. +// Gated at runtime by feature.HTMLDecoderEnabled. +type HTML struct{} + +func (d *HTML) Type() detectorspb.DecoderType { + return detectorspb.DecoderType_HTML +} + +var htmlTagPattern = regexp.MustCompile(`<[a-zA-Z][a-zA-Z0-9]*[\s>/]`) + +// highSignalAttrs are attribute names whose values are extracted into the +// decoded output because they commonly contain URLs, tokens, or other secrets. +var highSignalAttrs = map[string]bool{ + "href": true, + "src": true, + "action": true, + "value": true, + "content": true, + "alt": true, + "title": true, +} + +// syntaxHighlightPrefixes lists CSS class prefixes used by syntax highlighting +// libraries. Elements with these classes mark logical line boundaries in code +// blocks where the platform (e.g. Teams) strips actual newlines. +var syntaxHighlightPrefixes = []string{"hljs-"} + +// residualEntityReplacer decodes common HTML entities that survive double-encoding. +// When content is entity-encoded twice (e.g. 
&), the parser's first pass +// leaves residual entity sequences that this replacer cleans up. +var residualEntityReplacer = strings.NewReplacer( + "&", "&", + "<", "<", + ">", ">", + """, `"`, + "'", "'", + "'", "'", +) + +// invisibleReplacer strips zero-width and invisible Unicode codepoints that +// rich text editors may insert between characters, breaking detector regexes. +var invisibleReplacer = strings.NewReplacer( + "\u200B", "", // zero-width space + "\u200C", "", // zero-width non-joiner + "\u200D", "", // zero-width joiner + "\uFEFF", "", // byte order mark / zero-width no-break space + "\u00AD", "", // soft hyphen + "\u2060", "", // word joiner + "\u200E", "", // left-to-right mark + "\u200F", "", // right-to-left mark +) + +// blockElements insert newline boundaries when encountered during extraction. +var blockElements = map[string]bool{ + "p": true, "div": true, "br": true, "hr": true, + "h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true, + "li": true, "ol": true, "ul": true, + "tr": true, "td": true, "th": true, "table": true, "thead": true, "tbody": true, "tfoot": true, + "blockquote": true, "section": true, "article": true, "header": true, "footer": true, + "pre": true, "address": true, "figcaption": true, "figure": true, + "details": true, "summary": true, "main": true, "nav": true, "aside": true, + "form": true, "fieldset": true, "legend": true, + "dd": true, "dt": true, "dl": true, + "script": true, "style": true, +} + +// rawTextElements are elements whose content the HTML parser treats as raw +// text (entities are NOT decoded). Residual entity decoding must be skipped +// for text nodes inside these elements to avoid corrupting literal sequences +// like & in JavaScript. 
+var rawTextElements = map[string]bool{ + "script": true, + "style": true, +} + +func (d *HTML) FromChunk(chunk *sources.Chunk) *DecodableChunk { + if !feature.HTMLDecoderEnabled.Load() { + return nil + } + if chunk == nil || len(chunk.Data) == 0 { + return nil + } + + if !looksLikeHTML(chunk.Data) { + return nil + } + + extracted := extractHTML(chunk.Data) + if len(extracted) == 0 { + return nil + } + + if bytes.Equal(chunk.Data, extracted) { + return nil + } + + chunk.Data = extracted + return &DecodableChunk{Chunk: chunk, DecoderType: d.Type()} +} + +func looksLikeHTML(data []byte) bool { + return htmlTagPattern.Match(data) +} + +func extractHTML(data []byte) []byte { + doc, err := html.Parse(bytes.NewReader(data)) + if err != nil { + return nil + } + + var buf bytes.Buffer + buf.Grow(len(data)) + + walkNode(&buf, doc, false) + + result := stripInvisible(buf.Bytes()) + return normalizeWhitespace(result) +} + +func walkNode(buf *bytes.Buffer, n *html.Node, inRawText bool) { + switch n.Type { + case html.TextNode: + text := n.Data + if text != "" { + if !inRawText { + text = residualEntityReplacer.Replace(text) + } + buf.WriteString(text) + } + + case html.CommentNode: + if content := strings.TrimSpace(n.Data); content != "" { + ensureNewline(buf) + buf.WriteString(content) + ensureNewline(buf) + } + + case html.ElementNode: + isBlock := blockElements[n.Data] + + if isBlock { + ensureNewline(buf) + } else if hasSyntaxHighlightClass(n) { + ensureNewline(buf) + } + + emitAttributes(buf, n) + + childRaw := inRawText || rawTextElements[n.Data] + for c := n.FirstChild; c != nil; c = c.NextSibling { + walkNode(buf, c, childRaw) + } + + if isBlock { + ensureNewline(buf) + } + + default: + for c := n.FirstChild; c != nil; c = c.NextSibling { + walkNode(buf, c, inRawText) + } + } +} + +func hasSyntaxHighlightClass(n *html.Node) bool { + for _, attr := range n.Attr { + if attr.Key != "class" { + continue + } + for _, cls := range strings.Fields(attr.Val) { + for _, prefix 
:= range syntaxHighlightPrefixes { + if strings.HasPrefix(cls, prefix) { + return true + } + } + } + } + return false +} + +func emitAttributes(buf *bytes.Buffer, n *html.Node) { + for _, attr := range n.Attr { + isDataAttr := strings.HasPrefix(attr.Key, "data-") + if !highSignalAttrs[attr.Key] && !isDataAttr { + continue + } + val := strings.TrimSpace(attr.Val) + if val == "" || val == "#" { + continue + } + decoded, err := url.PathUnescape(val) + if err == nil && decoded != val { + val = decoded + } + ensureNewline(buf) + buf.WriteString(val) + ensureNewline(buf) + } +} + +func ensureNewline(buf *bytes.Buffer) { + if buf.Len() == 0 { + return + } + if buf.Bytes()[buf.Len()-1] != '\n' { + buf.WriteByte('\n') + } +} + +func stripInvisible(data []byte) []byte { + return []byte(invisibleReplacer.Replace(string(data))) +} + +// normalizeWhitespace collapses runs of blank lines and trims leading/trailing whitespace. +func normalizeWhitespace(data []byte) []byte { + lines := bytes.Split(data, []byte("\n")) + var result [][]byte + prevBlank := true + for _, line := range lines { + trimmed := bytes.TrimSpace(line) + if len(trimmed) == 0 { + if !prevBlank { + prevBlank = true + } + continue + } + if prevBlank && len(result) > 0 { + result = append(result, []byte("")) + } + result = append(result, trimmed) + prevBlank = false + } + return bytes.Join(result, []byte("\n")) +} diff --git a/pkg/decoders/html_test.go b/pkg/decoders/html_test.go new file mode 100644 index 000000000000..403ebdb2f93a --- /dev/null +++ b/pkg/decoders/html_test.go @@ -0,0 +1,474 @@ +package decoders + +import ( + "testing" + + "github.com/trufflesecurity/trufflehog/v3/pkg/feature" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/sources" +) + +func TestHTML_Type(t *testing.T) { + d := &HTML{} + if got := d.Type(); got != detectorspb.DecoderType_HTML { + t.Errorf("Type() = %v, want %v", got, detectorspb.DecoderType_HTML) + } +} + +// 
TestHTML_FromChunk verifies the HTML decoder extracts secrets from HTML content +// that sources like MS Teams and Confluence emit. The test cases are grouped by +// the category of extraction they exercise: +// +// - Guard clauses: nil, empty, and non-HTML input return nil. +// - Text node extraction: secrets split across inline tags are rejoined; +// HTML entities (&) are decoded by the parser. +// - Attribute value extraction: high-signal attrs (href, src, data-*, value, +// content, alt, title, action) are emitted; URL percent-encoding is decoded; +// empty/anchor-only hrefs are skipped. +// - Script / style / comment content: all included because they frequently +// contain embedded credentials. +// - Code and pre blocks: preserved verbatim (common secret location). +// - Whitespace and token boundaries: block elements (p, div, br, tr, td, li) +// insert newlines; inline elements preserve text continuity to avoid +// accidental token joins. +// - Real-world formats: Confluence storage-format HTML and Teams message HTML +// with secrets in typical positions. +// - Integration: a mixed-content case exercises text nodes, URL-decoded attrs, +// script content, and HTML comments in a single chunk. +func TestHTML_FromChunk(t *testing.T) { + tests := []struct { + name string + chunk *sources.Chunk + want string + wantNil bool + }{ + // --- Guard clauses: decoder returns nil for non-applicable input --- + { + name: "nil chunk", + chunk: nil, + wantNil: true, + }, + { + name: "empty data", + chunk: &sources.Chunk{Data: []byte{}}, + wantNil: true, + }, + { + name: "plain text (no HTML)", + chunk: &sources.Chunk{Data: []byte("just some plain text with no tags")}, + wantNil: true, + }, + + // --- Text node extraction --- + { + // Core scenario: a secret is split across formatting tags by the + // rich-text editor. The parser concatenates adjacent text nodes. + name: "secret split across span tags", + chunk: &sources.Chunk{Data: []byte(`
AKIA1234567890ABCDEF
`)}, + want: "AKIA1234567890ABCDEF", + }, + { + // Confluence/Teams encode '&' as '&'. The HTML parser + // automatically unescapes entities so detector regexes can match. + name: "HTML entities decoded", + chunk: &sources.Chunk{Data: []byte(`key=abc&secret=hunter2
`)}, + want: "key=abc&secret=hunter2", + }, + + // --- Attribute value extraction --- + { + // Secrets in href URLs (e.g. tokens in query params). + name: "attribute value extraction - href", + chunk: &sources.Chunk{Data: []byte(`link`)}, + want: "https://api.example.com?token=sk-live-1234\nlink", + }, + { + // Secrets in src URLs (e.g. image CDN tokens). + name: "attribute value extraction - src", + chunk: &sources.Chunk{Data: []byte(`text
`)}, + want: "text\nbody { background: url(\"https://cdn.com?key=secret\"); }", + }, + { + // Script following an inline element must NOT concatenate with + // the preceding text; it needs its own newline boundary. + name: "script adjacent to inline text gets boundary", + chunk: &sources.Chunk{Data: []byte(`text`)}, + want: "text\nvar key=\"secret\";", + }, + { + // Style following an inline element must NOT concatenate. + name: "style adjacent to inline text gets boundary", + chunk: &sources.Chunk{Data: []byte(`text`)}, + want: "text\n.x { color: red; }", + }, + { + // Entity-like sequences in script content are raw text and must + // NOT be decoded by the residual entity replacer. + name: "entities in script preserved as raw text", + chunk: &sources.Chunk{Data: []byte(``)}, + want: `var url = "a=1&b=2";`, + }, + { + // Entity-like sequences in style content are raw text. + name: "entities in style preserved as raw text", + chunk: &sources.Chunk{Data: []byte(``)}, + want: `body::after { content: "©"; }`, + }, + { + // HTML comments are a common place for debug credentials and + // TODO notes with hardcoded passwords. + name: "HTML comment content included", + chunk: &sources.Chunk{Data: []byte(`visible
`)}, + want: "visible\nTODO: remove hardcoded password=hunter2", + }, + + // --- Code and pre blocks --- + { + /// content is preserved verbatim; these blocks are a
+ // top location for pasted credentials and key exports.
+ name: "code/pre blocks preserved",
+ chunk: &sources.Chunk{Data: []byte(`export AWS_SECRET_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
`)},
+ want: "export AWS_SECRET_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+ },
+ {
+ // Multi-line PEM private keys in <pre> blocks with <br/> line breaks
+ // are reconstructed with proper newlines for detector matching.
+ name: "private key in pre block",
+ chunk: &sources.Chunk{Data: []byte(`-----BEGIN RSA PRIVATE KEY-----
MIIEpAIBAAKCAQEA04up8h
-----END RSA PRIVATE KEY-----
`)},
+ want: "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA04up8h\n-----END RSA PRIVATE KEY-----",
+ },
+
+ // --- Whitespace and token boundaries ---
+ {
+ // Block elements (<p>) produce newline boundaries so adjacent
+ // paragraphs don't merge tokens.
+ name: "block elements produce newlines",
+ chunk: &sources.Chunk{Data: []byte(`
first
second
`)},
+ want: "first\nsecond",
+ },
+ {
+ // All <br> variants produce newlines.
+ name: "br tags produce newlines",
+ chunk: &sources.Chunk{Data: []byte(`line1
line2
line3
`)},
+ want: "line1\nline2\nline3",
+ },
+ {
+ // Nested inline elements (<b>, <i>) do not break the token;
+ // text flows continuously so "token=sk-live-abc123" stays intact.
+ name: "nested inline elements preserve text continuity",
+ chunk: &sources.Chunk{Data: []byte(`token=sk-live-abc123
`)},
+ want: "token=sk-live-abc123",
+ },
+ {
+ // <td> elements are block-level: each cell gets its own line,
+ // keeping key/value pairs from merging.
+ name: "table with secrets",
+ chunk: &sources.Chunk{Data: []byte(
+ `API Key AKIAIOSFODNN7EXAMPLE ` +
+ `Secret wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
`,
+ )},
+ want: "API Key\nAKIAIOSFODNN7EXAMPLE\nSecret\nwJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+ },
+ {
+ // Even without <tr> wrappers, <td> still inserts block boundaries.
+ name: "td cells without enclosing tr still get block boundaries",
+ chunk: &sources.Chunk{Data: []byte(
+ `key value
`,
+ )},
+ want: "key\nvalue",
+ },
+ {
+ // <li> elements produce separate lines.
+ name: "list items produce separate lines",
+ chunk: &sources.Chunk{Data: []byte(
+ `- token: abc123
- secret: def456
`,
+ )},
+ want: "token: abc123\nsecret: def456",
+ },
+
+ // --- Real-world source formats ---
+ {
+ // Confluence storage format: secrets split across tags,
+ // an AWS key in plain text, and an href with a URL. Exercises text
+ // node concatenation, attribute extraction, and block boundaries
+ // together.
+ name: "confluence storage format - real world",
+ chunk: &sources.Chunk{Data: []byte(
+ `Our API credentials:
` +
+ `Key: AKIAIOSFODNN7EXAMPLE
` +
+ `Secret: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
` +
+ `See AWS Console
`,
+ )},
+ want: "Our API credentials:\nKey: AKIAIOSFODNN7EXAMPLE\nSecret: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY\nSee\nhttps://console.aws.amazon.com\nAWS Console",
+ },
+ {
+ // Teams message HTML: nested <div> wrappers around <p> tags
+ // containing a GitHub PAT. Verifies that redundant block wrappers
+ // collapse to clean newlines.
+ name: "teams message HTML - real world",
+ chunk: &sources.Chunk{Data: []byte(
+ `
` +
+ `Here is the token for the staging env:
` +
+ `ghp_ABCDEFghijklmnop1234567890abcde
` +
+ ``,
+ )},
+ want: "Here is the token for the staging env:\nghp_ABCDEFghijklmnop1234567890abcde",
+ },
+
+ // --- Syntax highlight boundary detection ---
+ {
+ // Teams renders code blocks as adjacent <span> elements within a
+ // single <pre>, using highlight.js classes for syntax coloring.
+ // Newlines from the original code are lost. The decoder detects
+ // hljs-* classes and inserts newlines at those boundaries while
+ // still concatenating non-hljs sibling spans (preserving
+ // mid-token color splits like the value below split across 3 spans).
+ name: "teams code block with hljs syntax highlighting",
+ chunk: &sources.Chunk{Data: []byte(
+ `
` +
+ `[header]` +
+ `key_one` +
+ ` = FIRST_VALUE_ABCDEFGH` +
+ `key_two` +
+ ` = SECOND_VAL_PART_` +
+ `X` +
+ `_END_OF_VALUE` +
+ `format` +
+ ` = json` +
+ `
`,
+ )},
+ want: "[header]\nkey_one = FIRST_VALUE_ABCDEFGH\nkey_two = SECOND_VAL_PART_X_END_OF_VALUE\nformat = json",
+ },
+ {
+ // Spans without hljs classes must still concatenate, preserving
+ // the existing split-secret behavior even when hljs spans are
+ // present elsewhere in the document.
+ name: "non-hljs sibling spans still concatenate",
+ chunk: &sources.Chunk{Data: []byte(
+ `SECRET_FIRST_HALF_1234
`,
+ )},
+ want: "SECRET_FIRST_HALF_1234",
+ },
+ {
+ // Various hljs-* class names (not just hljs-function) should
+ // all trigger line boundaries.
+ name: "multiple hljs class variants trigger boundaries",
+ chunk: &sources.Chunk{Data: []byte(
+ `` +
+ `const` +
+ ` x = ` +
+ `"value_one"` +
+ `const` +
+ ` y = ` +
+ `"value_two"` +
+ `
`,
+ )},
+ want: "const x =\n\"value_one\"\nconst y =\n\"value_two\"",
+ },
+
+ // --- Zero-width / invisible character stripping ---
+ {
+ // Zero-width spaces inserted between characters by rich text editors
+ // are stripped so detector regexes can match the full token.
+ name: "zero-width space stripped from secret",
+ chunk: &sources.Chunk{Data: []byte("TOKEN_\u200BABCDEF_1234
")},
+ want: "TOKEN_ABCDEF_1234",
+ },
+ {
+ // Multiple invisible codepoint types mixed into a single token.
+ name: "multiple invisible character types stripped",
+ chunk: &sources.Chunk{Data: []byte("SECRET\u200C_VALUE\u00AD_HERE\u2060_END\uFEFF
")},
+ want: "SECRET_VALUE_HERE_END",
+ },
+
+ // --- SVG xlink:href attribute extraction ---
+ {
+ // SVG elements use xlink:href for URLs which may contain tokens.
+ name: "xlink:href extracted from SVG element",
+ chunk: &sources.Chunk{Data: []byte(``)},
+ want: "https://api.example.com?token=secret_value_123\nicon",
+ },
+
+ // --- Double-encoded HTML entity decoding ---
+ {
+ // Content double-encoded as &amp;amp; becomes &amp; after the parser's
+ // first pass; the residual entity replacer decodes it to &.
+ name: "double-encoded ampersand decoded",
+ chunk: &sources.Chunk{Data: []byte(`key=abc&secret=val
`)},
+ want: "key=abc&secret=val",
+ },
+ {
+ // Single-encoded entities are handled by the parser; verify the
+ // residual replacer does not corrupt already-decoded content.
+ name: "single-encoded entities not double-decoded",
+ chunk: &sources.Chunk{Data: []byte(`5 > 3 & 2 < 4
`)},
+ want: "5 > 3 & 2 < 4",
+ },
+
+ // --- Integration: all extraction types in one chunk ---
+ {
+ // Combines text nodes (split across spans), URL-decoded attribute
+ // values, inline script content, and an HTML comment -- all in a
+ // single chunk. Verifies the decoder handles the full extraction
+ // surface simultaneously.
+ name: "mixed content with all extraction types",
+ chunk: &sources.Chunk{Data: []byte(
+ `API key: AKIA1234567890ABCDEF
` +
+ `See docs
` +
+ `` +
+ ``,
+ )},
+ want: "API key: AKIA1234567890ABCDEF\nSee\nhttps://api.example.com?token=sk-live_1234\ndocs\nvar secret = \"ghp_abc123def456\";\nTODO: remove hardcoded password=hunter2",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ feature.HTMLDecoderEnabled.Store(true)
+ defer feature.HTMLDecoderEnabled.Store(false)
+
+ d := &HTML{}
+ got := d.FromChunk(tt.chunk)
+
+ if tt.wantNil {
+ if got != nil {
+ t.Errorf("FromChunk() = %q, want nil", string(got.Chunk.Data))
+ }
+ return
+ }
+
+ if got == nil {
+ t.Fatalf("FromChunk() returned nil, want %q", tt.want)
+ }
+ if got.DecoderType != detectorspb.DecoderType_HTML {
+ t.Errorf("DecoderType = %v, want %v", got.DecoderType, detectorspb.DecoderType_HTML)
+ }
+ if string(got.Chunk.Data) != tt.want {
+ t.Errorf("FromChunk() data =\n%q\nwant:\n%q", string(got.Chunk.Data), tt.want)
+ }
+ })
+ }
+}
+
+// TestHTML_FeatureFlagDisabled verifies that the decoder is a no-op when
+// feature.HTMLDecoderEnabled is false.
+func TestHTML_FeatureFlagDisabled(t *testing.T) {
+ feature.HTMLDecoderEnabled.Store(false)
+ d := &HTML{}
+ chunk := &sources.Chunk{Data: []byte(`secret: hunter2
`)}
+ if got := d.FromChunk(chunk); got != nil {
+ t.Errorf("FromChunk() should return nil when disabled, got %q", string(got.Chunk.Data))
+ }
+}
+
+// TestHTML_FeatureFlagEnabled verifies that the decoder processes HTML normally
+// when feature.HTMLDecoderEnabled is true.
+func TestHTML_FeatureFlagEnabled(t *testing.T) {
+ feature.HTMLDecoderEnabled.Store(true)
+ defer feature.HTMLDecoderEnabled.Store(false)
+
+ d := &HTML{}
+ chunk := &sources.Chunk{Data: []byte(`secret: hunter2
`)}
+ got := d.FromChunk(chunk)
+ if got == nil {
+ t.Fatal("FromChunk() returned nil, want decoded chunk")
+ }
+ if string(got.Chunk.Data) != "secret: hunter2" {
+ t.Errorf("FromChunk() data = %q, want %q", string(got.Chunk.Data), "secret: hunter2")
+ }
+}
+
+// TestLooksLikeHTML verifies the fast heuristic that decides whether chunk data
+// is worth parsing as HTML. It must accept valid HTML tags (including self-closing
+// and attribute-bearing) while rejecting plain text, arithmetic comparisons, and
+// bare HTML entities -- all of which could appear in non-HTML source content.
+func TestLooksLikeHTML(t *testing.T) {
+ tests := []struct {
+ name string
+ data string
+ want bool
+ }{
+ {"simple tag", "hello
", true},
+ {"self-closing", "
", true},
+ {"with attributes", ``, true},
+ {"plain text", "no html here", false},
+ {"angle brackets but not HTML", "5 < 10 and 20 > 15", false},
+ {"XML-like", "content ", true},
+ {"just less-than", "a < b", false},
+ {"html entity only", "& <", false},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if got := looksLikeHTML([]byte(tt.data)); got != tt.want {
+ t.Errorf("looksLikeHTML(%q) = %v, want %v", tt.data, got, tt.want)
+ }
+ })
+ }
+}
diff --git a/pkg/feature/feature.go b/pkg/feature/feature.go
index 080788c0218c..3aa92a3759a6 100644
--- a/pkg/feature/feature.go
+++ b/pkg/feature/feature.go
@@ -15,6 +15,7 @@ var (
UseGitMirror atomic.Bool
GitlabProjectsPerPage atomic.Int64
UseGithubGraphQLAPI atomic.Bool // use github graphql api to fetch issues, pr's and comments
+ HTMLDecoderEnabled atomic.Bool
)
type AtomicString struct {
diff --git a/pkg/pb/detectorspb/detectors.pb.go b/pkg/pb/detectorspb/detectors.pb.go
index 3ec2468eb95d..b942e1d75dc7 100644
--- a/pkg/pb/detectorspb/detectors.pb.go
+++ b/pkg/pb/detectorspb/detectors.pb.go
@@ -28,6 +28,7 @@ const (
DecoderType_BASE64 DecoderType = 2
DecoderType_UTF16 DecoderType = 3
DecoderType_ESCAPED_UNICODE DecoderType = 4
+ DecoderType_HTML DecoderType = 5
)
// Enum value maps for DecoderType.
@@ -38,6 +39,7 @@ var (
2: "BASE64",
3: "UTF16",
4: "ESCAPED_UNICODE",
+ 5: "HTML",
}
DecoderType_value = map[string]int32{
"UNKNOWN": 0,
@@ -45,6 +47,7 @@ var (
"BASE64": 2,
"UTF16": 3,
"ESCAPED_UNICODE": 4,
+ "HTML": 5,
}
)
diff --git a/proto/detectors.proto b/proto/detectors.proto
index 88829dd17d3c..076b848a6c09 100644
--- a/proto/detectors.proto
+++ b/proto/detectors.proto
@@ -10,6 +10,7 @@ enum DecoderType {
BASE64 = 2;
UTF16 = 3;
ESCAPED_UNICODE = 4;
+ HTML = 5;
}
enum DetectorType {