Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ var (
gitCloneTimeout = cli.Flag("git-clone-timeout", "Maximum time to spend cloning a repository, as a duration.").Hidden().Duration()
skipAdditionalRefs = cli.Flag("skip-additional-refs", "Skip additional references.").Bool()
userAgentSuffix = cli.Flag("user-agent-suffix", "Suffix to add to User-Agent.").String()
enableOCR = cli.Flag("enable-ocr", "Enable OCR scanning of images and video frames for secrets. Requires tesseract and ffmpeg.").Bool()

gitScan = cli.Command("git", "Find credentials in git repositories.")
gitScanURI = gitScan.Arg("uri", "Git repository URL. https://, file://, or ssh:// schema expected.").Required().String()
Expand Down Expand Up @@ -506,6 +507,10 @@ func run(state overseer.State, logSync func() error) {
feature.UserAgentSuffix.Store(*userAgentSuffix)
}

if *enableOCR {
feature.EnableOCR.Store(true)
}

// OSS Default APK handling on
feature.EnableAPKHandler.Store(true)

Expand Down
1 change: 1 addition & 0 deletions pkg/feature/feature.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ var (
UseGitMirror atomic.Bool
GitlabProjectsPerPage atomic.Int64
UseGithubGraphQLAPI atomic.Bool // use github graphql api to fetch issues, pr's and comments
EnableOCR atomic.Bool
)

type AtomicString struct {
Expand Down
25 changes: 25 additions & 0 deletions pkg/handlers/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ const (
rpmHandlerType handlerType = "rpm"
apkHandlerType handlerType = "apk"
defaultHandlerType handlerType = "default"
ocrHandlerType handlerType = "ocr"
apkExt = ".apk"
)

Expand Down Expand Up @@ -264,6 +265,15 @@ const (
jarMime mimeType = "application/java-archive"
msgMime mimeType = "application/vnd.ms-outlook"
docMime mimeType = "application/msword"

// Image MIME types for OCR.
pngMime mimeType = "image/png"
jpegMime mimeType = "image/jpeg"

// Video MIME types for OCR.
mp4Mime mimeType = "video/mp4"
mkvMime mimeType = "video/x-matroska"
webmMime mimeType = "video/webm"
)

// skipArchiverMimeTypes is a set of MIME types that should bypass archiver library processing because they are either
Expand Down Expand Up @@ -301,6 +311,11 @@ var skipArchiverMimeTypes = map[mimeType]struct{}{
apkMime: {},
msgMime: {},
docMime: {},
pngMime: {},
jpegMime: {},
mp4Mime: {},
mkvMime: {},
webmMime: {},
}

// selectHandler dynamically selects and configures a FileHandler based on the provided |mimetype| type and archive flag.
Expand All @@ -320,6 +335,16 @@ func selectHandler(mimeT mimeType, isGenericArchive bool) FileHandler {
return newRPMHandler()
case apkMime:
return newAPKHandler()
case pngMime, jpegMime:
if feature.EnableOCR.Load() {
return newOCRHandler()
}
return newDefaultHandler(defaultHandlerType)
case mp4Mime, mkvMime, webmMime:
if feature.EnableOCR.Load() {
return newOCRHandler()
}
return newDefaultHandler(defaultHandlerType)
default:
if isGenericArchive {
return newArchiveHandler()
Expand Down
283 changes: 283 additions & 0 deletions pkg/handlers/ocr.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
package handlers

import (
"bytes"
"fmt"
"image"
"image/color"
_ "image/jpeg" // Register JPEG decoder for image.Decode.
"image/png"
"io"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"time"

logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/feature"
)

const (
	// maxOCRImageSize is the largest image, in bytes, that ocrImage will hand
	// to tesseract; larger inputs are skipped.
	maxOCRImageSize = 50 * 1024 * 1024 // 50 MB

	// maxOCRVideoSize is the largest video, in bytes, that ocrVideo will hand
	// to ffmpeg; larger inputs are skipped.
	maxOCRVideoSize = 500 * 1024 * 1024 // 500 MB

	// frameIntervalSeconds is the spacing, in seconds, between the video
	// frames sampled for OCR.
	//
	// It is a time interval, not a frame rate. ffmpeg's fps filter expects
	// frames per second, so an fps filter built from this constant must use
	// the reciprocal (fps=1/frameIntervalSeconds). Passing the value through
	// unchanged is only correct while it is 1: setting it to 2 (meaning "one
	// frame every two seconds") would instead extract two frames per second.
	frameIntervalSeconds = 1
)

// ocrHandler is the file handler for images and videos. It relies on external
// tools (tesseract for OCR, ffmpeg for pulling frames out of videos) to turn
// pixels into text, then pushes that text through the standard text
// processing pipeline of the embedded defaultHandler.
type ocrHandler struct {
	*defaultHandler
}

// Compile-time check that *ocrHandler satisfies FileHandler.
var _ FileHandler = (*ocrHandler)(nil)

// newOCRHandler returns an ocrHandler backed by a default handler that is
// tagged with ocrHandlerType.
func newOCRHandler() *ocrHandler {
	base := newDefaultHandler(ocrHandlerType)
	return &ocrHandler{defaultHandler: base}
}

// HandleFile processes image and video files by extracting text via OCR.
func (h *ocrHandler) HandleFile(ctx logContext.Context, input fileReader) chan DataOrErr {
dataOrErrChan := make(chan DataOrErr, defaultBufferSize)

if !feature.EnableOCR.Load() {
close(dataOrErrChan)
return dataOrErrChan
}

go func() {
defer close(dataOrErrChan)
defer func() {
if r := recover(); r != nil {
var panicErr error
if e, ok := r.(error); ok {
panicErr = e
} else {
panicErr = fmt.Errorf("panic occurred: %v", r)
}
dataOrErrChan <- DataOrErr{
Err: fmt.Errorf("%w: panic error: %v", ErrProcessingFatal, panicErr),
}
}
}()

start := time.Now()

mimeStr := mimeType(input.mime.String())
var text string
var err error

switch {
case isImageMime(mimeStr):
text, err = h.ocrImage(ctx, input)
case isVideoMime(mimeStr):
text, err = h.ocrVideo(ctx, input)
default:
err = fmt.Errorf("unsupported MIME type for OCR: %s", mimeStr)
}

if err != nil {
dataOrErrChan <- DataOrErr{
Err: fmt.Errorf("%w: OCR processing error: %v", ErrProcessingWarning, err),
}
h.measureLatencyAndHandleErrors(ctx, start, err, dataOrErrChan)
return
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OCR error handler sends duplicate errors to channel

Medium Severity

When OCR processing fails, the error is sent to dataOrErrChan twice — once explicitly on line 80–82, and again inside measureLatencyAndHandleErrors on line 83, which also writes the error to the same channel. Every other handler (defaultHandler, arHandler, archiveHandler, apkHandler) relies solely on measureLatencyAndHandleErrors for error reporting. This causes duplicate error events for consumers of the channel. Worse, if the error is context.DeadlineExceeded, the second write wraps it differently and isFatal returns true, potentially causing unexpected early termination.

Fix in Cursor Fix in Web

}

if strings.TrimSpace(text) == "" {
h.measureLatencyAndHandleErrors(ctx, start, nil, dataOrErrChan)
return
}

textReader := mimeTypeReader{
mimeExt: ".txt",
mimeName: textMime,
Reader: strings.NewReader(text),
}

if err := h.handleNonArchiveContent(ctx, textReader, dataOrErrChan); err != nil {
h.measureLatencyAndHandleErrors(ctx, start, err, dataOrErrChan)
return
}

h.metrics.incFilesProcessed()
h.measureLatencyAndHandleErrors(ctx, start, nil, dataOrErrChan)
}()

return dataOrErrChan
}

// ocrImage extracts text from a single image using tesseract.
//
// At most maxOCRImageSize bytes are accepted from input; a larger image is
// logged and skipped, returning ("", nil). The image is preprocessed with
// preprocessImage when possible (falling back to the original bytes when
// preprocessing fails), written to a temporary file that is removed on
// return, and passed to the tesseract binary found on PATH. The text that
// tesseract writes to stdout is returned.
//
// An error is returned when tesseract is not installed, when the input cannot
// be read, when the temporary file cannot be created, written or closed, or
// when tesseract exits unsuccessfully.
func (h *ocrHandler) ocrImage(ctx logContext.Context, input io.Reader) (string, error) {
	if _, err := exec.LookPath("tesseract"); err != nil {
		return "", fmt.Errorf("tesseract not found in PATH: %w", err)
	}

	// Read one byte past the limit so an oversized input can be told apart
	// from one that is exactly at the limit.
	imgData, err := io.ReadAll(io.LimitReader(input, maxOCRImageSize+1))
	if err != nil {
		return "", fmt.Errorf("error reading image data: %w", err)
	}
	if len(imgData) > maxOCRImageSize {
		ctx.Logger().V(2).Info("skipping image: size exceeds OCR limit", "limit", maxOCRImageSize)
		return "", nil
	}

	processedData, err := preprocessImage(imgData)
	if err != nil {
		ctx.Logger().V(3).Info("image preprocessing failed, using original", "error", err)
		processedData = imgData
	}

	tmpFile, err := os.CreateTemp("", "trufflehog-ocr-*.png")
	if err != nil {
		return "", fmt.Errorf("error creating temp file: %w", err)
	}
	defer os.Remove(tmpFile.Name())

	// Always close the file, and surface a close failure: tesseract reads the
	// file by name, so data that never reached disk would silently yield
	// wrong or empty OCR output.
	_, writeErr := tmpFile.Write(processedData)
	closeErr := tmpFile.Close()
	if writeErr != nil {
		return "", fmt.Errorf("error writing temp file: %w", writeErr)
	}
	if closeErr != nil {
		return "", fmt.Errorf("error closing temp file: %w", closeErr)
	}

	var stdout, stderr bytes.Buffer
	cmd := exec.CommandContext(ctx, "tesseract",
		tmpFile.Name(), "stdout",
		"--psm", "6",
		"--dpi", "300",
		"-c", "preserve_interword_spaces=1",
		"-c", "textord_space_size_is_variable=0",
	)
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("tesseract failed: %w (stderr: %s)", err, stderr.String())
	}

	return stdout.String(), nil
}

// ocrVideo extracts text from video frames using ffmpeg for frame extraction
// and tesseract (via ocrImage) for OCR on each frame.
//
// The video is streamed into a temporary file; if it turns out to be larger
// than maxOCRVideoSize it is logged and skipped, returning ("", nil). ffmpeg
// then writes one PNG every frameIntervalSeconds seconds into a temporary
// directory. Frames are OCRed in order and their non-empty, trimmed text is
// joined with newlines. A frame that cannot be opened or OCRed is logged and
// skipped. All temporary files are removed on return.
//
// An error is returned when ffmpeg or tesseract is not installed, when the
// video cannot be copied to disk, when ffmpeg fails, or when ctx is done
// before all frames have been processed.
func (h *ocrHandler) ocrVideo(ctx logContext.Context, input io.Reader) (string, error) {
	if _, err := exec.LookPath("ffmpeg"); err != nil {
		return "", fmt.Errorf("ffmpeg not found in PATH: %w", err)
	}
	if _, err := exec.LookPath("tesseract"); err != nil {
		return "", fmt.Errorf("tesseract not found in PATH: %w", err)
	}

	tmpVideo, err := os.CreateTemp("", "trufflehog-ocr-video-*")
	if err != nil {
		return "", fmt.Errorf("error creating temp video file: %w", err)
	}
	defer os.Remove(tmpVideo.Name())

	// Stream straight to disk instead of buffering up to maxOCRVideoSize bytes
	// in memory first. One byte past the limit is copied so an oversized input
	// can be told apart from one that is exactly at the limit.
	written, copyErr := io.Copy(tmpVideo, io.LimitReader(input, maxOCRVideoSize+1))
	closeErr := tmpVideo.Close()
	if copyErr != nil {
		return "", fmt.Errorf("error copying video data to temp file: %w", copyErr)
	}
	if closeErr != nil {
		return "", fmt.Errorf("error closing temp video file: %w", closeErr)
	}
	if written > maxOCRVideoSize {
		ctx.Logger().V(2).Info("skipping video: size exceeds OCR limit", "limit", maxOCRVideoSize)
		return "", nil
	}

	tmpFrameDir, err := os.MkdirTemp("", "trufflehog-ocr-frames-*")
	if err != nil {
		return "", fmt.Errorf("error creating temp frame dir: %w", err)
	}
	defer os.RemoveAll(tmpFrameDir)

	// ffmpeg's fps filter takes a rate (frames per second), so the sampling
	// interval has to be passed as its reciprocal. The frame counter is padded
	// to six digits so that lexical order equals frame order for long videos.
	var stderr bytes.Buffer
	cmd := exec.CommandContext(ctx, "ffmpeg",
		"-i", tmpVideo.Name(),
		"-vf", fmt.Sprintf("fps=1/%d", frameIntervalSeconds),
		"-vsync", "vfr",
		filepath.Join(tmpFrameDir, "frame_%06d.png"),
	)
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("ffmpeg frame extraction failed: %w (stderr: %s)", err, stderr.String())
	}

	frames, err := filepath.Glob(filepath.Join(tmpFrameDir, "frame_*.png"))
	if err != nil {
		return "", fmt.Errorf("error listing extracted frames: %w", err)
	}
	sort.Strings(frames)

	var allText strings.Builder
	for _, framePath := range frames {
		// Stop promptly once the context is done; otherwise every remaining
		// frame would still be read and preprocessed before its tesseract
		// invocation failed.
		if err := ctx.Err(); err != nil {
			return "", fmt.Errorf("OCR of video frames interrupted: %w", err)
		}

		frameFile, err := os.Open(framePath)
		if err != nil {
			ctx.Logger().V(3).Info("skipping frame: unable to open", "path", framePath, "error", err)
			continue
		}

		text, err := h.ocrImage(ctx, frameFile)
		frameFile.Close()
		if err != nil {
			ctx.Logger().V(3).Info("skipping frame: OCR failed", "path", framePath, "error", err)
			continue
		}

		if trimmed := strings.TrimSpace(text); trimmed != "" {
			if allText.Len() > 0 {
				allText.WriteString("\n")
			}
			allText.WriteString(trimmed)
		}
	}

	return allText.String(), nil
}

// isImageMime reports whether m is one of the image MIME types handled by OCR
// (PNG or JPEG).
func isImageMime(m mimeType) bool {
	switch m {
	case pngMime, jpegMime:
		return true
	default:
		return false
	}
}

// isVideoMime reports whether m is one of the video MIME types handled by OCR
// (MP4, Matroska or WebM).
func isVideoMime(m mimeType) bool {
	switch m {
	case mp4Mime, mkvMime, webmMime:
		return true
	default:
		return false
	}
}

const (
	// preprocessScaleFactor is the integer factor by which each image
	// dimension is enlarged before OCR.
	preprocessScaleFactor = 2

	// maxPreprocessPixels caps the number of source pixels preprocessImage is
	// willing to decode. Compressed size says little about decoded size: a
	// small file can declare enormous dimensions, and the upscaled output
	// needs preprocessScaleFactor² times as many pixels again. The cap is
	// large enough for an 8K (7680x4320) frame.
	maxPreprocessPixels = 32 * 1024 * 1024
)

// preprocessImage decodes an image, converts it to grayscale, and scales it up
// by preprocessScaleFactor (nearest neighbour) to improve tesseract accuracy
// on small or low-contrast text. The result is returned PNG-encoded.
//
// An error is returned when data cannot be decoded, when the image declares
// more than maxPreprocessPixels pixels (checked from the header, before any
// pixel data is decoded), or when the result cannot be encoded. Callers should
// fall back to the original data if this errors.
func preprocessImage(data []byte) ([]byte, error) {
	// Read only the header first so an oversized image is rejected before
	// memory for its pixels is allocated.
	cfg, _, err := image.DecodeConfig(bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("decoding image: %w", err)
	}
	if int64(cfg.Width)*int64(cfg.Height) > maxPreprocessPixels {
		return nil, fmt.Errorf("image too large to preprocess: %dx%d exceeds %d pixels",
			cfg.Width, cfg.Height, maxPreprocessPixels)
	}

	src, _, err := image.Decode(bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("decoding image: %w", err)
	}

	bounds := src.Bounds()
	srcW, srcH := bounds.Dx(), bounds.Dy()
	dstW, dstH := srcW*preprocessScaleFactor, srcH*preprocessScaleFactor

	gray := image.NewGray(image.Rect(0, 0, dstW, dstH))
	for sy := 0; sy < srcH; sy++ {
		// Fill the first output row belonging to this source row, computing the
		// luminance of each source pixel once and repeating it horizontally.
		first := gray.Pix[sy*preprocessScaleFactor*gray.Stride:][:dstW]
		for sx := 0; sx < srcW; sx++ {
			// color.GrayModel applies the same luminance weighting
			// (19595*r + 38470*g + 7471*b + 1<<15) >> 24 as a manual conversion.
			lum := color.GrayModel.Convert(src.At(bounds.Min.X+sx, bounds.Min.Y+sy)).(color.Gray).Y
			cell := first[sx*preprocessScaleFactor:][:preprocessScaleFactor]
			for i := range cell {
				cell[i] = lum
			}
		}
		// The remaining output rows for this source row are identical copies.
		for dy := 1; dy < preprocessScaleFactor; dy++ {
			copy(gray.Pix[(sy*preprocessScaleFactor+dy)*gray.Stride:], first)
		}
	}

	var buf bytes.Buffer
	if err := png.Encode(&buf, gray); err != nil {
		return nil, fmt.Errorf("encoding preprocessed image: %w", err)
	}
	return buf.Bytes(), nil
}
Loading
Loading