trufflesecurity · amanfcp · Apr 3, 2026 · Apr 3, 2026 · Apr 4, 2026 · Apr 6, 2026
@@ -384,6 +384,141 @@ trufflehog huggingface --model <model_id> --include-discussions --include-prs
 aws s3 cp s3://example/gzipped/data.gz - | gunzip -c | trufflehog stdin
 ```
 
+## 19. Scan image and video files for secrets (OCR)
+
+TruffleHog can extract text from **PNG/JPEG images** and **MP4/MKV/WebM video frames**, then scan that text for secrets. This catches credentials embedded in screenshots, screen recordings, and documentation images.
+
+Before an image or extracted video frame is sent to any OCR engine, TruffleHog runs an image preprocessing pipeline — grayscale conversion, contrast normalization, 3× upscaling, and Otsu binarization — to maximize character accuracy regardless of which provider you use.
+
+**FFmpeg is required for video files** (to extract frames). It is not needed for image-only scanning.
+
+```bash
+# Ubuntu / Debian
+sudo apt install ffmpeg
+
+# macOS
+brew install ffmpeg
+```
+
+---
+
+### Choosing an OCR provider
+
+TruffleHog supports four OCR providers. Pick the one that fits your setup:
+
+| Provider | Accuracy | Setup | Cost |
+|---|---|---|---|
+| Tesseract (local) | Good | Install binary | Free |
+| Google Cloud Vision | Excellent | API key | Pay-per-use |
+| OpenAI GPT-4o | Excellent | API key | Pay-per-use |
+| Custom HTTP server | Depends | Self-hosted | Varies |
+
+---
+
+### Option A — Tesseract (local, no API key)
+
+Install Tesseract and enable OCR with the `--enable-ocr` flag:
+
+```bash
+# Ubuntu / Debian
+sudo apt install tesseract-ocr
+
+# macOS
+brew install tesseract
+```
+
+```bash
+trufflehog filesystem /path/to/screenshots --enable-ocr
+```
+
+**Improving Tesseract accuracy**
+
+The default Tesseract model is optimized for speed. For better results with secret scanning — where a single misread character breaks a match — use the `tessdata-best` model:
+
+```bash
+mkdir -p ~/.tessdata-best
+curl -L -o ~/.tessdata-best/eng.traineddata \
+  https://github.com/tesseract-ocr/tessdata_best/raw/main/eng.traineddata
+```
+
+TruffleHog automatically detects and uses `~/.tessdata-best` when present. You can also override the path via the `TESSDATA_PREFIX` environment variable.
+
+---
+
+### Option B — Remote OCR provider (Google, OpenAI, or custom)
+
+For higher accuracy, or to avoid installing Tesseract locally, configure a remote OCR provider. Supplying a provider config enables OCR automatically — no `--enable-ocr` needed.
+
+**Single-source scans** (`filesystem`, `git`, `github`, etc.) — use the dedicated `--ocr-config` flag pointing to a YAML file that contains only the `ocr:` block:
+
+```bash
+trufflehog filesystem /path/to/screenshots --ocr-config=ocr.yaml
+```
+
+**Multi-source scans** (`multi-scan`) — add the `ocr:` block directly to your existing `--config` file alongside `sources:` and `detectors:`, so everything stays in one place:
+
+```bash
+trufflehog multi-scan --config=config.yaml
+```
+
+If both `--ocr-config` and an `ocr:` block in `--config` are given, `--ocr-config` takes precedence. Either way, the `ocr:` block has the same structure and accepts exactly one provider:
+
+**Google Cloud Vision**
+
+Service account credentials are recommended for production. Create a service account with the `Cloud Vision API User` role, download the JSON key file, and reference it in the config:
+
+```yaml
+ocr:
+  google:
+    credentials_file: "/path/to/service-account.json"
+```
+
+If you prefer an API key instead:
+
+```yaml
+ocr:
+  google:
+    api_key: "${GOOGLE_VISION_API_KEY}"
+```
+
+**OpenAI GPT-4o**
+
+```yaml
+ocr:
+  openai:
+    api_key: "${OPENAI_API_KEY}"
+    model: "gpt-4o"    # optional — gpt-4o is the default
+```
+
+**Custom HTTP server**
+
+Use this for any other HTTP-based OCR service. The `body_template` is a Go template with two variables: `{{.Base64Image}}` (base64-encoded PNG) and `{{.MimeType}}` (always `image/png`). The `text_path` is a dot-separated path into the JSON response body; numeric segments are treated as array indices.
+
+```yaml
+ocr:
+  custom:
+    endpoint: "https://my-ocr.example.com/v1/extract"
+    auth:
+      type: bearer             # see auth types below
+      value: "${OCR_TOKEN}"
+    request:
+      content_type: "application/json"
+      body_template: '{"image": "{{.Base64Image}}", "mime_type": "{{.MimeType}}"}'
+    response:
+      text_path: "result.text"   # e.g. "choices.0.message.content" for nested responses
+```
+
+**Auth types**
+
+| `type` | Behaviour | Required fields |
+|---|---|---|
+| `bearer` | Adds `Authorization: Bearer <value>` | `value` |
+| `header` | Sets an arbitrary request header | `header_name`, `value` |
+| `api_key_query` | Appends the key as a URL query parameter | `param_name`, `value` |
+| `basic` | HTTP Basic Auth | `username`, `password` |
+
+All string fields support `${ENV_VAR}` expansion so secrets never need to be hardcoded in the config file.
+
 # :question: FAQ
 
 - All I see is `🐷🔑🐷  TruffleHog. Unearth your secrets. 🐷🔑🐷` and the program exits, what gives?

@@ -34,6 +34,8 @@ import (
 	"github.com/trufflesecurity/trufflehog/v3/pkg/engine/defaults"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/feature"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/ocr"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/configpb"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/log"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/output"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
@@ -92,6 +94,8 @@ var (
 	gitCloneTimeout    = cli.Flag("git-clone-timeout", "Maximum time to spend cloning a repository, as a duration.").Hidden().Duration()
 	skipAdditionalRefs = cli.Flag("skip-additional-refs", "Skip additional references.").Bool()
 	userAgentSuffix    = cli.Flag("user-agent-suffix", "Suffix to add to User-Agent.").String()
+	enableOCR          = cli.Flag("enable-ocr", "Enable OCR scanning of images and video frames for secrets. Requires tesseract and ffmpeg.").Bool()
+	ocrConfigFilename  = cli.Flag("ocr-config", "Path to OCR provider config file. Configures the OCR provider (google, openai, or custom) and automatically enables OCR.").ExistingFile()
 
 	gitScan                = cli.Command("git", "Find credentials in git repositories.")
 	gitScanURI             = gitScan.Arg("uri", "Git repository URL. https://, file://, or ssh:// schema expected.").Required().String()
@@ -506,6 +510,11 @@ func run(state overseer.State, logSync func() error) {
 		feature.UserAgentSuffix.Store(*userAgentSuffix)
 	}
 
+	if *enableOCR {
+		feature.EnableOCR.Store(true)
+		logger.Info("OCR enabled", "provider", "tesseract")
+	}
+
 	// OSS Default APK handling on
 	feature.EnableAPKHandler.Store(true)
 
@@ -528,6 +537,35 @@ func run(state overseer.State, logSync func() error) {
 		}
 	}
 
+	// Wire up a remote OCR provider if configured. Two sources are supported:
+	//   1. --ocr-config: a dedicated OCR config file (takes precedence). Useful for
+	//      single-source scans (filesystem, git, etc.) where --config is not used.
+	//   2. ocr: block inside --config: convenient for multi-scan where sources,
+	//      detectors, and OCR config all live in one file.
+	// Either way, the provider is set and OCR is enabled automatically.
+	var ocrCfgBlock *configpb.OCRConfig
+	if *ocrConfigFilename != "" {
+		ocrFileCfg, err := config.Read(*ocrConfigFilename)
+		if err != nil {
+			logFatal(err, "error parsing the provided OCR config file")
+		}
+		if ocrFileCfg.Ocr == nil {
+			logFatal(fmt.Errorf("no ocr: block found in %s", *ocrConfigFilename), "invalid OCR config")
+		}
+		ocrCfgBlock = ocrFileCfg.Ocr
+	} else if conf.Ocr != nil {
+		ocrCfgBlock = conf.Ocr
+	}
+	if ocrCfgBlock != nil {
+		provider, err := ocr.NewProvider(ocrCfgBlock)
+		if err != nil {
+			logFatal(err, "error initializing OCR provider")
+		}
+		handlers.SetOCRProvider(provider)
+		feature.EnableOCR.Store(true)
+		logger.Info("OCR enabled", "provider", ocr.ProviderName(ocrCfgBlock))
+	}
+
 	if *detectorTimeout != 0 {
 		logger.Info("Setting detector timeout", "timeout", detectorTimeout.String())
 		engine.SetDetectorTimeout(*detectorTimeout)

@@ -25,6 +25,7 @@ import (
 type Config struct {
 	Sources   []sources.ConfiguredSource
 	Detectors []detectors.Detector
+	Ocr       *configpb.OCRConfig // ocr: block from the YAML (inputYAML.GetOcr()); nil means no block was configured
 }
 
 // Read parses a given filename into a Config.
@@ -69,6 +70,7 @@ func NewYAML(input []byte) (*Config, error) {
 	return &Config{
 		Detectors: detectorConfigs,
 		Sources:   sourceConfigs,
+		Ocr:       inputYAML.GetOcr(),
 	}, nil
 }
 

@@ -15,6 +15,7 @@ var (
 	UseGitMirror                   atomic.Bool
 	GitlabProjectsPerPage          atomic.Int64
 	UseGithubGraphQLAPI            atomic.Bool // use github graphql api to fetch issues, pr's and comments
+	EnableOCR                      atomic.Bool
 )
 
 type AtomicString struct {

@@ -16,10 +16,19 @@ import (
 	logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/feature"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/iobuf"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/ocr"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
 )
 
+// activeOCRProvider is the ocr.Provider handed to newOCRHandler when OCR is enabled.
+// It starts as a TesseractProvider; SetOCRProvider swaps it with a plain, unsynchronized assignment.
+var activeOCRProvider ocr.Provider = &ocr.TesseractProvider{}
+
+// SetOCRProvider installs p as the package-wide OCR provider, replacing the
+// previous one. It is meant to be called once during startup, before any
+// files are processed.
+func SetOCRProvider(p ocr.Provider) {
+	activeOCRProvider = p
+}
+
 // fileReader is a custom reader that wraps an io.Reader and provides additional functionality for identifying
 // and handling different file types. It abstracts away the complexity of detecting file formats, MIME types,
 // and archive types, allowing for a more modular and extensible file handling process.
@@ -224,6 +233,7 @@ const (
 	rpmHandlerType     handlerType = "rpm"
 	apkHandlerType     handlerType = "apk"
 	defaultHandlerType handlerType = "default"
+	ocrHandlerType     handlerType = "ocr"
 	apkExt                         = ".apk"
 )
 
@@ -264,6 +274,15 @@ const (
 	jarMime      mimeType = "application/java-archive"
 	msgMime      mimeType = "application/vnd.ms-outlook"
 	docMime      mimeType = "application/msword"
+
+	// Image MIME types for OCR.
+	pngMime  mimeType = "image/png"
+	jpegMime mimeType = "image/jpeg"
+
+	// Video MIME types for OCR.
+	mp4Mime  mimeType = "video/mp4"
+	mkvMime  mimeType = "video/x-matroska"
+	webmMime mimeType = "video/webm"
 )
 
 // skipArchiverMimeTypes is a set of MIME types that should bypass archiver library processing because they are either
@@ -301,6 +320,11 @@ var skipArchiverMimeTypes = map[mimeType]struct{}{
 	apkMime:      {},
 	msgMime:      {},
 	docMime:      {},
+	pngMime:      {},
+	jpegMime:     {},
+	mp4Mime:      {},
+	mkvMime:      {},
+	webmMime:     {},
 }
 
 // selectHandler dynamically selects and configures a FileHandler based on the provided |mimetype| type and archive flag.
@@ -320,6 +344,16 @@ func selectHandler(mimeT mimeType, isGenericArchive bool) FileHandler {
 		return newRPMHandler()
 	case apkMime:
 		return newAPKHandler()
+	case pngMime, jpegMime:
+		if feature.EnableOCR.Load() {
+			return newOCRHandler(activeOCRProvider)
+		}
+		return newDefaultHandler(defaultHandlerType)
+	case mp4Mime, mkvMime, webmMime:
+		if feature.EnableOCR.Load() {
+			return newOCRHandler(activeOCRProvider)
+		}
+		return newDefaultHandler(defaultHandlerType)
 	default:
 		if isGenericArchive {
 			return newArchiveHandler()