diff --git a/.github/workflows/nrl-docs-github-pages.yml b/.github/workflows/nrl-docs-github-pages.yml new file mode 100644 index 000000000..496fb5c9e --- /dev/null +++ b/.github/workflows/nrl-docs-github-pages.yml @@ -0,0 +1,102 @@ +# NeMo Retriever Library (NRL) documentation only — GitHub Pages staging / nightly. +# Does not run the full Docker + Sphinx pipeline (no nv-ingest / nv-ingest-api HTML API dump). +name: NRL documentation — GitHub Pages (staging) + +on: + push: + branches: + - main + paths: + - "docs/**" + - "nemo_retriever/**" + - ".github/workflows/nrl-docs-github-pages.yml" + schedule: + # Nightly (UTC): pick up doc changes even if no pushes + - cron: "0 7 * * *" + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: pages-nrl-staging + cancel-in-progress: false + +jobs: + build: + name: Build NRL docs (staging) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Configure Pages + id: pages + uses: actions/configure-pages@v5 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + cache-dependency-path: docs/requirements.txt + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r docs/requirements.txt + pip install -e ./nemo_retriever + + - name: Print NRL site navigation (pre-deploy) + run: python docs/scripts/print_nrl_mkdocs_nav.py + + - name: Write nav + scan summary for the workflow run + run: | + { + echo "### NRL GitHub Pages — site navigation" + echo + echo '```' + python docs/scripts/print_nrl_mkdocs_nav.py + echo '```' + echo + echo "### Non-NRL / legacy reference scan (excerpt)" + echo "Full report is attached as an artifact." 
+ echo + echo '```' + python docs/scripts/scan_non_nrl_doc_references.py | head -n 120 + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + + - name: Scan for non-NRL references (full report) + run: python docs/scripts/scan_non_nrl_doc_references.py | tee non-nrl-review.txt + + - name: Upload non-NRL scan artifact + uses: actions/upload-artifact@v4 + with: + name: non-nrl-content-review + path: non-nrl-review.txt + + - name: Build MkDocs (NRL only) + working-directory: docs + env: + SITE_URL: ${{ steps.pages.outputs.base_url }} + run: mkdocs build -f mkdocs.nrl-github-pages.yml --strict + + - name: Upload Pages artifact + uses: actions/upload-pages-artifact@v3 + with: + path: docs/site + + deploy: + name: Deploy to GitHub Pages + needs: build + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - name: Deploy + id: deployment + uses: actions/deploy-pages@v4 diff --git a/docs/docs/extraction/agentic-retrieval-concept.md b/docs/docs/extraction/agentic-retrieval-concept.md new file mode 100644 index 000000000..32d4d623a --- /dev/null +++ b/docs/docs/extraction/agentic-retrieval-concept.md @@ -0,0 +1,16 @@ +# Agentic retrieval (concept) + +!!! note + + This documentation describes NeMo Retriever Library. + + +Agentic retrieval means **iterative, tool-driven** retrieval: an agent plans steps, issues searches, may refine filters, and optionally reranks until it has enough context to answer. + +NeMo Retriever Library focuses on document ingestion, embeddings, vector stores, hybrid search, and reranking. Orchestration frameworks call these building blocks from your application. 
+ +**Related** + +- [Workflow: Agentic retrieval](workflow-agentic-retrieval.md) +- [Semantic and hybrid retrieval](semantic-hybrid-retrieval.md) +- Framework examples: [LangChain, LlamaIndex, Haystack](integrations-langchain-llamaindex-haystack.md) diff --git a/docs/docs/extraction/audio.md b/docs/docs/extraction/audio.md index 9d664d0b2..c5696c876 100644 --- a/docs/docs/extraction/audio.md +++ b/docs/docs/extraction/audio.md @@ -7,22 +7,17 @@ with the [parakeet-1-1b-ctc-en-us ASR NIM microservice](https://docs.nvidia.com/ - Run the NIM locally by using Docker Compose - Use NVIDIA Cloud Functions (NVCF) endpoints for cloud-based inference -!!! note - - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. - Currently, you can extract speech from the following file types: - `mp3` - `wav` - ## Overview [NeMo Retriever Library](overview.md) supports extracting speech from audio files for Retrieval Augmented Generation (RAG) applications. Similar to how the multimodal document extraction pipeline leverages object detection and image OCR microservices, -NeMo Retriever leverages the [parakeet-1-1b-ctc-en-us ASR NIM microservice](https://docs.nvidia.com/nim/speech/latest/asr/deploy-asr-models/parakeet-ctc-en-us.html) +NeMo Retriever Library uses the [parakeet-1-1b-ctc-en-us ASR NIM microservice](https://docs.nvidia.com/nim/speech/latest/asr/deploy-asr-models/parakeet-ctc-en-us.html) to transcribe speech to text, which is then embedded by using the NeMo Retriever embedding NIM. !!! important @@ -92,7 +87,7 @@ To generate one extracted element for each sentence-like ASR segment, include `e !!! tip - For more Python examples, refer to [NV-Ingest: Python Client Quick Start Guide](https://github.com/NVIDIA/nv-ingest/blob/main/client/client_examples/examples/python_client_usage.ipynb). + For more Python examples, refer to [Python Quick Start Guide](https://github.com/NVIDIA/NeMo-Retriever/blob/main/client/client_examples/examples/python_client_usage.ipynb). 
## Use NVCF Endpoints for Cloud-Based Inference @@ -128,7 +123,7 @@ Instead of running the pipeline locally, you can use NVCF to perform inference b !!! tip - For more Python examples, refer to [NV-Ingest: Python Client Quick Start Guide](https://github.com/NVIDIA/nv-ingest/blob/main/client/client_examples/examples/python_client_usage.ipynb). + For more Python examples, refer to [Python Quick Start Guide](https://github.com/NVIDIA/NeMo-Retriever/blob/main/client/client_examples/examples/python_client_usage.ipynb). @@ -136,4 +131,4 @@ Instead of running the pipeline locally, you can use NVCF to perform inference b - [Support Matrix](support-matrix.md) - [Troubleshoot Nemo Retriever Extraction](troubleshoot.md) -- [Use the Python API](nv-ingest-python-api.md) +- [Use the Python API](python-api-reference.md) diff --git a/docs/docs/extraction/benchmarking.md b/docs/docs/extraction/benchmarking.md index 0fb0c07ae..eae7b7435 100644 --- a/docs/docs/extraction/benchmarking.md +++ b/docs/docs/extraction/benchmarking.md @@ -1,11 +1,16 @@ -# NV-Ingest Integration Testing Framework +# NeMo Retriever Library integration testing framework -A configurable, dataset-agnostic testing framework for end-to-end validation of nv-ingest pipelines. This framework uses structured YAML configuration for type safety, validation, and parameter management. +!!! note + + This documentation describes NeMo Retriever Library. + + +A configurable, dataset-agnostic testing framework for end-to-end validation of NeMo Retriever Library pipelines. This framework uses structured YAML configuration for type safety, validation, and parameter management. ## Dataset Prerequisites -Before you run any benchmarking or evaluation tests, you must first download the benchmark datasets. The three primary datasets used in nv-ingest benchmarking and evaluations include: +Before you run any benchmarking or evaluation tests, you must first download the benchmark datasets. 
The three primary datasets used in NeMo Retriever Library benchmarking and evaluations include: - **Bo20** - 20 PDFs for quick testing - **Bo767** - 767 PDFs for comprehensive benchmarking @@ -13,7 +18,7 @@ Before you run any benchmarking or evaluation tests, you must first download the ### How to Download the Datasets -Use the [Digital Corpora Download Notebook](https://github.com/NVIDIA/nv-ingest/blob/main/evaluation/digital_corpora_download.ipynb) to download these datasets from the public Digital Corpora source. This notebook provides automated download functions that enable the following: +Use the [Digital Corpora Download Notebook](https://github.com/NVIDIA/NeMo-Retriever/blob/main/evaluation/digital_corpora_download.ipynb) to download these datasets from the public Digital Corpora source. This notebook provides automated download functions that enable the following: - Download PDFs directly from Digital Corpora's public repository. - Support all three dataset sizes (Bo20, Bo767, Bo10k). @@ -29,26 +34,26 @@ Use the [Digital Corpora Download Notebook](https://github.com/NVIDIA/nv-ingest/ Before you use this documentation, you need the following: - Docker and Docker Compose are running -- A Python environment with nv-ingest-client installed +- A Python environment with the NeMo Retriever Library client and harness dependencies installed - The [benchmark datasets are downloaded](#dataset-prerequisites) ### Run Your First Test ```bash -# 1. Navigate to the nv-ingest-harness directory +# 1. Navigate to the tools/harness directory cd tools/harness # 2. Install dependencies uv sync # 3. 
Run with a pre-configured dataset (assumes services are running) -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 # Or use a custom path that uses the "active" configuration -uv run nv-ingest-harness-run --case=e2e --dataset=/path/to/your/data +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=/path/to/your/data # With managed infrastructure (starts/stops services) -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 --managed ``` ## Configuration System @@ -114,7 +119,7 @@ Each dataset includes its path, extraction settings, and recall evaluator in one ```yaml datasets: bo767: - path: /datasets/nv-ingest/bo767 + path: /datasets/nemo-retriever/bo767 extract_text: true extract_tables: true extract_charts: true @@ -123,7 +128,7 @@ datasets: recall_dataset: bo767 # Evaluator for recall testing bo20: - path: /datasets/nv-ingest/bo20 + path: /datasets/nemo-retriever/bo20 extract_text: true extract_tables: true extract_charts: true @@ -132,7 +137,7 @@ datasets: recall_dataset: null # bo20 does not have recall earnings: - path: /datasets/nv-ingest/earnings_consulting + path: /datasets/nemo-retriever/earnings_consulting extract_text: true extract_tables: true extract_charts: true @@ -149,13 +154,13 @@ datasets: **Usage:** ```bash # Single dataset - configs applied automatically -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 # Multiple datasets (sweeping) - each gets its own config -uv run nv-ingest-harness-run --case=e2e --dataset=bo767,earnings,bo20 +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767,earnings,bo20 # Custom path still works (uses active section config) -uv run nv-ingest-harness-run --case=e2e --dataset=/custom/path +uv run python -m nv_ingest_harness.cli.run --case=e2e 
--dataset=/custom/path ``` **Dataset Extraction Settings:** @@ -181,7 +186,7 @@ Example: # YAML active section has api_version: v2 # Dataset bo767 has extract_images: false # Override via environment variable (highest priority) -EXTRACT_IMAGES=true API_VERSION=v1 uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +EXTRACT_IMAGES=true API_VERSION=v1 uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 # Result: Uses bo767 path, but extract_images=true (env override) and api_version=v1 (env override) ``` @@ -245,13 +250,13 @@ Configuration is validated on load with helpful error messages. ```bash # Run with default YAML configuration (assumes services are running) -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 # With document-level analysis -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --doc-analysis +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 --doc-analysis # With managed infrastructure (starts/stops services) -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 --managed ``` ### Dataset Sweeping @@ -260,7 +265,7 @@ Run multiple datasets in a single command - each dataset automatically gets its ```bash # Sweep multiple datasets -uv run nv-ingest-harness-run --case=e2e --dataset=bo767,earnings,bo20 +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767,earnings,bo20 # Each dataset runs sequentially with its own: # - Extraction settings (from dataset config) @@ -268,13 +273,13 @@ uv run nv-ingest-harness-run --case=e2e --dataset=bo767,earnings,bo20 # - Results summary at the end # With managed infrastructure (services start once, shared across all datasets) -uv run nv-ingest-harness-run --case=e2e --dataset=bo767,earnings,bo20 --managed +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767,earnings,bo20 --managed 
# E2E+Recall sweep (each dataset ingests then evaluates recall) -uv run nv-ingest-harness-run --case=e2e_recall --dataset=bo767,earnings +uv run python -m nv_ingest_harness.cli.run --case=e2e_recall --dataset=bo767,earnings # Recall-only sweep (evaluates existing collections) -uv run nv-ingest-harness-run --case=recall --dataset=bo767,earnings +uv run python -m nv_ingest_harness.cli.run --case=recall --dataset=bo767,earnings ``` **Sweep Behavior:** @@ -288,10 +293,10 @@ uv run nv-ingest-harness-run --case=recall --dataset=bo767,earnings ```bash # Override via environment (useful for CI/CD) -API_VERSION=v2 EXTRACT_TABLES=false uv run nv-ingest-harness-run --case=e2e +API_VERSION=v2 EXTRACT_TABLES=false uv run python -m nv_ingest_harness.cli.run --case=e2e # Temporary changes without editing YAML -DATASET_DIR=/custom/path uv run nv-ingest-harness-run --case=e2e +DATASET_DIR=/custom/path uv run python -m nv_ingest_harness.cli.run --case=e2e ``` ## Test Scenarios @@ -454,7 +459,7 @@ All datasets use **multimodal-only** evaluation: **Other datasets** (finance_bench, earnings, audio): - Ground truth files must be obtained separately (not in public repo) - Set `ground_truth_dir` to point to your ground truth directory -- Dataset-specific evaluators are extensible (see `recall_utils.py`) +- Dataset-specific evaluators are extensible (refer to `recall_utils.py`) ### Configuration @@ -477,23 +482,23 @@ recall: ```bash # Evaluate existing bo767 collections (no reranker) # recall_dataset automatically set from dataset config -uv run nv-ingest-harness-run --case=recall --dataset=bo767 +uv run python -m nv_ingest_harness.cli.run --case=recall --dataset=bo767 # With reranker only (set reranker_mode in YAML recall section) -uv run nv-ingest-harness-run --case=recall --dataset=bo767 +uv run python -m nv_ingest_harness.cli.run --case=recall --dataset=bo767 # Sweep multiple datasets for recall evaluation -uv run nv-ingest-harness-run --case=recall --dataset=bo767,earnings +uv run 
python -m nv_ingest_harness.cli.run --case=recall --dataset=bo767,earnings ``` **E2E + Recall (fresh ingestion):** ```bash # Fresh ingestion with recall evaluation # recall_dataset automatically set from dataset config -uv run nv-ingest-harness-run --case=e2e_recall --dataset=bo767 +uv run python -m nv_ingest_harness.cli.run --case=e2e_recall --dataset=bo767 # Sweep multiple datasets (each ingests then evaluates) -uv run nv-ingest-harness-run --case=e2e_recall --dataset=bo767,earnings +uv run python -m nv_ingest_harness.cli.run --case=e2e_recall --dataset=bo767,earnings ``` **Dataset configuration:** @@ -541,7 +546,7 @@ The easiest way to test multiple datasets is using dataset sweeping: ```bash # Test multiple datasets - each gets its native config automatically -uv run nv-ingest-harness-run --case=e2e --dataset=bo767,earnings,bo20 +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767,earnings,bo20 # Each dataset runs with its pre-configured extraction settings # Results are organized in separate artifact directories @@ -552,7 +557,7 @@ uv run nv-ingest-harness-run --case=e2e --dataset=bo767,earnings,bo20 To sweep through different parameter values: 1. **Edit** `test_configs.yaml` - Update values in the `active` section -2. **Run** the test: `uv run nv-ingest-harness-run --case=e2e --dataset=` +2. **Run** the test: `uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=` 3. **Analyze** results in `artifacts/_/` 4. 
**Repeat** steps 1-3 for next parameter combination @@ -560,18 +565,18 @@ Example parameter sweep workflow: ```bash # Test 1: Baseline V1 vim test_configs.yaml # Set: api_version=v1, extract_tables=true -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 # Test 2: V2 with 32-page splitting vim test_configs.yaml # Set: api_version=v2, pdf_split_page_count=32 -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 # Test 3: V2 with 8-page splitting vim test_configs.yaml # Set: pdf_split_page_count=8 -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 # Test 4: Tables disabled (override via env var) -EXTRACT_TABLES=false uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +EXTRACT_TABLES=false uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 ``` **Note**: Each test run creates a new timestamped artifact directory, so you can compare results across sweeps. 
@@ -581,7 +586,7 @@ EXTRACT_TABLES=false uv run nv-ingest-harness-run --case=e2e --dataset=bo767 ### Attach Mode (Default) ```bash -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 ``` - **Default behavior**: Assumes services are already running @@ -593,7 +598,7 @@ uv run nv-ingest-harness-run --case=e2e --dataset=bo767 ### Managed Mode ```bash -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 --managed ``` - Starts Docker services automatically @@ -605,10 +610,10 @@ uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed **Managed mode options:** ```bash # Skip Docker image rebuild (faster startup) -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed --no-build +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 --managed --no-build # Keep services running after test (useful for multi-test scenarios) -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed --keep-up +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 --managed --keep-up ``` ## Artifacts and Logging @@ -636,7 +641,7 @@ tools/harness/artifacts/__UTC/ Enable per-document element breakdown: ```bash -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --doc-analysis +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 --doc-analysis ``` **Sample Output:** @@ -817,7 +822,7 @@ The framework is dataset-agnostic and supports multiple approaches: **Option 1: Use pre-configured dataset (Recommended)** ```bash # Dataset configs automatically applied -uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 ``` **Option 2: Add new dataset to YAML** @@ -832,26 +837,26 @@ datasets: extract_infographics: false recall_dataset: null # or set to evaluator name if applicable ``` -Then use: `uv 
run nv-ingest-harness-run --case=e2e --dataset=my_dataset` +Then use: `uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=my_dataset` **Option 3: Use custom path (uses active section config)** ```bash -uv run nv-ingest-harness-run --case=e2e --dataset=/path/to/your/dataset +uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=/path/to/your/dataset ``` **Option 4: Environment variable override** ```bash # Override specific settings via env vars -EXTRACT_IMAGES=true uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +EXTRACT_IMAGES=true uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 ``` **Best Practice**: For repeated testing, add your dataset to the `datasets` section with its native extraction settings. This ensures consistent configuration and enables dataset sweeping. ## Additional Resources -- **Configuration**: See `config.py` for complete field list and validation logic -- **Test utilities**: See `interact.py` for shared helper functions -- **Docker setup**: See project root README for service management commands -- **API documentation**: See `docs/` for API version differences +- **Configuration**: Refer to `config.py` for complete field list and validation logic +- **Test utilities**: Refer to `interact.py` for shared helper functions +- **Docker setup**: Refer to project root README for service management commands +- **API documentation**: Refer to `docs/` for API version differences -The framework prioritizes clarity, type safety, and validation to support reliable testing of nv-ingest pipelines. +The framework prioritizes clarity, type safety, and validation to support reliable testing of NeMo Retriever Library pipelines. diff --git a/docs/docs/extraction/choose-your-path.md b/docs/docs/extraction/choose-your-path.md new file mode 100644 index 000000000..f39f4900b --- /dev/null +++ b/docs/docs/extraction/choose-your-path.md @@ -0,0 +1,37 @@ +# Choose your path + +!!! 
note + + This documentation describes NeMo Retriever Library. + + +Use this page to pick documentation and deployment options that match your goal. + +## I want to run locally or embed the library + +1. [Prerequisites](prerequisites.md) and [Support matrix](support-matrix.md) +2. [Deploy (Library mode)](quickstart-library-mode.md) +3. [Use the Python API](python-api-reference.md) or [Use the CLI](cli-reference.md) + +## I want a Kubernetes / Helm deployment + +1. [Prerequisites](prerequisites.md) +2. [Deploy (Helm Chart)](helm.md) +3. [Environment variables](environment-config.md) and [Troubleshoot](troubleshoot.md) as needed + +## I want examples and notebooks + +1. [Jupyter Notebooks](notebooks.md) +2. [Integrate with LangChain, LlamaIndex, Haystack](integrations-langchain-llamaindex-haystack.md) + +## I need API details and keys + +1. [Get your API key](ngc-api-key.md) +2. [API reference](nemo-retriever-api-reference.md) and [V2 API guide](v2-api-guide.md) if applicable + +## I am tuning performance or cost + +1. [Benchmarking and performance](benchmarking.md) +2. [Telemetry](telemetry.md) +3. [Throughput is dataset-dependent](throughput-is-dataset-dependent.md) +4. [Evaluate on your data](evaluate-on-your-data.md) diff --git a/docs/docs/extraction/chunking.md b/docs/docs/extraction/chunking.md index 540b147ca..50d548e10 100644 --- a/docs/docs/extraction/chunking.md +++ b/docs/docs/extraction/chunking.md @@ -1,5 +1,10 @@ # Split Documents +!!! note + + This documentation describes NeMo Retriever Library. + + Splitting, also known as chunking, breaks large documents or text into smaller, manageable sections to improve retrieval efficiency. After chunking, only the most relevant pieces of information are retrieved for a given query. Chunking also prevents text from exceeding the context window of the embedding model. 
@@ -106,6 +111,6 @@ If you are building the container yourself and want to pre-download this model, ## Related Topics -- [Use the Python API](nv-ingest-python-api.md) +- [Use the Python API](python-api-reference.md) - [NeMo Retriever Library V2 API Guide](v2-api-guide.md) -- [Environment Variables](environment-variables.md) +- [Environment variables](environment-config.md) diff --git a/docs/docs/extraction/cli-reference.md b/docs/docs/extraction/cli-reference.md index 6ce311402..51164b358 100644 --- a/docs/docs/extraction/cli-reference.md +++ b/docs/docs/extraction/cli-reference.md @@ -1,10 +1,15 @@ # CLI Reference +!!! note + + This documentation describes NeMo Retriever Library. + + After you install the Python dependencies, you can use the [NeMo Retriever Library](overview.md) command line interface (CLI). To use the CLI, use the `nemo-retriever` command. !!! note "Command name" - Depending on your installation (NeMo Retriever Library vs. nv-ingest-client), you invoke the CLI by using `nemo-retriever` or `nv-ingest-cli`. Both expose the same options and behavior. The following sections use `nemo-retriever` for consistency with the examples. + Install the NeMo Retriever Library client package, then invoke the CLI with `nemo-retriever`. The examples in this page use that command name. To check the version of the CLI that you have installed, run the following command. @@ -51,7 +56,7 @@ The following table lists all CLI options. | `--collect_profiling_traces` | — | flag | false | No | After the run, fetch Zipkin traces for submitted jobs and write them under `output_directory`. | | `--zipkin_host` | — | string | `localhost` | No | Host for Zipkin API (used when `--collect_profiling_traces` is set). | | `--zipkin_port` | — | int | `9411` | No | Port for Zipkin API. | -| `--version` | — | flag | — | No | Print nv-ingest and nv-ingest-cli versions and exit. | +| `--version` | — | flag | — | No | Print NeMo Retriever Library client and CLI version information and exit. 
| @@ -108,7 +113,7 @@ Running with `--fail_on_error` causes the process to exit on the first job failu ## Complete --help Output -The following is the standard help output for the CLI (equivalent to `nemo-retriever --help` or `nv-ingest-cli --help`). Use it as a quick reference when you cannot run the command locally. +The following is the standard help output for the CLI (equivalent to `nemo-retriever --help`). Use it as a quick reference when you cannot run the command locally. ```text Usage: nemo-retriever [OPTIONS] diff --git a/docs/docs/extraction/concepts.md b/docs/docs/extraction/concepts.md new file mode 100644 index 000000000..31e778dec --- /dev/null +++ b/docs/docs/extraction/concepts.md @@ -0,0 +1,32 @@ +# Concepts + +!!! note + + This documentation describes NeMo Retriever Library. + + +These terms appear throughout NeMo Retriever Library documentation. + +## Job + +A **job** is a unit of work you submit with a JSON description: a document payload (or reference) and a list of **ingestion tasks** to run on that payload. Results are retrieved as structured metadata and annotations. + +## Pipeline and tasks + +NeMo Retriever Library does **not** run one static pipeline on every document. You configure **tasks** such as parsing, chunking, embedding, storage, and filtering per job. Related topics: [Customize your pipeline](user-defined-functions.md), [user-defined stages](user-defined-stages.md). + +## Extraction metadata + +Output is typically a **JSON dictionary** listing extracted objects (text regions, tables, images, and so on), processing notes, and timing or trace data. Field-level detail is in the [metadata reference](content-metadata.md). + +## Embeddings and retrieval + +Optionally, the library can compute **embeddings** for extracted content and store vectors in a database such as [LanceDB](https://lancedb.com/) or [Milvus](https://milvus.io/) for downstream **semantic or hybrid search** in your application. 
+ +## Deployment modes + +- **Library mode** — Run without the full container stack where appropriate ([quickstart](quickstart-library-mode.md)). +- **Helm / Kubernetes** — [Helm-based deployment](helm.md) for cluster operations. +- **Notebooks** — [Jupyter examples](notebooks.md) for experimentation and RAG demos. + +For a concise comparison, refer to [Choose your path](choose-your-path.md). diff --git a/docs/docs/extraction/content-metadata.md b/docs/docs/extraction/content-metadata.md index 5a55b9e6a..332f75f1c 100644 --- a/docs/docs/extraction/content-metadata.md +++ b/docs/docs/extraction/content-metadata.md @@ -10,7 +10,7 @@ Metadata can be extracted from a source or content, or generated by using models !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. @@ -108,15 +108,15 @@ The `MetadataSchema` is the primary container for all metadata. It includes the | `content` | `str` | `""` | The actual textual content extracted from the source. | | `content_url` | `str` | `""` | URL pointing to the location of the content, if applicable. | | `embedding` | `Optional[List[float]]` | `None` | Optional numerical vector representation (embedding) of the content. | -| `source_metadata` | `Optional[SourceMetadataSchema]` | `None` | Metadata about the original source of the content. See [SourceMetadataSchema](#sourcemetadataschema). | -| `content_metadata` | `Optional[ContentMetadataSchema]` | `None` | General metadata about the extracted content itself. See [ContentMetadataSchema](#contentmetadataschema). | -| `audio_metadata` | `Optional[AudioMetadataSchema]` | `None` | Specific metadata for audio content. Automatically set to `None` if `content_metadata.type` is not `AUDIO`. See [AudioMetadataSchema](#audiometadataschema). | -| `text_metadata` | `Optional[TextMetadataSchema]` | `None` | Specific metadata for text content. Automatically set to `None` if `content_metadata.type` is not `TEXT`. 
See [TextMetadataSchema](#textmetadataschema). | -| `image_metadata` | `Optional[ImageMetadataSchema]` | `None` | Specific metadata for image content. Automatically set to `None` if `content_metadata.type` is not `IMAGE`. See [ImageMetadataSchema](#imagemetadataschema). | -| `table_metadata` | `Optional[TableMetadataSchema]` | `None` | Specific metadata for tabular content. Automatically set to `None` if `content_metadata.type` is not `STRUCTURED`. See [TableMetadataSchema](#tablemetadataschema). | -| `chart_metadata` | `Optional[ChartMetadataSchema]` | `None` | Specific metadata for chart content. See [ChartMetadataSchema](#chartmetadataschema). | -| `error_metadata` | `Optional[ErrorMetadataSchema]` | `None` | Metadata describing any errors encountered during processing. See [ErrorMetadataSchema](#errormetadataschema). | -| `info_message_metadata` | `Optional[InfoMessageMetadataSchema]` | `None` | Informational messages related to the processing. See [InfoMessageMetadataSchema](#infomessagemetadataschema). | +| `source_metadata` | `Optional[SourceMetadataSchema]` | `None` | Metadata about the original source of the content. Refer to [SourceMetadataSchema](#sourcemetadataschema). | +| `content_metadata` | `Optional[ContentMetadataSchema]` | `None` | General metadata about the extracted content itself. Refer to [ContentMetadataSchema](#contentmetadataschema). | +| `audio_metadata` | `Optional[AudioMetadataSchema]` | `None` | Specific metadata for audio content. Automatically set to `None` if `content_metadata.type` is not `AUDIO`. Refer to [AudioMetadataSchema](#audiometadataschema). | +| `text_metadata` | `Optional[TextMetadataSchema]` | `None` | Specific metadata for text content. Automatically set to `None` if `content_metadata.type` is not `TEXT`. Refer to [TextMetadataSchema](#textmetadataschema). | +| `image_metadata` | `Optional[ImageMetadataSchema]` | `None` | Specific metadata for image content. 
Automatically set to `None` if `content_metadata.type` is not `IMAGE`. Refer to [ImageMetadataSchema](#imagemetadataschema). | +| `table_metadata` | `Optional[TableMetadataSchema]` | `None` | Specific metadata for tabular content. Automatically set to `None` if `content_metadata.type` is not `STRUCTURED`. Refer to [TableMetadataSchema](#tablemetadataschema). | +| `chart_metadata` | `Optional[ChartMetadataSchema]` | `None` | Specific metadata for chart content. Refer to [ChartMetadataSchema](#chartmetadataschema). | +| `error_metadata` | `Optional[ErrorMetadataSchema]` | `None` | Metadata describing any errors encountered during processing. Refer to [ErrorMetadataSchema](#errormetadataschema). | +| `info_message_metadata` | `Optional[InfoMessageMetadataSchema]` | `None` | Informational messages related to the processing. Refer to [InfoMessageMetadataSchema](#infomessagemetadataschema). | | `debug_metadata` | `Optional[Dict[str, Any]]` | `None` | A dictionary for storing any arbitrary debug information. | | `raise_on_failure` | `bool` | `False` | If `True`, indicates that processing should halt on failure. | @@ -147,7 +147,7 @@ General metadata about the extracted content. | `type` | `ContentTypeEnum` | *Required* | The type of the extracted content (e.g., `TEXT`, `IMAGE`, `AUDIO`). Uses `ContentTypeEnum`. | | `description` | `str` | `""` | A description of the extracted content. | | `page_number` | `int` | `-1` | Page number from which the content was extracted, if applicable (e.g., for PDFs). | -| `hierarchy` | `ContentHierarchySchema` | `ContentHierarchySchema()` | Hierarchical information about the content's location within the source. See [ContentHierarchySchema](#contenthierarchyschema). | +| `hierarchy` | `ContentHierarchySchema` | `ContentHierarchySchema()` | Hierarchical information about the content's location within the source. Refer to [ContentHierarchySchema](#contenthierarchyschema). 
| | `subtype` | `Union[ContentTypeEnum, str]` | `""` | A more specific subtype for the content (e.g., if `type` is `IMAGE`, `subtype` could be `diagram`). | | `start_time` | `int` | `-1` | Start time in milliseconds for time-based media (e.g., audio, video). | | `end_time` | `int` | `-1` | End time in milliseconds for time-based media. | @@ -162,18 +162,24 @@ Describes the structural location of content within a document. | `block` | `int` | `-1` | Identifier for a block of content (e.g., paragraph, section). | | `line` | `int` | `-1` | Line number within a block, if applicable. | | `span` | `int` | `-1` | Span identifier within a line, for finer granularity. | -| `nearby_objects` | `NearbyObjectsSchema` | `NearbyObjectsSchema()` | Information about objects (text, images, structured data) near the current content. See [NearbyObjectsSchema](#nearbyobjectsschema). | +| `nearby_objects` | `NearbyObjectsSchema` | `NearbyObjectsSchema()` | Information about objects (text, images, structured data) near the current content. Refer to [NearbyObjectsSchema](#nearbyobjectsschema). | + + ### `NearbyObjectsSchema` (Currently Unused) + Container for different types of nearby objects. | Field | Type | Default Value | Description | |--------------|------------------------|------------------------------|--------------------------------------------------------------------------| -| `text` | `NearbyObjectsSubSchema` | `NearbyObjectsSubSchema()` | Nearby textual objects. See [NearbyObjectsSubSchema](#nearbyobjectssubschema). | +| `text` | `NearbyObjectsSubSchema` | `NearbyObjectsSubSchema()` | Nearby textual objects. Refer to [NearbyObjectsSubSchema](#nearbyobjectssubschema). | | `images` | `NearbyObjectsSubSchema` | `NearbyObjectsSubSchema()` | Nearby image objects. | | `structured` | `NearbyObjectsSubSchema` | `NearbyObjectsSubSchema()` | Nearby structured data objects (e.g., tables). | + + ### `NearbyObjectsSubSchema` + Describes a list of nearby objects of a specific type. 
| Field | Type | Default Value | Description | @@ -243,7 +249,10 @@ Specific metadata for audio content. | `audio_transcript` | `str` | `""` | Transcript of the audio content. | | `audio_type` | `str` | `""` | Type or format of the audio (e.g., `mp3`, `wav`). | + + ### `ErrorMetadataSchema` (Currently Unused) + Metadata describing errors encountered during processing. | Field | Type | Default Value | Description | @@ -253,7 +262,10 @@ Metadata describing errors encountered during processing. | `source_id` | `str` | `""` | Identifier of the source item that caused the error, if applicable. | | `error_msg` | `str` | *Required* | The error message. | + + ### `InfoMessageMetadataSchema` (Currently Unused) + Informational messages related to processing. | Field | Type | Default Value | Description | @@ -282,7 +294,7 @@ The following enums are used by this schema: The following is an example JSON representation of metadata. This is an example only, and does not contain the full metadata. -For the full file, refer to the [data folder](https://github.com/NVIDIA/nv-ingest/blob/main/data/multimodal_test.json). +For the full file, refer to the [data folder](https://github.com/NVIDIA/NeMo-Retriever/blob/main/data/multimodal_test.json). ```json { @@ -374,4 +386,4 @@ For the full file, refer to the [data folder](https://github.com/NVIDIA/nv-inges ## Related Topics -- [Environment Variables](environment-variables.md) +- [Environment variables](environment-config.md) diff --git a/docs/docs/extraction/contributing.md b/docs/docs/extraction/contributing.md index 6a136c218..eb4a02e27 100644 --- a/docs/docs/extraction/contributing.md +++ b/docs/docs/extraction/contributing.md @@ -1,4 +1,9 @@ -# Contributing to NV-Ingest +# Contributing to NeMo Retriever Library -External contributions to NV-Ingest will be welcome soon, and they are greatly appreciated! -For more information, refer to [Contributing to NV-Ingest](https://github.com/NVIDIA/nv-ingest/blob/main/CONTRIBUTING.md). +!!! 
note + + This documentation describes NeMo Retriever Library. + + +External contributions will be welcome soon, and they are greatly appreciated! +For more information, refer to [Contributing to NeMo Retriever](https://github.com/NVIDIA/NeMo-Retriever/blob/main/CONTRIBUTING.md). diff --git a/docs/docs/extraction/custom-metadata.md b/docs/docs/extraction/custom-metadata.md index 805aef628..51a132d21 100644 --- a/docs/docs/extraction/custom-metadata.md +++ b/docs/docs/extraction/custom-metadata.md @@ -1,5 +1,10 @@ # Use Custom Metadata to Filter Search Results +!!! note + + This documentation describes NeMo Retriever Library. + + You can upload custom metadata for documents during ingestion. By uploading custom metadata you can attach additional information to documents, and use it for filtering results during retrieval operations. @@ -56,14 +61,14 @@ meta_df.to_csv(file_path) ### Example: Add Custom Metadata During Ingestion The following example adds custom metadata during ingestion. -For more information about the `Ingestor` class, see [Use the Python API](nv-ingest-python-api.md). -For more information about the `vdb_upload` method, see [Upload Data](data-store.md). +For more information about the `Ingestor` class, refer to [Use the Python API](python-api-reference.md). +For more information about the `vdb_upload` method, refer to [Upload Data](data-store.md). ```python from nv_ingest_client.client import Ingestor hostname="localhost" -collection_name = "nv_ingest_collection" +collection_name = "nemo_retriever_collection" sparse = True ingestor = ( @@ -145,7 +150,7 @@ The following example uses a filter expression to narrow results by department. 
from nv_ingest_client.util.milvus import nvingest_retrieval hostname="localhost" -collection_name = "nv_ingest_collection" +collection_name = "nemo_retriever_collection" sparse = True top_k = 5 model_name="nvidia/llama-3.2-nv-embedqa-1b-v2" @@ -176,5 +181,5 @@ print(f"{q_results}") ## Related Content -- For a notebook that uses the CLI to add custom metadata and filter query results, see [metadata_and_filtered_search.ipynb -](https://github.com/NVIDIA/nv-ingest/blob/main/examples/metadata_and_filtered_search.ipynb). +- For a notebook that uses the CLI to add custom metadata and filter query results, refer to [metadata_and_filtered_search.ipynb +](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/metadata_and_filtered_search.ipynb). diff --git a/docs/docs/extraction/data-store.md b/docs/docs/extraction/data-store.md index 6b0d039ce..4e714a66b 100644 --- a/docs/docs/extraction/data-store.md +++ b/docs/docs/extraction/data-store.md @@ -4,7 +4,7 @@ Use this documentation to learn how [NeMo Retriever Library](overview.md) handle !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. ## Overview @@ -20,10 +20,10 @@ It does not store the embeddings for images. !!! tip "Storing Extracted Images" - To persist extracted images, tables, and chart renderings to disk or object storage, use the `store` task in addition to `vdb_upload`. The `store` task supports any fsspec-compatible backend (local filesystem, S3, GCS, etc.). For details, refer to [Store Extracted Images](nv-ingest-python-api.md#store-extracted-images). + To persist extracted images, tables, and chart renderings to disk or object storage, use the `store` task in addition to `vdb_upload`. The `store` task supports any fsspec-compatible backend (local filesystem, S3, GCS, etc.). For details, refer to [Store Extracted Images](python-api-reference.md#store-extracted-images). 
-NeMo Retriever Library supports uploading data by using the [Ingestor.vdb_upload API](nv-ingest-python-api.md). -Currently, data upload is not supported through the [CLI](nv-ingest_cli.md). +NeMo Retriever Library supports uploading data by using the [Ingestor.vdb_upload API](python-api-reference.md). +Currently, data upload is not supported through the [CLI](cli-reference.md). @@ -50,7 +50,7 @@ from nv_ingest_client.util.vdb.lancedb import LanceDB vdb = LanceDB( uri="lancedb", # Path to LanceDB database directory - table_name="nv-ingest", # Table name + table_name="nemo-retriever", # Table name index_type="IVF_HNSW_SQ", # Index type (default) hybrid=False, # Enable hybrid search (BM25 FTS + vector) ) @@ -62,7 +62,7 @@ vdb.run(results) docs = vdb.retrieval(queries, top_k=10) ``` -When using the `Ingestor` with `vdb_upload`, the backend defaults to LanceDB unless you configure Milvus (see [Upload to Milvus](#upload-to-milvus)). +When using the `Ingestor` with `vdb_upload`, the backend defaults to LanceDB unless you configure Milvus (refer to [Upload to Milvus](#upload-to-milvus)). ### Test harness configuration @@ -79,10 +79,10 @@ Or via environment variables: ```bash # Switch to Milvus -VDB_BACKEND=milvus uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +VDB_BACKEND=milvus uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 # Enable LanceDB hybrid search -HYBRID=true uv run nv-ingest-harness-run --case=e2e --dataset=bo767 +HYBRID=true uv run python -m nv_ingest_harness.cli.run --case=e2e --dataset=bo767 ``` @@ -140,7 +140,7 @@ You can delete all collections by deleting that volume, and then restarting the !!! tip - When you use the `vdb_upload` method, the behavior of the upload depends on the `return_failures` parameter of the `ingest` method. For details, refer to [Capture Job Failures](nv-ingest-python-api.md#capture-job-failures). 
+ When you use the `vdb_upload` method, the behavior of the upload depends on the `return_failures` parameter of the `ingest` method. For details, refer to [Capture Job Failures](python-api-reference.md#capture-job-failures). To upload to Milvus, use code similar to the following to define your `Ingestor`. @@ -173,13 +173,13 @@ NeMo Retriever Library does not provide connections to other data sources. NVIDIA makes no claim about accuracy, performance, or functionality of any vector database except Milvus. If you use a different vector database, it's your responsibility to test and maintain it. -For more information, refer to [Build a Custom Vector Database Operator](https://github.com/NVIDIA/nv-ingest/blob/main/examples/building_vdb_operator.ipynb). +For more information, refer to [Build a Custom Vector Database Operator](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/building_vdb_operator.ipynb). ## Related Topics -- [Use the NeMo Retriever Library Python API](nv-ingest-python-api.md) -- [Store Extracted Images](nv-ingest-python-api.md#store-extracted-images) +- [Use the NeMo Retriever Library Python API](python-api-reference.md) +- [Store Extracted Images](python-api-reference.md#store-extracted-images) - [Environment Variables](environment-config.md) - [Troubleshoot Nemo Retriever Extraction](troubleshoot.md) diff --git a/docs/docs/extraction/embedding-nims-models.md b/docs/docs/extraction/embedding-nims-models.md new file mode 100644 index 000000000..48410f9d8 --- /dev/null +++ b/docs/docs/extraction/embedding-nims-models.md @@ -0,0 +1,12 @@ +# Embedding NIMs and models + +!!! note + + This documentation describes NeMo Retriever Library. + + +Embeddings turn extracted text and multimodal content into vectors for semantic search. NeMo Retriever Library integrates with NVIDIA NIM microservices for embedding. 
Model names and compatibility vary by release; refer to the [Support matrix](support-matrix.md) and the [NVIDIA NIM catalog](https://build.nvidia.com/). + +For multimodal or VLM embeddings, refer to [Multimodal embeddings (VLM)](vlm-embed.md). + +After embedding, content is stored in a vector database; refer to [Vector databases](data-store.md). RAG-style collections are created and populated through your pipeline configuration and harness runs. For details, refer to [Benchmarking](benchmarking.md) and the [data store](data-store.md) documentation for your backend. diff --git a/docs/docs/extraction/environment-config.md b/docs/docs/extraction/environment-config.md index 2c7be750b..db776e348 100644 --- a/docs/docs/extraction/environment-config.md +++ b/docs/docs/extraction/environment-config.md @@ -5,7 +5,7 @@ You can specify these in your .env file or directly in your environment. !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. ## General Environment Variables @@ -17,13 +17,13 @@ You can specify these in your .env file or directly in your environment. | `INGEST_LOG_LEVEL` | - `DEBUG`
- `INFO`
- `WARNING`
- `ERROR`
- `CRITICAL`
| The log level for the ingest service, which controls the verbosity of the logging output. | | `MESSAGE_CLIENT_HOST` | - `redis`
- `localhost`
- `192.168.1.10`
| Specifies the hostname or IP address of the message broker used for communication between services. | | `MESSAGE_CLIENT_PORT` | - `7670`
- `6379`
| Specifies the port number on which the message broker is listening. | -| `MINIO_BUCKET` | `nv-ingest`
| Name of MinIO bucket, used to store image, table, and chart extractions. | +| `MINIO_BUCKET` | `nemo-retriever`
| Name of MinIO bucket, used to store image, table, and chart extractions. | | `NGC_API_KEY` | `nvapi-*************`
| An authorized NGC API key, used to interact with hosted NIMs. To create an NGC key, go to [https://org.ngc.nvidia.com/setup/api-keys](https://org.ngc.nvidia.com/setup/api-keys). | | `NIM_NGC_API_KEY` | — | The key that NIM microservices inside docker containers use to access NGC resources. This is necessary only in some cases when it is different from `NGC_API_KEY`. If this is not specified, `NGC_API_KEY` is used to access NGC resources. | | `OTEL_EXPORTER_OTLP_ENDPOINT` | `http://otel-collector:4317`
| The endpoint for the OpenTelemetry exporter, used for sending telemetry data. | | `REDIS_INGEST_TASK_QUEUE` | `ingest_task_queue`
| The name of the task queue in Redis where tasks are stored and processed. | | `REDIS_POOL_SIZE` | - `50` (default)
- `100`
- `200`
| Maximum Redis connection pool size. Increase for high-concurrency workloads processing many documents in parallel. Default of 50 works well for most deployments. | -| `IMAGE_STORAGE_URI` | `s3://nv-ingest/artifacts/store/images`
| Default fsspec-compatible URI for the `store` task. Supports `s3://`, `file://`, `gs://`, etc. See [Store Extracted Images](nv-ingest-python-api.md#store-extracted-images). | +| `IMAGE_STORAGE_URI` | `s3://nemo-retriever/artifacts/store/images`
| Default fsspec-compatible URI for the `store` task. Supports `s3://`, `file://`, `gs://`, etc. Refer to [Store Extracted Images](python-api-reference.md#store-extracted-images). | | `IMAGE_STORAGE_PUBLIC_BASE_URL` | `https://assets.example.com/images`
| Optional HTTP(S) base URL for serving stored images. | diff --git a/docs/docs/extraction/evaluate-on-your-data.md b/docs/docs/extraction/evaluate-on-your-data.md new file mode 100644 index 000000000..edede12fa --- /dev/null +++ b/docs/docs/extraction/evaluate-on-your-data.md @@ -0,0 +1,22 @@ +# Evaluate on your data + +!!! note + + This documentation describes NeMo Retriever Library. + + +Retrieval and ingestion performance **depend on your documents**, hardware, and pipeline settings. Use the following when measuring quality and throughput on **your** datasets. + +## Benchmarking and baselines + +Start with [Benchmarking](benchmarking.md) for methodology and baseline expectations. Combine with [Telemetry](telemetry.md) to observe production-like runs. + +## Throughput and dataset effects + +Read [Throughput is dataset-dependent](throughput-is-dataset-dependent.md) for why raw numbers from generic benchmarks may not match your corpus (layout complexity, file types, image density, and so on). + +## Operational tuning + +- [Resource scaling modes](scaling-modes.md) +- [Support matrix](support-matrix.md) for supported configurations +- [Troubleshoot](troubleshoot.md) when results or performance diverge from expectations diff --git a/docs/docs/extraction/extraction-charts-infographics.md b/docs/docs/extraction/extraction-charts-infographics.md new file mode 100644 index 000000000..1e2814c70 --- /dev/null +++ b/docs/docs/extraction/extraction-charts-infographics.md @@ -0,0 +1,14 @@ +# Charts and infographics + +!!! note + + This documentation describes NeMo Retriever Library. + + +Charts and infographic regions are classified as graphic elements and processed with the corresponding NVIDIA NIM workflows (for example, **yolox-graphic-elements** in current releases). Outputs use the same metadata schema as other extracted objects. 
+ +**Related** + +- [What is NeMo Retriever Library?](overview.md) +- [Support matrix](support-matrix.md) +- [Multimodal embeddings (VLM)](vlm-embed.md) when you treat graphics as images for embedding diff --git a/docs/docs/extraction/extraction-ocr-scanned.md b/docs/docs/extraction/extraction-ocr-scanned.md new file mode 100644 index 000000000..21eabb10b --- /dev/null +++ b/docs/docs/extraction/extraction-ocr-scanned.md @@ -0,0 +1,14 @@ +# OCR and scanned documents + +!!! note + + This documentation describes NeMo Retriever Library. + + +Scanned PDFs and image-only pages rely on OCR and hybrid paths that combine native text extraction with OCR when needed. For extract methods such as `ocr` and `pdfium_hybrid`, refer to the [Python API reference](python-api-reference.md). + +**Related** + +- [Text and layout extraction](text-layout-extraction.md) +- [Nemotron Parse](nemoretriever-parse.md) +- [Throughput is dataset-dependent](throughput-is-dataset-dependent.md) diff --git a/docs/docs/extraction/extraction-tables.md b/docs/docs/extraction/extraction-tables.md new file mode 100644 index 000000000..ec01624ca --- /dev/null +++ b/docs/docs/extraction/extraction-tables.md @@ -0,0 +1,14 @@ +# Tables + +!!! note + + This documentation describes NeMo Retriever Library. + + +NeMo Retriever Library detects tables as structured page elements, processes them through the appropriate NIMs, and exports formats suitable for downstream RAG (including Markdown-oriented representations where configured). Availability depends on pipeline and model configuration; refer to the [Support matrix](support-matrix.md). 
+ +**Related** + +- [What is NeMo Retriever Library?](overview.md) for artifact classification +- [Nemotron Parse](nemoretriever-parse.md) for advanced visual parsing +- [Metadata reference](content-metadata.md) diff --git a/docs/docs/extraction/faq.md b/docs/docs/extraction/faq.md index 40b5ea376..00f8dddc9 100644 --- a/docs/docs/extraction/faq.md +++ b/docs/docs/extraction/faq.md @@ -4,7 +4,7 @@ This documentation contains the Frequently Asked Questions (FAQ) for [NeMo Retri !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. @@ -12,8 +12,8 @@ This documentation contains the Frequently Asked Questions (FAQ) for [NeMo Retri You can use the CLI or Python APIs to perform extraction only, and then consume the results. Using the Python API, `results` is a list object with one entry. -For code examples, see the Jupyter notebooks [Multimodal RAG with LlamaIndex](https://github.com/NVIDIA/nv-ingest/blob/main/examples/llama_index_multimodal_rag.ipynb) -and [Multimodal RAG with LangChain](https://github.com/NVIDIA/nv-ingest/blob/main/examples/langchain_multimodal_rag.ipynb). +For code examples, refer to the Jupyter notebooks [Multimodal RAG with LlamaIndex](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/llama_index_multimodal_rag.ipynb) +and [Multimodal RAG with LangChain](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/langchain_multimodal_rag.ipynb). @@ -32,7 +32,7 @@ For more information, refer to [Data Upload](data-store.md). For images that `nemoretriever-page-elements-v3` does not classify as tables, charts, or infographics, you can use our VLM caption task to create a dense caption of the detected image. That caption is then embedded along with the rest of your content. -For more information, refer to [Extract Captions from Images](nv-ingest-python-api.md#extract-captions-from-images). 
+For more information, refer to [Extract Captions from Images](python-api-reference.md#extract-captions-from-images). @@ -61,7 +61,7 @@ For production environments, you should use the provided Helm charts. For [libra For advanced scenarios, you might want to use library mode with self-hosted NIM instances. You can set custom endpoints for each NIM. -For examples of `*_ENDPOINT` variables, refer to [nv-ingest/docker-compose.yaml](https://github.com/NVIDIA/nv-ingest/blob/main/docker-compose.yaml). +For examples of `*_ENDPOINT` variables, refer to [docker-compose.yaml](https://github.com/NVIDIA/NeMo-Retriever/blob/main/docker-compose.yaml). @@ -71,13 +71,13 @@ For examples of `*_ENDPOINT` variables, refer to [nv-ingest/docker-compose.yaml] ## What parameters or settings can I adjust to optimize extraction from my documents or data? -See the [Profile Information](quickstart-guide.md#profile-information) section +Refer to the [Profile Information](quickstart-guide.md#profile-information) section for information about the optional NIM components of the pipeline. -You can configure the `extract`, `caption`, and other tasks by using the [Ingestor API](nv-ingest-python-api.md). +You can configure the `extract`, `caption`, and other tasks by using the [Ingestor API](python-api-reference.md). To choose what types of content to extract, use code similar to the following. -For more information, refer to [Extract Specific Elements from PDFs](nv-ingest-python-api.md#extract-specific-elements-from-pdfs). +For more information, refer to [Extract Specific Elements from PDFs](python-api-reference.md#extract-specific-elements-from-pdfs). ```python Ingestor(client=client) @@ -93,7 +93,7 @@ Ingestor(client=client) ``` To generate captions for images, use code similar to the following. -For more information, refer to [Extract Captions from Images](nv-ingest-python-api.md#extract-captions-from-images). 
+For more information, refer to [Extract Captions from Images](python-api-reference.md#extract-captions-from-images). ```python Ingestor(client=client) diff --git a/docs/docs/extraction/getting-started-about.md b/docs/docs/extraction/getting-started-about.md new file mode 100644 index 000000000..01f1fe4db --- /dev/null +++ b/docs/docs/extraction/getting-started-about.md @@ -0,0 +1,19 @@ +# About getting started + +!!! note + + This documentation describes NeMo Retriever Library. + + +This section walks you from **access and prerequisites** through **first deployment** and **hands-on notebooks**. + +Typical order: + +1. [Get your API key](ngc-api-key.md) (NGC / API access as required by your workflow). +2. Confirm [Prerequisites](prerequisites.md) and the [Support matrix](support-matrix.md) for your OS, GPU, and software stack. +3. Deploy using one of: + - [Library mode](quickstart-library-mode.md) (without full stack containers where appropriate) + - [Helm Chart](helm.md) for Kubernetes environments +4. Explore [Jupyter Notebooks](notebooks.md) for end-to-end examples. + +If you are new to the product, read [What is NeMo Retriever Library?](overview.md), [Key features](key-features.md), and [Concepts](concepts.md) under **Introduction** first. diff --git a/docs/docs/extraction/helm.md b/docs/docs/extraction/helm.md index 0983a382e..0097b0ac4 100644 --- a/docs/docs/extraction/helm.md +++ b/docs/docs/extraction/helm.md @@ -2,7 +2,11 @@ # Deploy With Helm for NeMo Retriever Library -To deploy [NeMo Retriever Library](overview.md) by using Helm, refer to [NV-Ingest Helm Charts](https://github.com/NVIDIA/NeMo-Retriever/blob/26.03/helm/README.md). +!!! note + + This documentation describes NeMo Retriever Library. + +To deploy [NeMo Retriever Library](overview.md) by using Helm, refer to [NeMo Retriever Helm charts](https://github.com/NVIDIA/NeMo-Retriever/blob/26.03/helm/README.md). !!! 
note "Air-gapped environments" diff --git a/docs/docs/extraction/hosted-nims-when-to-use.md b/docs/docs/extraction/hosted-nims-when-to-use.md new file mode 100644 index 000000000..02e1a249d --- /dev/null +++ b/docs/docs/extraction/hosted-nims-when-to-use.md @@ -0,0 +1,19 @@ +# When to use NVIDIA-hosted NIMs + +!!! note + + This documentation describes NeMo Retriever Library. + + +[NVIDIA-hosted NIMs](https://build.nvidia.com/) run inference on NVIDIA-managed infrastructure. You call models with API keys (refer to [Get your API key](ngc-api-key.md)) without operating GPU nodes yourself. + +Consider hosted NIMs when: + +- You want the fastest path to try models and iterate without installing drivers, containers, or the [NIM Operator](https://docs.nvidia.com/nim-operator/latest/index.html) on your own clusters. +- Latency to NVIDIA endpoints works for your region and use case. +- Your compliance and data policies allow document or query content in the hosted service (confirm with your security review). + +For more information, refer to the following pages: + +- [NVIDIA NIM catalog](https://build.nvidia.com/) +- [Compare deployment options](choose-your-path.md) diff --git a/docs/docs/extraction/how-to-use-this-documentation.md b/docs/docs/extraction/how-to-use-this-documentation.md new file mode 100644 index 000000000..889584aa7 --- /dev/null +++ b/docs/docs/extraction/how-to-use-this-documentation.md @@ -0,0 +1,27 @@ +# How to use this documentation + +!!! note + + This documentation describes NeMo Retriever Library. + + +Use the sections below as a reading order that matches how you run NeMo Retriever Library. + +## NeMo Retriever Library (local or embedded) + +Start with the [Introduction](overview.md), [Concepts](concepts.md), and [Get started](getting-started-about.md) pages. Then follow [Prerequisites](prerequisites.md), [Quickstart: Library mode](quickstart-library-mode.md), and either the [Python API](python-api-reference.md) or [CLI](cli-reference.md). 
For deeper topics, refer to [Core workflows](v2-api-guide.md) and [Multimodal extraction](supported-file-types.md). + +## Microservices, Helm, and production clusters + +Follow [Choose your deployment](choose-your-path.md), [Deploy (Helm Chart)](helm.md), [Environment variables](environment-config.md), and the [V2 API guide](v2-api-guide.md). For operations topics, refer to [Scaling modes](scaling-modes.md), [Ray logging](ray-logging.md), [Telemetry](telemetry.md), and [Benchmarking](benchmarking.md). + +## NVIDIA Blueprints and end-to-end RAG + +For solution-level patterns, read [End-to-end RAG with NVIDIA Blueprints](resources-links.md), which links to the relevant NVIDIA AI Blueprints. These docs cover ingestion, embedding, and retrieval primitives that Blueprints combine into full applications. + +## Related + +The following pages supplement this overview: + +- [About getting started](getting-started-about.md), for a step-by-step first deployment +- [Release notes](releasenotes.md) diff --git a/docs/docs/extraction/image-captioning.md b/docs/docs/extraction/image-captioning.md new file mode 100644 index 000000000..2ab11998f --- /dev/null +++ b/docs/docs/extraction/image-captioning.md @@ -0,0 +1,14 @@ +# Image captioning + +!!! note + + This documentation describes NeMo Retriever Library. + + +Image captioning generates natural-language descriptions for unstructured image content. Retrieval can then use text embeddings over captions and visual embeddings where you configure them. 
+ +**Related** + +- [Multimodal embeddings (VLM)](vlm-embed.md) +- [Metadata reference](content-metadata.md) +- [What is NeMo Retriever Library?](overview.md) diff --git a/docs/docs/extraction/integrations-langchain-llamaindex-haystack.md b/docs/docs/extraction/integrations-langchain-llamaindex-haystack.md new file mode 100644 index 000000000..1f5f7dd8b --- /dev/null +++ b/docs/docs/extraction/integrations-langchain-llamaindex-haystack.md @@ -0,0 +1,27 @@ +# Integrate with LangChain, LlamaIndex, and Haystack + +!!! note + + This documentation describes NeMo Retriever Library. + + +NeMo Retriever Library is commonly used **behind** retrieval-augmented generation (RAG) apps built with popular orchestration frameworks. + +## Jupyter examples (LangChain and LlamaIndex) + +The repository includes notebooks that demonstrate multimodal RAG patterns: + +- [Multimodal RAG with LangChain](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/langchain_multimodal_rag.ipynb) +- [Multimodal RAG with LlamaIndex](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/llama_index_multimodal_rag.ipynb) + +These are also linked from [Jupyter Notebooks](notebooks.md) and the [FAQ](faq.md). + +## Haystack + +Haystack-related extraction modes may appear in API tables as **deprecated** in favor of current pipeline options. For up-to-date integration patterns, prefer the Python API and CLI docs, and check [Release notes](releasenotes.md) for migration notes. + +## Related + +- [Use the Python API](python-api-reference.md) +- [Use the CLI](cli-reference.md) +- [Split documents](chunking.md), [Upload data](data-store.md), [Filter search](custom-metadata.md) diff --git a/docs/docs/extraction/key-features.md b/docs/docs/extraction/key-features.md new file mode 100644 index 000000000..14911dfb8 --- /dev/null +++ b/docs/docs/extraction/key-features.md @@ -0,0 +1,31 @@ +# Key features + +!!! note + + This documentation describes NeMo Retriever Library. 
+ + +NeMo Retriever Library is built for **accuracy**, **throughput**, and **flexible deployment** in enterprise retrieval pipelines. + +## Extraction and understanding + +- **Multimodal extraction** — Text, tables, charts, and infographics from documents and media formats supported by the [support matrix](support-matrix.md). +- **Multiple extraction backends** — Trade off throughput and accuracy per document type (for example PDF via pdfium and [nemotron-parse](https://build.nvidia.com/nvidia/nemotron-parse)). +- **Chunking and enrichment** — Splitting, transforms, filtering, embedding, and optional offload to storage. + +## Operations and scale + +- **High throughput** — Parallel extraction, embedding, and indexing designed for large document corpora. +- **Configurable pipelines** — Jobs are described as JSON; you choose tasks per payload rather than a single fixed pipeline. +- **Deployment options** — [Library mode](quickstart-library-mode.md), [Helm](helm.md), and [notebooks](notebooks.md) for different environments. + +## Enterprise-oriented behavior + +- **Composable services** — Use modules that fit your environment and security boundaries. +- **Clear metadata** — Structured JSON results with extraction metadata, annotations, and trace data for observability. + +## Related topics + +- [Concepts](concepts.md) — Core ideas (jobs, pipelines, metadata). +- [Choose your path](choose-your-path.md) — Pick a deployment and learning path. +- [Overview](overview.md) — Full product description. diff --git a/docs/docs/extraction/multimodal-metadata-schema.md b/docs/docs/extraction/multimodal-metadata-schema.md new file mode 100644 index 000000000..fd68ff7af --- /dev/null +++ b/docs/docs/extraction/multimodal-metadata-schema.md @@ -0,0 +1,8 @@ +# Metadata and content schema (multimodal extraction) + +!!! note + + This documentation describes NeMo Retriever Library. 
+ + +Extracted objects follow the schema and field descriptions in the [Metadata reference](content-metadata.md). Use that page for tables, types, and per-field notes. diff --git a/docs/docs/extraction/nemo-retriever-api-reference.md b/docs/docs/extraction/nemo-retriever-api-reference.md index 7e4f2c0e0..de30da043 100644 --- a/docs/docs/extraction/nemo-retriever-api-reference.md +++ b/docs/docs/extraction/nemo-retriever-api-reference.md @@ -1,5 +1,10 @@ # NeMo Retriever API Reference +!!! note + + This documentation describes NeMo Retriever Library. + + ::: nemo_retriever.ingestor ::: nemo_retriever.retriever diff --git a/docs/docs/extraction/nemoretriever-parse.md b/docs/docs/extraction/nemoretriever-parse.md index 51fe136ed..5b5da7b8e 100644 --- a/docs/docs/extraction/nemoretriever-parse.md +++ b/docs/docs/extraction/nemoretriever-parse.md @@ -1,5 +1,10 @@ # Advanced Visual Parsing with Nemotron Parse +!!! note + + This documentation describes NeMo Retriever Library. + + For scanned documents, or documents with complex layouts, we recommend that you use [nemotron-parse](https://build.nvidia.com/nvidia/nemotron-parse). Nemotron parse provides higher-accuracy text extraction. @@ -11,11 +16,6 @@ to run [NeMo Retriever Library](overview.md) with nemotron-parse. - Use NVIDIA Cloud Functions (NVCF) endpoints for cloud-based inference - Run the Ray batch pipeline with nemotron-parse ([library mode](quickstart-library-mode.md)) -!!! note - - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. - - ## Limitations Currently, the limitations to using `nemotron-parse` with NeMo Retriever Library are the following: @@ -61,7 +61,7 @@ Use the following procedure to run the NIM locally. !!! tip - For more Python examples, refer to [NV-Ingest: Python Client Quick Start Guide](https://github.com/NVIDIA/nv-ingest/blob/main/client/client_examples/examples/python_client_usage.ipynb). 
+ For more Python examples, refer to [Python Quick Start Guide](https://github.com/NVIDIA/NeMo-Retriever/blob/main/client/client_examples/examples/python_client_usage.ipynb). ## Using NVCF Endpoints for Cloud-Based Inference @@ -102,7 +102,7 @@ Instead of running the pipeline locally, you can use NVCF to perform inference b !!! tip - For more Python examples, refer to [NV-Ingest: Python Client Quick Start Guide](https://github.com/NVIDIA/nv-ingest/blob/main/client/client_examples/examples/python_client_usage.ipynb). + For more Python examples, refer to [Python Quick Start Guide](https://github.com/NVIDIA/NeMo-Retriever/blob/main/client/client_examples/examples/python_client_usage.ipynb). @@ -110,4 +110,4 @@ Instead of running the pipeline locally, you can use NVCF to perform inference b - [Support Matrix](support-matrix.md) - [Troubleshoot Nemo Retriever Extraction](troubleshoot.md) -- [Use the Python API](nv-ingest-python-api.md) +- [Use the Python API](python-api-reference.md) diff --git a/docs/docs/extraction/ngc-api-key.md b/docs/docs/extraction/ngc-api-key.md index 8e205aca7..9ff911149 100644 --- a/docs/docs/extraction/ngc-api-key.md +++ b/docs/docs/extraction/ngc-api-key.md @@ -1,5 +1,10 @@ # Generate Your NGC Keys +!!! note + + This documentation describes NeMo Retriever Library. + + NGC contains many public images, models, and datasets that can be pulled immediately without authentication. To push and pull custom images, you must generate a key and authenticate with NGC. diff --git a/docs/docs/extraction/nimclient.md b/docs/docs/extraction/nimclient.md index 4f1d29c17..59044cc23 100644 --- a/docs/docs/extraction/nimclient.md +++ b/docs/docs/extraction/nimclient.md @@ -5,14 +5,14 @@ This documentation demonstrates how to create custom NIM integrations for use in !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. The NimClient architecture consists of two main components: 1. 
**NimClient**: The client class that handles communication with NIM endpoints via gRPC or HTTP protocols 2. **ModelInterface**: An abstract base class that defines how to format input data, parse output responses, and process inference results for specific models -For advanced usage patterns, see the existing model interfaces in `api/src/nv_ingest_api/internal/primitives/nim/model_interface/`. +For advanced usage patterns, refer to the existing model interfaces in `api/src/nv_ingest_api/internal/primitives/nim/model_interface/`. ## Quick Start @@ -23,7 +23,7 @@ For advanced usage patterns, see the existing model interfaces in `api/src/nv_in from nv_ingest_api.util.nim import create_inference_client from nv_ingest_api.internal.primitives.nim import ModelInterface -# Create a custom model interface (see examples below) +# Create a custom model interface (refer to examples below) model_interface = MyCustomModelInterface() # Define endpoints (gRPC, HTTP) diff --git a/docs/docs/extraction/notebooks.md b/docs/docs/extraction/notebooks.md index b7748a01a..36a92b25f 100644 --- a/docs/docs/extraction/notebooks.md +++ b/docs/docs/extraction/notebooks.md @@ -1,33 +1,33 @@ # Notebooks for NeMo Retriever Library -To get started using [NeMo Retriever Library](overview.md), you can try one of the ready-made notebooks that are available. - !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. +To get started using [NeMo Retriever Library](overview.md), you can try one of the ready-made notebooks that are available. + ## Dataset Downloads for Benchmarking -If you plan to run benchmarking or evaluation tests, you must download the [Benchmark Datasets (Bo20, Bo767, Bo10k)](https://github.com/NVIDIA/nv-ingest/blob/main/evaluation/digital_corpora_download.ipynb) from Digital Corpora. This is a prerequisite for all benchmarking operations. 
+If you plan to run benchmarking or evaluation tests, you must download the [Benchmark Datasets (Bo20, Bo767, Bo10k)](https://github.com/NVIDIA/NeMo-Retriever/blob/main/evaluation/digital_corpora_download.ipynb) from Digital Corpora. This is a prerequisite for all benchmarking operations. ## Getting Started To get started with the basics, try one of the following notebooks: -- [NV-Ingest: CLI Client Quick Start Guide](https://github.com/NVIDIA/nv-ingest/blob/main/client/client_examples/examples/cli_client_usage.ipynb) -- [NV-Ingest: Python Client Quick Start Guide](https://github.com/NVIDIA/nv-ingest/blob/main/client/client_examples/examples/python_client_usage.ipynb) -- [How to add metadata to your documents and filter searches](https://github.com/NVIDIA/nv-ingest/blob/main/examples/metadata_and_filtered_search.ipynb) -- [How to reindex a collection](https://github.com/NVIDIA/nv-ingest/blob/main/examples/reindex_example.ipynb) +- [CLI Quick Start Guide](https://github.com/NVIDIA/NeMo-Retriever/blob/main/client/client_examples/examples/cli_client_usage.ipynb) +- [Python Quick Start Guide](https://github.com/NVIDIA/NeMo-Retriever/blob/main/client/client_examples/examples/python_client_usage.ipynb) +- [How to add metadata to your documents and filter searches](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/metadata_and_filtered_search.ipynb) +- [How to reindex a collection](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/reindex_example.ipynb) For more advanced scenarios, try one of the following notebooks: -- [Build a Custom Vector Database Operator](https://github.com/NVIDIA/nv-ingest/blob/main/examples/building_vdb_operator.ipynb) -- [Try Enterprise RAG Blueprint](https://github.com/NVIDIA/nv-ingest/blob/main/deploy/pdf-blueprint.ipynb) -- [Evaluate bo767 retrieval recall accuracy with NV-Ingest and Milvus](https://github.com/NVIDIA/nv-ingest/blob/main/evaluation/bo767_recall.ipynb) -- [Multimodal RAG with 
LangChain](https://github.com/NVIDIA/nv-ingest/blob/main/examples/langchain_multimodal_rag.ipynb) -- [Multimodal RAG with LlamaIndex](https://github.com/NVIDIA/nv-ingest/blob/main/examples/llama_index_multimodal_rag.ipynb) +- [Build a Custom Vector Database Operator](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/building_vdb_operator.ipynb) +- [Try Enterprise RAG Blueprint](https://github.com/NVIDIA/NeMo-Retriever/blob/main/deploy/pdf-blueprint.ipynb) +- [Evaluate bo767 retrieval recall accuracy with NeMo Retriever Library and Milvus](https://github.com/NVIDIA/NeMo-Retriever/blob/main/evaluation/bo767_recall.ipynb) +- [Multimodal RAG with LangChain](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/langchain_multimodal_rag.ipynb) +- [Multimodal RAG with LlamaIndex](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/llama_index_multimodal_rag.ipynb) diff --git a/docs/docs/extraction/nv-ingest-python-api.md b/docs/docs/extraction/nv-ingest-python-api.md index f0c5092ba..d7167d7ec 100644 --- a/docs/docs/extraction/nv-ingest-python-api.md +++ b/docs/docs/extraction/nv-ingest-python-api.md @@ -4,11 +4,11 @@ The [NeMo Retriever Library](overview.md) Python API provides a simple and flexi !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. !!! tip - There is a Jupyter notebook available to help you get started with the Python API. For more information, refer to [Python Client Quick Start Guide](https://github.com/NVIDIA/nv-ingest/blob/main/client/client_examples/examples/python_client_usage.ipynb). + There is a Jupyter notebook available to help you get started with the Python API. For more information, refer to [Python Client Quick Start Guide](https://github.com/NVIDIA/NeMo-Retriever/blob/main/client/client_examples/examples/python_client_usage.ipynb). 
## Summary of Key Methods @@ -478,11 +478,11 @@ The `store` task uses [fsspec](https://filesystem-spec.readthedocs.io/) for stor | Amazon S3 | `s3://` | `s3://my-bucket/extracted-images` | | Google Cloud Storage | `gs://` | `gs://my-bucket/images` | | Azure Blob Storage | `abfs://` | `abfs://container@account.dfs.core.windows.net/images` | -| MinIO (S3-compatible) | `s3://` | `s3://nv-ingest/artifacts/store/images` (default) | +| MinIO (S3-compatible) | `s3://` | `s3://nemo-retriever/artifacts/store/images` (default) | !!! tip - `storage_uri` defaults to the server-side `IMAGE_STORAGE_URI` environment variable (commonly `s3://nv-ingest/...`). If you change that variable—for example to a host-mounted `file://` path—restart the runtime so the container picks up the new value. + `storage_uri` defaults to the server-side `IMAGE_STORAGE_URI` environment variable (commonly `s3://nemo-retriever/...`). If you change that variable—for example to a host-mounted `file://` path—restart the runtime so the container picks up the new value. When `public_base_url` is provided, the metadata returned from `ingest()` surfaces that HTTP(S) link while still recording the underlying storage URI. Leave it unset when the storage endpoint itself is already publicly reachable. 
@@ -520,18 +520,18 @@ ingestor = ingestor.store( ```bash # Set DATASET_ROOT before starting services -export DATASET_ROOT=/raid/my-project/nv-ingest-data +export DATASET_ROOT=/raid/my-project/nemo-retriever-data docker compose up -d ``` ```python -# Now /workspace/data maps to /raid/my-project/nv-ingest-data +# Now /workspace/data maps to /raid/my-project/nemo-retriever-data ingestor = ingestor.store( structured=True, images=True, storage_uri="file:///workspace/data/extracted-images" ) -# Files save to /raid/my-project/nv-ingest-data/extracted-images on host +# Files save to /raid/my-project/nemo-retriever-data/extracted-images on host ``` For more information on environment variables, refer to [Environment Variables](environment-config.md). diff --git a/docs/docs/extraction/nv-ingest_cli.md b/docs/docs/extraction/nv-ingest_cli.md index 5ab8cd095..12a28d873 100644 --- a/docs/docs/extraction/nv-ingest_cli.md +++ b/docs/docs/extraction/nv-ingest_cli.md @@ -1,33 +1,38 @@ -# Use the NV-Ingest Command Line Interface +# Use the NeMo Retriever Library Command Line Interface -After you install the Python dependencies, you can use the [NV-Ingest](overview.md) command line interface (CLI). -To use the CLI, use the `nv-ingest-cli` command. +!!! note + + This documentation describes NeMo Retriever Library. + + +After you install the Python dependencies, you can use the [NeMo Retriever Library](overview.md) command line interface (CLI). +To use the CLI, run the `nemo-retriever` command. To check the version of the CLI that you have installed, run the following command. ```bash -nv-ingest-cli --version +nemo-retriever --version ``` To get a list of the current CLI commands and their options, run the following command. ```bash -nv-ingest-cli --help +nemo-retriever --help ``` !!! tip - There is a Jupyter notebook available to help you get started with the CLI.
For more information, refer to [CLI Client Quick Start Guide](https://github.com/NVIDIA/nv-ingest/blob/main/client/client_examples/examples/cli_client_usage.ipynb). + There is a Jupyter notebook available to help you get started with the CLI. For more information, refer to [CLI Client Quick Start Guide](https://github.com/NVIDIA/NeMo-Retriever/blob/main/client/client_examples/examples/cli_client_usage.ipynb). ## Examples -Use the following code examples to submit a document to the `nv-ingest-ms-runtime` service. +Use the following code examples to submit a document to the **ingestion runtime** service. -Each of the following commands can be run from the host machine, or from within the `nv-ingest-ms-runtime` container. +Each of the following commands can be run from the host machine, or from within the ingestion runtime container. -- Host: `nv-ingest-cli ...` -- Container: `nv-ingest-cli ...` +- Host: `nemo-retriever ...` +- Container: `nemo-retriever ...` ### Example: Text File With No Splitting @@ -39,7 +44,7 @@ To submit a text file with no splitting, run the following code. You receive a response that contains a single document, which is the entire text file. The data that is returned is wrapped in the appropriate [metadata structure](content-metadata.md). ```bash -nv-ingest-cli \ +nemo-retriever \ --doc ./data/test.pdf \ --client_host=localhost \ --client_port=7670 @@ -51,7 +56,7 @@ nv-ingest-cli \ To submit a .pdf file with only a splitting task, run the following code. ```bash -nv-ingest-cli \ +nemo-retriever \ --doc ./data/test.pdf \ --output_directory ./processed_docs \ --task='split' \ @@ -68,7 +73,7 @@ To submit a .pdf file with both a splitting task and an extraction task, run the Currently, `split` only works for pdfium and nemotron-parse. 
```bash -nv-ingest-cli \ +nemo-retriever \ --doc ./data/test.pdf \ --output_directory ./processed_docs \ --task='extract:{"document_type": "pdf", "extract_method": "pdfium"}' \ @@ -91,7 +96,7 @@ This allows you to control how many pages are included in each PDF chunk during Smaller chunks provide more parallelism but increase overhead, while larger chunks reduce overhead but limit concurrency. ```bash -nv-ingest-cli \ +nemo-retriever \ --doc ./data/test.pdf \ --output_directory ./processed_docs \ --task='extract:{"document_type": "pdf", "extract_method": "pdfium", "extract_text": "true"}' \ @@ -106,7 +111,7 @@ nv-ingest-cli \ To invoke image captioning and control reasoning: ```bash -nv-ingest-cli \ +nemo-retriever \ --doc ./data/test.pdf \ --task='extract:{"document_type": "pdf", "extract_method": "pdfium", "extract_images": "true"}' \ --task='caption:{"prompt": "Caption the content of this image:", "reasoning": true}' \ @@ -119,14 +124,14 @@ nv-ingest-cli \ !!! tip - The caption service uses a default VLM which you can override by selecting other vision-language models to better match your image captioning needs. For more information, refer to [Extract Captions from Images](nv-ingest-python-api.md#extract-captions-from-images). + The caption service uses a default VLM which you can override by selecting other vision-language models to better match your image captioning needs. For more information, refer to [Extract Captions from Images](python-api-reference.md#extract-captions-from-images). Alternatively, you can use an environment variable to set the API version: ```bash export NV_INGEST_API_VERSION=v2 -nv-ingest-cli \ +nemo-retriever \ --doc ./data/test.pdf \ --output_directory ./processed_docs \ --task='extract:{"document_type": "pdf", "extract_method": "pdfium", "extract_text": "true"}' \ @@ -142,7 +147,7 @@ To submit a dataset for processing, run the following code. 
To create a dataset, refer to [Command Line Dataset Creation with Enumeration and Sampling](#command-line-dataset-creation-with-enumeration-and-sampling). ```shell -nv-ingest-cli \ +nemo-retriever \ --dataset dataset.json \ --output_directory ./processed_docs \ --task='extract:{"document_type": "pdf", "extract_method": "pdfium"}' \ @@ -154,7 +159,7 @@ nv-ingest-cli \ Submit a PDF file with extraction tasks and upload extracted images to MinIO. ```bash -nv-ingest-cli \ +nemo-retriever \ --doc ./data/test.pdf \ --output_directory ./processed_docs \ --task='extract:{"document_type": "pdf", "extract_method": "pdfium"}' \ diff --git a/docs/docs/extraction/overview.md b/docs/docs/extraction/overview.md index ed1c7b206..bf9476657 100644 --- a/docs/docs/extraction/overview.md +++ b/docs/docs/extraction/overview.md @@ -1,12 +1,10 @@ # What is NeMo Retriever Library? -NeMo Retriever Library is a scalable, performance-oriented document content and metadata extraction microservice. -NeMo Retriever Library uses specialized NVIDIA NIM microservices -to find, contextualize, and extract text, tables, charts and infographics that you can use in downstream generative applications. +NVIDIA NeMo Retriever Library (NRL) is a scalable, performance-oriented framework for document content and metadata extraction. It supports both NVIDIA NIM microservices and a wide range of models to find, contextualize, and extract text, tables, charts, and infographics for use in downstream generative and retrieval-augmented applications. !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. NeMo Retriever Library enables parallelization of splitting documents into pages where artifacts are classified (such as text, tables, charts, and infographics), extracted, and further contextualized through optical character recognition (OCR) into a well defined JSON schema. 
From there, NeMo Retriever Library can optionally manage computation of embeddings for the extracted content, @@ -14,7 +12,7 @@ and optionally manage storing into a vector database ([LanceDB](https://lancedb. !!! note - Cached and Deplot are deprecated. Instead, NeMo Retriever Library now uses the yolox-graphic-elements NIM. With this change, you should now be able to run NeMo Retriever Library on a single 24GB A10G or better GPU. If you want to use the old pipeline, with Cached and Deplot, use the [NeMo Retriever Library 24.12.1 release](https://github.com/NVIDIA/nv-ingest/tree/24.12.1). + Cached and Deplot are deprecated. Instead, NeMo Retriever Library now uses the yolox-graphic-elements NIM. With this change, you should now be able to run NeMo Retriever Library on a single 24GB A10G or better GPU. If you want to use the old pipeline, with Cached and Deplot, use the [NeMo Retriever Library 24.12.1 release](https://github.com/NVIDIA/NeMo-Retriever/tree/24.12.1). diff --git a/docs/docs/extraction/prerequisites.md b/docs/docs/extraction/prerequisites.md index 3f7947f58..e76c94dea 100644 --- a/docs/docs/extraction/prerequisites.md +++ b/docs/docs/extraction/prerequisites.md @@ -4,7 +4,7 @@ Before you begin using [NeMo Retriever Library](overview.md), ensure the followi !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. @@ -60,7 +60,7 @@ For production deployments processing large volumes of documents, consider: - Additional CPU cores for improved parallel processing - Multiple GPUs for distributed processing workloads -For guidance on choosing between static and dynamic scaling modes, and how to configure them in `docker-compose.yaml`, see [Scaling Modes](scaling-modes.md). +For guidance on choosing between static and dynamic scaling modes, and how to configure them in `docker-compose.yaml`, refer to [Scaling Modes](scaling-modes.md). 
diff --git a/docs/docs/extraction/production-checklist.md b/docs/docs/extraction/production-checklist.md new file mode 100644 index 000000000..809d4fc18 --- /dev/null +++ b/docs/docs/extraction/production-checklist.md @@ -0,0 +1,24 @@ +# Production checklist + +!!! note + + This documentation describes NeMo Retriever Library. + + +Use this checklist before you run NeMo Retriever Library in production. Pair it with [Troubleshoot](troubleshoot.md) and your internal security review. + +**Security and access** + +- [ ] API keys and secrets follow least privilege ([Get your API key](ngc-api-key.md), [Environment variables](environment-config.md)). +- [ ] Network policies match hosted versus self-hosted NIM choices ([When to use NVIDIA-hosted NIMs](hosted-nims-when-to-use.md), [When to self-host NIMs](self-host-nims-when-to-use.md)). + +**Operations** + +- [ ] Scaling model matches workload ([Scaling modes](scaling-modes.md)). +- [ ] Logging and Ray workers are observable ([Ray logging](ray-logging.md)). +- [ ] Telemetry is wired for your stack ([Telemetry](telemetry.md)). + +**Quality and capacity** + +- [ ] Benchmarks or load tests cover expected concurrency ([Benchmarking](benchmarking.md)). +- [ ] Dataset-specific throughput expectations are set ([Throughput is dataset-dependent](throughput-is-dataset-dependent.md)). diff --git a/docs/docs/extraction/published-metrics-comparisons.md b/docs/docs/extraction/published-metrics-comparisons.md new file mode 100644 index 000000000..2e289ab6e --- /dev/null +++ b/docs/docs/extraction/published-metrics-comparisons.md @@ -0,0 +1,14 @@ +# Published metrics and comparisons + +!!! note + + This documentation describes NeMo Retriever Library. + + +Published scores for parsing and retrieval components (including table-structure metrics such as TEDS where they apply) appear in NVIDIA papers, model cards, and product announcements. Treat published benchmarks as reference points, not guarantees on your own documents. 
Always [evaluate on your data](evaluate-on-your-data.md). + +**Starting points** + +- [NVIDIA NIM catalog](https://build.nvidia.com/) for model and NIM listings +- [Benchmarking](benchmarking.md) for how this repository measures recall and performance +- [Throughput is dataset-dependent](throughput-is-dataset-dependent.md) diff --git a/docs/docs/extraction/python-api-reference.md b/docs/docs/extraction/python-api-reference.md index 6853c8968..2911ae3b0 100644 --- a/docs/docs/extraction/python-api-reference.md +++ b/docs/docs/extraction/python-api-reference.md @@ -4,7 +4,7 @@ The [NeMo Retriever Library](overview.md) Python API provides a simple and flexi !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. !!! tip @@ -570,11 +570,11 @@ The `store` task uses [fsspec](https://filesystem-spec.readthedocs.io/) for stor | Amazon S3 | `s3://` | `s3://my-bucket/extracted-images` | | Google Cloud Storage | `gs://` | `gs://my-bucket/images` | | Azure Blob Storage | `abfs://` | `abfs://container@account.dfs.core.windows.net/images` | -| MinIO (S3-compatible) | `s3://` | `s3://nv-ingest/artifacts/store/images` (default) | +| MinIO (S3-compatible) | `s3://` | `s3://nemo-retriever/artifacts/store/images` (default) | !!! tip - `storage_uri` defaults to the server-side `IMAGE_STORAGE_URI` environment variable (commonly `s3://nv-ingest/...`). If you change that variable—for example to a host-mounted `file://` path—restart the NeMo Retriever Library runtime so the container picks up the new value. + `storage_uri` defaults to the server-side `IMAGE_STORAGE_URI` environment variable (commonly `s3://nemo-retriever/...`). If you change that variable—for example to a host-mounted `file://` path—restart the NeMo Retriever Library runtime so the container picks up the new value. 
When `public_base_url` is provided, the metadata returned from `ingest()` surfaces that HTTP(S) link while still recording the underlying storage URI. Leave it unset when the storage endpoint itself is already publicly reachable. diff --git a/docs/docs/extraction/quickstart-guide.md b/docs/docs/extraction/quickstart-guide.md index 0077cd529..f27c065f5 100644 --- a/docs/docs/extraction/quickstart-guide.md +++ b/docs/docs/extraction/quickstart-guide.md @@ -1,26 +1,31 @@ # Deploy With Docker Compose (Self-Hosted) for NeMo Retriever Library +!!! note + + This documentation describes NeMo Retriever Library. + + This guide helps you get started using [NeMo Retriever Library](overview.md) in self-hosted mode. ## Step 1: Start Containers -Use the provided [docker-compose.yaml](https://github.com/NVIDIA/nv-ingest/blob/main/docker-compose.yaml) to start all needed services with a few commands. +Use the provided [docker-compose.yaml](https://github.com/NVIDIA/NeMo-Retriever/blob/main/docker-compose.yaml) to start all needed services with a few commands. !!! warning NIM containers on their first startup can take 10-15 minutes to pull and fully load models. -If you prefer, you can run on Kubernetes by using [our Helm chart](https://github.com/NVIDIA/nv-ingest/blob/main/helm/README.md). Also, there are [additional environment variables](environment-config.md) you can configure. +If you prefer, you can run on Kubernetes by using [our Helm chart](https://github.com/NVIDIA/NeMo-Retriever/blob/main/helm/README.md). Also, there are [additional environment variables](environment-config.md) you can configure. a. Git clone the repo: - `git clone https://github.com/nvidia/nv-ingest` + `git clone https://github.com/NVIDIA/NeMo-Retriever` b. Change the directory to the cloned repo by running the following code. - `cd nv-ingest`. + `cd NeMo-Retriever`. c. [Generate API keys](ngc-api-key.md) and authenticate with NGC with the `docker login` command. @@ -48,17 +53,17 @@ e. 
Make sure that NVIDIA is set as your default container runtime before you run `sudo nvidia-ctk runtime configure --runtime=docker --set-as-default` -f. Start core services. By default, the pipeline uses **LanceDB** as the vector database (embedded, in-process); no extra Docker profile is required. If you want to use **Milvus** instead, start with the retrieval profile. This example uses the retrieval profile to run Milvus. For more information about other profiles, see [Profile Information](#profile-information). +f. Start core services. By default, the pipeline uses **LanceDB** as the vector database (embedded, in-process); no extra Docker profile is required. If you want to use **Milvus** instead, start with the retrieval profile. This example uses the retrieval profile to run Milvus. For more information about other profiles, refer to [Profile Information](#profile-information). `docker compose --profile retrieval up` !!! tip "LanceDB (default)" - To use the default LanceDB backend, you can run `docker compose up` without `--profile retrieval`. LanceDB runs in-process and does not require Milvus, etcd, or MinIO. For details, see [Data Upload](data-store.md). + To use the default LanceDB backend, you can run `docker compose up` without `--profile retrieval`. LanceDB runs in-process and does not require Milvus, etcd, or MinIO. For details, refer to [Data Upload](data-store.md). !!! tip - By default, we have [configured log levels to be verbose](https://github.com/NVIDIA/nv-ingest/blob/main/docker-compose.yaml). It's possible to observe service startup proceeding. You will notice a lot of log messages. Disable verbose logging by configuring `NIM_TRITON_LOG_VERBOSE=0` for each NIM in [docker-compose.yaml](https://github.com/NVIDIA/nv-ingest/blob/main/docker-compose.yaml). + By default, we have [configured log levels to be verbose](https://github.com/NVIDIA/NeMo-Retriever/blob/main/docker-compose.yaml). It's possible to observe service startup proceeding. 
You will notice a lot of log messages. Disable verbose logging by configuring `NIM_TRITON_LOG_VERBOSE=0` for each NIM in [docker-compose.yaml](https://github.com/NVIDIA/NeMo-Retriever/blob/main/docker-compose.yaml). !!! tip @@ -86,28 +91,28 @@ h. Run the command `docker ps`. You should see output similar to the following. ``` CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES - 1b885f37c991 nvcr.io/nvidia/nemo-microservices/nv-ingest:... "/usr/bin/tini -- /w…" 7 minutes ago Up 7 minutes (healthy) 0.0.0.0:7670... nv-ingest-nv-ingest-ms-runtime-1 + 1b885f37c991 nvcr.io/nvidia/nemo-microservices/... "/usr/bin/tini -- /w…" 7 minutes ago Up 7 minutes (healthy) 0.0.0.0:7670... nemo-retriever-ms-runtime-1 14ef31ed7f49 milvusdb/milvus:v2.5.3-gpu "/tini -- bash -c 's…" 7 minutes ago Up 7 minutes (healthy) 0.0.0.0:9091... milvus-standalone - dceaf36cc5df otel/opentelemetry-collector-contrib:... "/otelcol-contrib --…" 7 minutes ago Up 7 minutes 0.0.0.0:4317... nv-ingest-otel-collector-1 - 5bd0b48eb71b nvcr.io/nim/nvidia/nemoretriever-graphic-ele... "/opt/nvidia/nvidia_…" 7 minutes ago Up 7 minutes 0.0.0.0:8003... nv-ingest-graphic-elements-1 - daf878669036 nvcr.io/nim/nvidia/nemoretriever-ocr-v1:1.2.1 "/opt/nvidia/nvidia_…" 7 minutes ago Up 7 minutes 0.0.0.0:8009... nv-ingest-ocr-1 - 216bdf11c566 nvcr.io/nim/nvidia/nemoretriever-page-elements-v3:1.7.0 "/opt/nvidia/nvidia_…" 7 minutes ago Up 7 minutes 0.0.0.0:8000... nv-ingest-page-elements-1 - aee9580b0b9a nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.10.0 "/opt/nvidia/nvidia_…" 7 minutes ago Up 7 minutes 0.0.0.0:8012... nv-ingest-embedding-1 - 178a92bf6f7f nvcr.io/nim/nvidia/nemoretriever-table-struc... "/opt/nvidia/nvidia_…" 7 minutes ago Up 7 minutes 0.0.0.0:8006... nv-ingest-table-structure-1 - 7ddbf7690036 openzipkin/zipkin "start-zipkin" 7 minutes ago Up 7 minutes (healthy) 9410/tcp... nv-ingest-zipkin-1 + dceaf36cc5df otel/opentelemetry-collector-contrib:... 
"/otelcol-contrib --…" 7 minutes ago Up 7 minutes 0.0.0.0:4317... nemo-retriever-otel-collector-1 + 5bd0b48eb71b nvcr.io/nim/nvidia/nemoretriever-graphic-ele... "/opt/nvidia/nvidia_…" 7 minutes ago Up 7 minutes 0.0.0.0:8003... nemo-retriever-graphic-elements-1 + daf878669036 nvcr.io/nim/nvidia/nemoretriever-ocr-v1:1.2.1 "/opt/nvidia/nvidia_…" 7 minutes ago Up 7 minutes 0.0.0.0:8009... nemo-retriever-ocr-1 + 216bdf11c566 nvcr.io/nim/nvidia/nemoretriever-page-elements-v3:1.7.0 "/opt/nvidia/nvidia_…" 7 minutes ago Up 7 minutes 0.0.0.0:8000... nemo-retriever-page-elements-1 + aee9580b0b9a nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.10.0 "/opt/nvidia/nvidia_…" 7 minutes ago Up 7 minutes 0.0.0.0:8012... nemo-retriever-embedding-1 + 178a92bf6f7f nvcr.io/nim/nvidia/nemoretriever-table-struc... "/opt/nvidia/nvidia_…" 7 minutes ago Up 7 minutes 0.0.0.0:8006... nemo-retriever-table-structure-1 + 7ddbf7690036 openzipkin/zipkin "start-zipkin" 7 minutes ago Up 7 minutes (healthy) 9410/tcp... nemo-retriever-zipkin-1 b73bbe0c202d minio/minio:RELEASE.2023-03-20T20-16-18Z "/usr/bin/docker-ent…" 7 minutes ago Up 7 minutes (healthy) 0.0.0.0:9000... minio - 97fa798dbe4f prom/prometheus:latest "/bin/prometheus --w…" 7 minutes ago Up 7 minutes 0.0.0.0:9090... nv-ingest-prometheus-1 + 97fa798dbe4f prom/prometheus:latest "/bin/prometheus --w…" 7 minutes ago Up 7 minutes 0.0.0.0:9090... nemo-retriever-prometheus-1 f17cb556b086 grafana/grafana "/run.sh" 7 minutes ago Up 7 minutes 0.0.0.0:3000... grafana-service - 3403c5a0e7be redis/redis-stack "/entrypoint.sh" 7 minutes ago Up 7 minutes 0.0.0.0:6379... nv-ingest-redis-1 + 3403c5a0e7be redis/redis-stack "/entrypoint.sh" 7 minutes ago Up 7 minutes 0.0.0.0:6379... nemo-retriever-redis-1 ``` ## Step 2: Ingest Documents -You can submit jobs programmatically in Python or using the [CLI](nv-ingest_cli.md). +You can submit jobs programmatically in Python or using the [CLI](cli-reference.md). !!! 
important "Python version" - Install the client and CLI into an environment that uses Python 3.12 or later. The published packages require Python `>= 3.12`; using Python 3.10 or 3.11 typically fails with dependency resolution errors. See [Prerequisites](prerequisites.md) and [Support Matrix](support-matrix.md). + Install the client and CLI into an environment that uses Python 3.12 or later. The published packages require Python `>= 3.12`; using Python 3.10 or 3.11 typically fails with dependency resolution errors. Refer to [Prerequisites](prerequisites.md) and [Support Matrix](support-matrix.md). The following examples demonstrate how to extract text, charts, tables, and images: @@ -121,7 +126,7 @@ The following examples demonstrate how to extract text, charts, tables, and imag !!! tip - For more Python examples, refer to [NV-Ingest: Python Client Quick Start Guide](https://github.com/NVIDIA/nv-ingest/blob/main/client/client_examples/examples/python_client_usage.ipynb). + For more Python examples, refer to the [Python Quick Start Guide](https://github.com/NVIDIA/NeMo-Retriever/blob/main/client/client_examples/examples/python_client_usage.ipynb). ```python @@ -258,15 +263,15 @@ image_caption:[] ``` -### Using the `nv-ingest-cli` +### Using the CLI !!! tip - There is a Jupyter notebook available to help you get started with the CLI. For more information, refer to [CLI Client Quick Start Guide](https://github.com/NVIDIA/nv-ingest/blob/main/client/client_examples/examples/cli_client_usage.ipynb). + There is a Jupyter notebook available to help you get started with the CLI. For more information, refer to the [CLI Quick Start Guide](https://github.com/NVIDIA/NeMo-Retriever/blob/main/client/client_examples/examples/cli_client_usage.ipynb). 
```shell -nv-ingest-cli \ +nemo-retriever \ --doc ./data/multimodal_test.pdf \ --output_directory ./processed_docs \ --task='extract:{"document_type": "pdf", "extract_method": "pdfium", "extract_tables": "true", "extract_images": "true", "extract_charts": "true"}' \ @@ -282,47 +287,47 @@ None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be ava [nltk_data] /path/to/your/venv/lib/python3.12/site- [nltk_data] packages/llama_index/core/_static/nltk_cache... [nltk_data] Package punkt_tab is already up-to-date! -INFO:nv_ingest_client.nv_ingest_cli:Processing 1 documents. -INFO:nv_ingest_client.nv_ingest_cli:Output will be written to: ./processed_docs +INFO:retriever_client.cli:Processing 1 documents. +INFO:retriever_client.cli:Output will be written to: ./processed_docs Processing files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00, 2.34s/file, pages_per_sec=1.28] -INFO:nv_ingest_client.cli.util.processing:message_broker_task_source: Avg: 2.39 ms, Median: 2.39 ms, Total Time: 2.39 ms, Total % of Trace Computation: 0.06% -INFO:nv_ingest_client.cli.util.processing:broker_source_network_in: Avg: 9.51 ms, Median: 9.51 ms, Total Time: 9.51 ms, Total % of Trace Computation: 0.25% -INFO:nv_ingest_client.cli.util.processing:job_counter: Avg: 1.47 ms, Median: 1.47 ms, Total Time: 1.47 ms, Total % of Trace Computation: 0.04% -INFO:nv_ingest_client.cli.util.processing:job_counter_channel_in: Avg: 0.46 ms, Median: 0.46 ms, Total Time: 0.46 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:metadata_injection: Avg: 3.52 ms, Median: 3.52 ms, Total Time: 3.52 ms, Total % of Trace Computation: 0.09% -INFO:nv_ingest_client.cli.util.processing:metadata_injection_channel_in: Avg: 0.16 ms, Median: 0.16 ms, Total Time: 0.16 ms, Total % of Trace Computation: 0.00% -INFO:nv_ingest_client.cli.util.processing:pdf_content_extractor: Avg: 475.64 ms, 
Median: 163.77 ms, Total Time: 2378.21 ms, Total % of Trace Computation: 62.73% -INFO:nv_ingest_client.cli.util.processing:pdf_content_extractor_channel_in: Avg: 0.31 ms, Median: 0.31 ms, Total Time: 0.31 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:image_content_extractor: Avg: 0.67 ms, Median: 0.67 ms, Total Time: 0.67 ms, Total % of Trace Computation: 0.02% -INFO:nv_ingest_client.cli.util.processing:image_content_extractor_channel_in: Avg: 0.21 ms, Median: 0.21 ms, Total Time: 0.21 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:docx_content_extractor: Avg: 0.46 ms, Median: 0.46 ms, Total Time: 0.46 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:docx_content_extractor_channel_in: Avg: 0.20 ms, Median: 0.20 ms, Total Time: 0.20 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:pptx_content_extractor: Avg: 0.68 ms, Median: 0.68 ms, Total Time: 0.68 ms, Total % of Trace Computation: 0.02% -INFO:nv_ingest_client.cli.util.processing:pptx_content_extractor_channel_in: Avg: 0.46 ms, Median: 0.46 ms, Total Time: 0.46 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:audio_data_extraction: Avg: 1.08 ms, Median: 1.08 ms, Total Time: 1.08 ms, Total % of Trace Computation: 0.03% -INFO:nv_ingest_client.cli.util.processing:audio_data_extraction_channel_in: Avg: 0.20 ms, Median: 0.20 ms, Total Time: 0.20 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:dedup_images: Avg: 0.42 ms, Median: 0.42 ms, Total Time: 0.42 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:dedup_images_channel_in: Avg: 0.42 ms, Median: 0.42 ms, Total Time: 0.42 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:filter_images: Avg: 0.59 ms, Median: 0.59 ms, Total Time: 0.59 ms, Total % of Trace Computation: 0.02% 
-INFO:nv_ingest_client.cli.util.processing:filter_images_channel_in: Avg: 0.57 ms, Median: 0.57 ms, Total Time: 0.57 ms, Total % of Trace Computation: 0.02% -INFO:nv_ingest_client.cli.util.processing:table_data_extraction: Avg: 240.75 ms, Median: 240.75 ms, Total Time: 481.49 ms, Total % of Trace Computation: 12.70% -INFO:nv_ingest_client.cli.util.processing:table_data_extraction_channel_in: Avg: 0.38 ms, Median: 0.38 ms, Total Time: 0.38 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:chart_data_extraction: Avg: 300.54 ms, Median: 299.94 ms, Total Time: 901.62 ms, Total % of Trace Computation: 23.78% -INFO:nv_ingest_client.cli.util.processing:chart_data_extraction_channel_in: Avg: 0.23 ms, Median: 0.23 ms, Total Time: 0.23 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:infographic_data_extraction: Avg: 0.77 ms, Median: 0.77 ms, Total Time: 0.77 ms, Total % of Trace Computation: 0.02% -INFO:nv_ingest_client.cli.util.processing:infographic_data_extraction_channel_in: Avg: 0.25 ms, Median: 0.25 ms, Total Time: 0.25 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:caption_ext: Avg: 0.55 ms, Median: 0.55 ms, Total Time: 0.55 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:caption_ext_channel_in: Avg: 0.51 ms, Median: 0.51 ms, Total Time: 0.51 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:embed_text: Avg: 1.21 ms, Median: 1.21 ms, Total Time: 1.21 ms, Total % of Trace Computation: 0.03% -INFO:nv_ingest_client.cli.util.processing:embed_text_channel_in: Avg: 0.21 ms, Median: 0.21 ms, Total Time: 0.21 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:store_embedding_minio: Avg: 0.32 ms, Median: 0.32 ms, Total Time: 0.32 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:store_embedding_minio_channel_in: Avg: 1.18 ms, Median: 1.18 ms, 
Total Time: 1.18 ms, Total % of Trace Computation: 0.03% -INFO:nv_ingest_client.cli.util.processing:message_broker_task_sink_channel_in: Avg: 0.42 ms, Median: 0.42 ms, Total Time: 0.42 ms, Total % of Trace Computation: 0.01% -INFO:nv_ingest_client.cli.util.processing:No unresolved time detected. Trace times account for the entire elapsed duration. -INFO:nv_ingest_client.cli.util.processing:Processed 1 files in 2.34 seconds. -INFO:nv_ingest_client.cli.util.processing:Total pages processed: 3 -INFO:nv_ingest_client.cli.util.processing:Throughput (Pages/sec): 1.28 -INFO:nv_ingest_client.cli.util.processing:Throughput (Files/sec): 0.43 +INFO:retriever_client.processing:message_broker_task_source: Avg: 2.39 ms, Median: 2.39 ms, Total Time: 2.39 ms, Total % of Trace Computation: 0.06% +INFO:retriever_client.processing:broker_source_network_in: Avg: 9.51 ms, Median: 9.51 ms, Total Time: 9.51 ms, Total % of Trace Computation: 0.25% +INFO:retriever_client.processing:job_counter: Avg: 1.47 ms, Median: 1.47 ms, Total Time: 1.47 ms, Total % of Trace Computation: 0.04% +INFO:retriever_client.processing:job_counter_channel_in: Avg: 0.46 ms, Median: 0.46 ms, Total Time: 0.46 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:metadata_injection: Avg: 3.52 ms, Median: 3.52 ms, Total Time: 3.52 ms, Total % of Trace Computation: 0.09% +INFO:retriever_client.processing:metadata_injection_channel_in: Avg: 0.16 ms, Median: 0.16 ms, Total Time: 0.16 ms, Total % of Trace Computation: 0.00% +INFO:retriever_client.processing:pdf_content_extractor: Avg: 475.64 ms, Median: 163.77 ms, Total Time: 2378.21 ms, Total % of Trace Computation: 62.73% +INFO:retriever_client.processing:pdf_content_extractor_channel_in: Avg: 0.31 ms, Median: 0.31 ms, Total Time: 0.31 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:image_content_extractor: Avg: 0.67 ms, Median: 0.67 ms, Total Time: 0.67 ms, Total % of Trace Computation: 0.02% 
+INFO:retriever_client.processing:image_content_extractor_channel_in: Avg: 0.21 ms, Median: 0.21 ms, Total Time: 0.21 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:docx_content_extractor: Avg: 0.46 ms, Median: 0.46 ms, Total Time: 0.46 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:docx_content_extractor_channel_in: Avg: 0.20 ms, Median: 0.20 ms, Total Time: 0.20 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:pptx_content_extractor: Avg: 0.68 ms, Median: 0.68 ms, Total Time: 0.68 ms, Total % of Trace Computation: 0.02% +INFO:retriever_client.processing:pptx_content_extractor_channel_in: Avg: 0.46 ms, Median: 0.46 ms, Total Time: 0.46 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:audio_data_extraction: Avg: 1.08 ms, Median: 1.08 ms, Total Time: 1.08 ms, Total % of Trace Computation: 0.03% +INFO:retriever_client.processing:audio_data_extraction_channel_in: Avg: 0.20 ms, Median: 0.20 ms, Total Time: 0.20 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:dedup_images: Avg: 0.42 ms, Median: 0.42 ms, Total Time: 0.42 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:dedup_images_channel_in: Avg: 0.42 ms, Median: 0.42 ms, Total Time: 0.42 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:filter_images: Avg: 0.59 ms, Median: 0.59 ms, Total Time: 0.59 ms, Total % of Trace Computation: 0.02% +INFO:retriever_client.processing:filter_images_channel_in: Avg: 0.57 ms, Median: 0.57 ms, Total Time: 0.57 ms, Total % of Trace Computation: 0.02% +INFO:retriever_client.processing:table_data_extraction: Avg: 240.75 ms, Median: 240.75 ms, Total Time: 481.49 ms, Total % of Trace Computation: 12.70% +INFO:retriever_client.processing:table_data_extraction_channel_in: Avg: 0.38 ms, Median: 0.38 ms, Total Time: 0.38 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:chart_data_extraction: 
Avg: 300.54 ms, Median: 299.94 ms, Total Time: 901.62 ms, Total % of Trace Computation: 23.78% +INFO:retriever_client.processing:chart_data_extraction_channel_in: Avg: 0.23 ms, Median: 0.23 ms, Total Time: 0.23 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:infographic_data_extraction: Avg: 0.77 ms, Median: 0.77 ms, Total Time: 0.77 ms, Total % of Trace Computation: 0.02% +INFO:retriever_client.processing:infographic_data_extraction_channel_in: Avg: 0.25 ms, Median: 0.25 ms, Total Time: 0.25 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:caption_ext: Avg: 0.55 ms, Median: 0.55 ms, Total Time: 0.55 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:caption_ext_channel_in: Avg: 0.51 ms, Median: 0.51 ms, Total Time: 0.51 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:embed_text: Avg: 1.21 ms, Median: 1.21 ms, Total Time: 1.21 ms, Total % of Trace Computation: 0.03% +INFO:retriever_client.processing:embed_text_channel_in: Avg: 0.21 ms, Median: 0.21 ms, Total Time: 0.21 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:store_embedding_minio: Avg: 0.32 ms, Median: 0.32 ms, Total Time: 0.32 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:store_embedding_minio_channel_in: Avg: 1.18 ms, Median: 1.18 ms, Total Time: 1.18 ms, Total % of Trace Computation: 0.03% +INFO:retriever_client.processing:message_broker_task_sink_channel_in: Avg: 0.42 ms, Median: 0.42 ms, Total Time: 0.42 ms, Total % of Trace Computation: 0.01% +INFO:retriever_client.processing:No unresolved time detected. Trace times account for the entire elapsed duration. +INFO:retriever_client.processing:Processed 1 files in 2.34 seconds. 
+INFO:retriever_client.processing:Total pages processed: 3 +INFO:retriever_client.processing:Throughput (Pages/sec): 1.28 +INFO:retriever_client.processing:Throughput (Files/sec): 0.43 ``` ## Step 3: Inspecting and Consuming Results @@ -349,7 +354,7 @@ multimodal_test.pdf.metadata.json For the full metadata definitions, refer to [Content Metadata](content-metadata.md). -We also provide a script for inspecting [extracted images](https://github.com/NVIDIA/nv-ingest/blob/main/src/util/image_viewer.py). +We also provide a script for inspecting [extracted images](https://github.com/NVIDIA/NeMo-Retriever/blob/main/src/util/image_viewer.py). First, install `tkinter` by running the following code. Choose the code for your OS. @@ -380,7 +385,7 @@ python src/util/image_viewer.py --file_path ./processed_docs/image/multimodal_te !!! tip - Beyond inspecting the results, you can read them into things like [llama-index](https://github.com/NVIDIA/nv-ingest/blob/main/examples/llama_index_multimodal_rag.ipynb) or [langchain](https://github.com/NVIDIA/nv-ingest/blob/main/examples/langchain_multimodal_rag.ipynb) retrieval pipelines. Also, checkout our [Enterprise RAG Blueprint on build.nvidia.com](https://build.nvidia.com/nvidia/multimodal-pdf-data-extraction-for-enterprise-rag) to query over document content pre-extracted with the retriever pipeline. + Beyond inspecting the results, you can read them into things like [llama-index](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/llama_index_multimodal_rag.ipynb) or [langchain](https://github.com/NVIDIA/NeMo-Retriever/blob/main/examples/langchain_multimodal_rag.ipynb) retrieval pipelines. Also, check out our [Enterprise RAG Blueprint on build.nvidia.com](https://build.nvidia.com/nvidia/multimodal-pdf-data-extraction-for-enterprise-rag) to query over document content pre-extracted with the retriever pipeline. @@ -394,7 +399,7 @@ You can specify multiple `--profile` options.
| `retrieval` | Core | Enables the embedding NIM and (optional) GPU-accelerated Milvus. Omit this profile to use the default LanceDB backend. | | `audio` | Advanced | Use the [parakeet-1-1b-ctc-en-us](https://docs.nvidia.com/nim/speech/latest/asr/deploy-asr-models/parakeet-ctc-en-us.html) ASR NIM (`nvcr.io/nim/nvidia/parakeet-1-1b-ctc-en-us`) for processing audio files. For more information, refer to [Audio Processing](audio.md). | | `nemotron-parse` | Advanced | Use [nemotron-parse](https://build.nvidia.com/nvidia/nemotron-parse), which adds state-of-the-art text and table extraction. For more information, refer to [Advanced Visual Parsing](nemoretriever-parse.md). | -| `vlm` | Advanced | Use [Nemotron Nano 12B v2 VL](https://build.nvidia.com/nvidia/nemotron-nano-12b-v2-vl/modelcard) for image captioning of unstructured images and infographics. This profile enables the `caption` method in the Python API to generate text descriptions of visual content. For more information, refer to [Use Multimodal Embedding](vlm-embed.md) and [Extract Captions from Images](nv-ingest-python-api.md#extract-captions-from-images). | +| `vlm` | Advanced | Use [Nemotron Nano 12B v2 VL](https://build.nvidia.com/nvidia/nemotron-nano-12b-v2-vl/modelcard) for image captioning of unstructured images and infographics. This profile enables the `caption` method in the Python API to generate text descriptions of visual content. For more information, refer to [Use Multimodal Embedding](vlm-embed.md) and [Extract Captions from Images](python-api-reference.md#extract-captions-from-images). | ### Example: Using the VLM Profile for Infographic Captioning @@ -421,7 +426,7 @@ Ensure the same image tags and `docker-compose.yaml` version are used in both en ## Docker Compose override files -The default [docker-compose.yaml](https://github.com/NVIDIA/nv-ingest/blob/main/docker-compose.yaml) might exceed VRAM on a single GPU for some hardware. 
Override files reduce per-service memory, batch sizes, or concurrency so the full pipeline can run on the available GPU. To use an override, pass a second `-f` file after the base compose file; Docker Compose merges them and the override takes precedence. +The default [docker-compose.yaml](https://github.com/NVIDIA/NeMo-Retriever/blob/main/docker-compose.yaml) might exceed VRAM on a single GPU for some hardware. Override files reduce per-service memory, batch sizes, or concurrency so the full pipeline can run on the available GPU. To use an override, pass a second `-f` file after the base compose file; Docker Compose merges them and the override takes precedence. | Override file | GPU target | |---------------|------------| @@ -475,11 +480,11 @@ docker compose \ ## Specify MIG slices for NIM models -When you deploy the pipeline with NIM models on MIG‑enabled GPUs, MIG device slices are requested and scheduled through the `values.yaml` file for the corresponding NIM microservice. For IBM Content-Aware Storage (CAS) deployments, this allows NIM pods to land only on nodes that expose the desired MIG profiles [raw.githubusercontent](https://raw.githubusercontent.com/NVIDIA/nv-ingest/main/helm/README.md). +When you deploy the pipeline with NIM models on MIG‑enabled GPUs, MIG device slices are requested and scheduled through the `values.yaml` file for the corresponding NIM microservice. For IBM Content-Aware Storage (CAS) deployments, this allows NIM pods to land only on nodes that expose the desired MIG profiles [raw.githubusercontent](https://raw.githubusercontent.com/NVIDIA/NeMo-Retriever/main/helm/README.md). 
To target a specific MIG profile—for example, a 3g.20gb slice on an A100, which is a hardware-partitioned virtual GPU instance that gives your workload a fixed mid-sized share of the A100’s compute plus 20 GB of dedicated GPU memory and behaves like a smaller independent GPU—for a given NIM, configure the `resources` and `nodeSelector` under that NIM’s values path in `values.yaml`. -The following example shows the pattern. Paths vary by NIM, such as `nvingest.nvidiaNim.nemoretrieverPageElements` instead of the generic `nvingest.nim` placeholder. For details refer to [catalog.ngc.nvidia](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo-microservices/helm-charts/nv-ingest). +The following example shows the pattern. Paths vary by NIM, such as `nvingest.nvidiaNim.nemoretrieverPageElements` instead of the generic `nvingest.nim` placeholder. For details refer to the [Helm chart README](https://github.com/NVIDIA/NeMo-Retriever/blob/main/helm/README.md) and the chart listing on [NGC](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo-microservices/containers). Set `resources.requests` and `resources.limits` to the name of the MIG resource that you want (for example, `nvidia.com/mig-3g.20gb`). ```shell nvingest: @@ -495,10 +500,11 @@ nvingest: nvidia.com/gpu.product: A100-SXM4-40GB-MIG-3g.20gb ``` Key points: -* Use the appropriate NIM‑specific values path (for example, `nvingest.nvidiaNim.nemoretrieverPageElements.resources`) rather than the generic `nvingest.nim` placeholder. -* Set `resources.requests` and `resources.limits` to the desired MIG resource name (for example, `nvidia.com/mig-3g.20gb`). -* Use `nodeSelector` (or tolerations/affinity, if you prefer) to target nodes labeled with the corresponding MIG‑enabled GPU product (for example, `nvidia.com/gpu.product: A100-SXM4-40GB-MIG-3g.20gb`). 
-This syntax and structure can be repeated for each NIM model used by CAS, ensuring that each NV-Ingest NIM pod is mapped to the correct MIG slice type and scheduled onto compatible nodes. + + * Use the appropriate NIM‑specific values path (for example, `nvingest.nvidiaNim.nemoretrieverPageElements.resources`) rather than the generic `nvingest.nim` placeholder. + * Set `resources.requests` and `resources.limits` to the desired MIG resource name (for example, `nvidia.com/mig-3g.20gb`). + * Use `nodeSelector` (or tolerations/affinity, if you prefer) to target nodes labeled with the corresponding MIG‑enabled GPU product (for example, `nvidia.com/gpu.product: A100-SXM4-40GB-MIG-3g.20gb`). +This syntax and structure can be repeated for each NIM model used by CAS, ensuring that each NeMo Retriever Library NIM pod is mapped to the correct MIG slice type and scheduled onto compatible nodes. !!! important diff --git a/docs/docs/extraction/quickstart-library-mode.md b/docs/docs/extraction/quickstart-library-mode.md index d86a1b9d0..ce932e4a1 100644 --- a/docs/docs/extraction/quickstart-library-mode.md +++ b/docs/docs/extraction/quickstart-library-mode.md @@ -2,9 +2,9 @@ !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. -Use the [Quick Start for NeMo Retriever Library](https://github.com/NVIDIA/NeMo-Retriever/blob/26.03/nemo_retriever/README.md) to set up and run the NeMo Retriever Library locally, so you can build a GPU‑accelerated, multimodal RAG ingestion pipeline that parses PDFs, HTML, text, audio, and video into LanceDB vector embeddings, integrates with Nemotron RAG models (locally or via NIM endpoints), which includes Ray‑based scaling with built‑in recall evaluation. Python 3.12 or later is required (see [Prerequisites](prerequisites.md)). 
+Use the [Quick Start for NeMo Retriever Library](https://github.com/NVIDIA/NeMo-Retriever/blob/26.03/nemo_retriever/README.md) to set up and run the NeMo Retriever Library locally, so you can build a GPU‑accelerated, multimodal RAG ingestion pipeline that parses PDFs, HTML, text, audio, and video into LanceDB vector embeddings, integrates with Nemotron RAG models (locally or via NIM endpoints), and includes Ray‑based scaling with built‑in recall evaluation. Python 3.12 or later is required (refer to [Prerequisites](prerequisites.md)). ## `run_pipeline` @@ -21,7 +21,7 @@ The following table matches the function signature in source (defaults and optio | Parameter | Required | Type (default) | Description | |-----------|----------|----------------|-------------| | `pipeline_config` | No | `Optional[PipelineConfigSchema]` (`None`) | Validated pipeline configuration. If `None` and `libmode=True`, the default library-mode pipeline is loaded automatically. If `None` and `libmode=False`, a `ValueError` is raised—you must pass a configuration. | -| `block` | No | `bool` (`True`) | If `True`, the call blocks until the pipeline finishes. If `False`, returns immediately with a handle object (see [Return type](#return-type)). | +| `block` | No | `bool` (`True`) | If `True`, the call blocks until the pipeline finishes. If `False`, returns immediately with a handle object (refer to [Return type](#return-type)). | | `disable_dynamic_scaling` | No | `Optional[bool]` (`None`) | If set, overrides the same field from the pipeline configuration. | | `dynamic_memory_threshold` | No | `Optional[float]` (`None`) | If set, overrides the same field from the pipeline configuration. | | `run_in_subprocess` | No | `bool` (`False`) | If `True`, runs the pipeline in a separate Python subprocess (`multiprocessing.Process`). If `False`, runs in the current process. 
| diff --git a/docs/docs/extraction/ray-logging.md b/docs/docs/extraction/ray-logging.md index 524674bae..9e4add405 100644 --- a/docs/docs/extraction/ray-logging.md +++ b/docs/docs/extraction/ray-logging.md @@ -1,5 +1,10 @@ # Configure Ray Logging +!!! note + + This documentation describes NeMo Retriever Library. + + [NeMo Retriever Library](overview.md) uses [Ray](https://docs.ray.io/en/latest/index.html) for logging. You can use environment variables for fine-grained control over [Ray's logging behavior](https://docs.ray.io/en/latest/ray-observability/user-guides/configure-logging.html). In addition, NeMo Retriever Library provides preset configurations that you can use to quickly update Ray logging behavior. @@ -84,7 +89,7 @@ This log level uses the following settings: - **Import Warnings** – Enabled - **Usage Stats** – Enabled - **Storage** – 20GB total (512MB × 40 files) -- **Deduplication** – Disabled (see all duplicate messages) +- **Deduplication** – Disabled (refer to all duplicate messages) - **Encoding** – JSON with function names and line numbers diff --git a/docs/docs/extraction/releasenotes-nv-ingest.md b/docs/docs/extraction/releasenotes.md similarity index 86% rename from docs/docs/extraction/releasenotes-nv-ingest.md rename to docs/docs/extraction/releasenotes.md index d89a3148e..82275fd27 100644 --- a/docs/docs/extraction/releasenotes-nv-ingest.md +++ b/docs/docs/extraction/releasenotes.md @@ -4,7 +4,7 @@ This documentation contains the release notes for [NeMo Retriever Library](overv !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. 
## 26.03 Release Notes (26.3.0) @@ -14,7 +14,7 @@ To upgrade the Helm charts for this release, refer to the [NeMo Retriever Librar Highlights for the 26.03 release include: -- NV-Ingest GitHub repo renamed to NeMo-Retriever +- Legacy ingestion repository consolidated under NeMo-Retriever - NeMo Retriever Extraction pipeline renamed to NeMo Retriever Library - NeMo Retriever Library now supports two deployment options: - A new no-container, pip-installable in-process library for development (available on PyPI) @@ -35,15 +35,15 @@ Highlights for the 26.03 release include: ## Release Notes for Previous Versions -| [26.1.2](https://docs.nvidia.com/nemo/retriever/26.1.2/extraction/releasenotes-nv-ingest/) -| [26.1.1](https://docs.nvidia.com/nemo/retriever/26.1.1/extraction/releasenotes-nv-ingest/) -| [25.9.0](https://docs.nvidia.com/nemo/retriever/25.9.0/extraction/releasenotes-nv-ingest/) -| [25.6.3](https://docs.nvidia.com/nemo/retriever/25.6.3/extraction/releasenotes-nv-ingest/) -| [25.6.2](https://docs.nvidia.com/nemo/retriever/25.6.2/extraction/releasenotes-nv-ingest/) -| [25.4.2](https://docs.nvidia.com/nemo/retriever/25.4.2/extraction/releasenotes-nv-ingest/) -| [25.3.0](https://docs.nvidia.com/nemo/retriever/25.3.0/extraction/releasenotes-nv-ingest/) -| [24.12.1](https://docs.nvidia.com/nemo/retriever/25.3.0/extraction/releasenotes-nv-ingest/#release-24121) -| [24.12.0](https://docs.nvidia.com/nemo/retriever/25.3.0/extraction/releasenotes-nv-ingest/#release-2412) +| [26.1.2](https://docs.nvidia.com/nemo/retriever/26.1.2/extraction/releasenotes/) +| [26.1.1](https://docs.nvidia.com/nemo/retriever/26.1.1/extraction/releasenotes/) +| [25.9.0](https://docs.nvidia.com/nemo/retriever/25.9.0/extraction/releasenotes/) +| [25.6.3](https://docs.nvidia.com/nemo/retriever/25.6.3/extraction/releasenotes/) +| [25.6.2](https://docs.nvidia.com/nemo/retriever/25.6.2/extraction/releasenotes/) +| [25.4.2](https://docs.nvidia.com/nemo/retriever/25.4.2/extraction/releasenotes/) +| 
[25.3.0](https://docs.nvidia.com/nemo/retriever/25.3.0/extraction/releasenotes/) +| [24.12.1](https://docs.nvidia.com/nemo/retriever/25.3.0/extraction/releasenotes/#release-24121) +| [24.12.0](https://docs.nvidia.com/nemo/retriever/25.3.0/extraction/releasenotes/#release-2412) ## Related Topics diff --git a/docs/docs/extraction/reranking.md b/docs/docs/extraction/reranking.md new file mode 100644 index 000000000..65badf87b --- /dev/null +++ b/docs/docs/extraction/reranking.md @@ -0,0 +1,23 @@ +# Reranking + +!!! note + + This documentation describes NeMo Retriever Library. + + +A **reranker** model re-scores the top candidates from initial retrieval so the final top-K better matches the query. NeMo Retriever Library evaluation and deployments can integrate reranker NIMs (refer to the [Support matrix](support-matrix.md) for availability and GPU considerations). + +**When to enable reranking** + +Enable reranking when both of the following apply: + +- Recall at K from vector or hybrid search is acceptable, but ordering of the top results still needs improvement. +- Benchmarking shows measurable gains for your domain (refer to reranker modes in [Benchmarking](benchmarking.md)). + +**Configuration pointers** + +Reranker options appear in benchmarking and recall configurations. Start with [Benchmarking](benchmarking.md) and your vector store setup in [Vector databases](data-store.md). + +**Related** + +- [Workflow: Query and rerank](workflow-query-rerank.md) diff --git a/docs/docs/extraction/resources-links.md b/docs/docs/extraction/resources-links.md new file mode 100644 index 000000000..7fc8a80a0 --- /dev/null +++ b/docs/docs/extraction/resources-links.md @@ -0,0 +1,18 @@ +# Resources + +!!! note + + This documentation describes NeMo Retriever Library. + + +External pages for product context, catalogs, and solution blueprints. 
+ +## Product and catalog + +- [NeMo Retriever — NVIDIA product information](https://www.nvidia.com/en-us/ai-data-science/products/nemo/) (landing context within the NeMo suite) +- [NVIDIA NIM catalog](https://build.nvidia.com/) — Browse NIM microservices relevant to ingestion, embedding, and reranking +- [Enterprise RAG — multimodal PDF data extraction blueprint](https://build.nvidia.com/nvidia/multimodal-pdf-data-extraction-for-enterprise-rag) + +## Open source + +- [OSS licences](../license.md) (optional reference for bundled or linked components) diff --git a/docs/docs/extraction/scaling-modes.md b/docs/docs/extraction/scaling-modes.md index 8fc4684a1..adfba617e 100644 --- a/docs/docs/extraction/scaling-modes.md +++ b/docs/docs/extraction/scaling-modes.md @@ -7,7 +7,7 @@ This guide covers how resource scaling modes work across stages in [NeMo Retriev !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. @@ -18,7 +18,7 @@ This guide covers how resource scaling modes work across stages in [NeMo Retriev ## Configure (docker-compose) -Edit `services > nv-ingest-ms-runtime > environment` in `docker-compose.yaml`. +Edit the **ingestion runtime** service’s `environment` in `docker-compose.yaml` (the service that exposes the NeMo Retriever Library API; in the reference compose file this is the `*-ms-runtime` service for ingestion). 
### Select mode @@ -35,7 +35,7 @@ Example (Static): ```yaml services: - nv-ingest-ms-runtime: + ingestion-ms-runtime: environment: - INGEST_DISABLE_DYNAMIC_SCALING=true - INGEST_STATIC_MEMORY_THRESHOLD=0.85 @@ -45,7 +45,7 @@ Example (Dynamic): ```yaml services: - nv-ingest-ms-runtime: + ingestion-ms-runtime: environment: - INGEST_DISABLE_DYNAMIC_SCALING=false - INGEST_DYNAMIC_MEMORY_THRESHOLD=0.80 @@ -91,7 +91,7 @@ services: Open `docker-compose.yaml` and locate: -- `services > nv-ingest-ms-runtime > environment`: +- `services > <runtime-service-name> > environment` (use the runtime service name from your compose file): - `INGEST_DISABLE_DYNAMIC_SCALING` - `INGEST_DYNAMIC_MEMORY_THRESHOLD` - `INGEST_STATIC_MEMORY_THRESHOLD` @@ -102,4 +102,4 @@ Open `docker-compose.yaml` and locate: - [Prerequisites](prerequisites.md) - [Support Matrix](support-matrix.md) -- [Troubleshooting](troubleshooting.md) +- [Troubleshooting](troubleshoot.md) diff --git a/docs/docs/extraction/self-host-nims-when-to-use.md b/docs/docs/extraction/self-host-nims-when-to-use.md new file mode 100644 index 000000000..452f32d49 --- /dev/null +++ b/docs/docs/extraction/self-host-nims-when-to-use.md @@ -0,0 +1,22 @@ +# When to self-host NIMs + +!!! note + + This documentation describes NeMo Retriever Library. + + +Self-hosted NIMs run on your GPUs or air-gapped hardware, typically with Kubernetes and the [NIM Operator](https://docs.nvidia.com/nim-operator/latest/index.html). + +Consider self-hosting when: + +- You need an air gap, strict data residency, or customer data must not leave your network. +- You run at large scale where dedicated capacity can cost less than hosted API usage. +- You must meet latency or locality requirements that hosted regions cannot satisfy. + +**GPU sharing.** The NIM Operator supports time-slicing and MIG so multiple NIM workloads can share GPUs. A NIM used with NeMo Retriever Library does not always need a full dedicated GPU when the operator and GPU profile are set correctly. 
For scheduling and GPU partitioning, refer to the [NIM Operator documentation](https://docs.nvidia.com/nim-operator/latest/index.html). + +**Related** + +- [Deploy (Helm Chart)](helm.md) +- [Support matrix](support-matrix.md) +- [Compare deployment options](choose-your-path.md) diff --git a/docs/docs/extraction/semantic-hybrid-retrieval.md b/docs/docs/extraction/semantic-hybrid-retrieval.md new file mode 100644 index 000000000..eae94123a --- /dev/null +++ b/docs/docs/extraction/semantic-hybrid-retrieval.md @@ -0,0 +1,19 @@ +# Semantic and hybrid retrieval + +!!! note + + This documentation describes NeMo Retriever Library. + + +**Semantic retrieval** uses dense embeddings to find content that is similar in meaning to a query. **Hybrid retrieval** combines dense vectors with sparse or lexical signals (for example, BM25-style full-text) and fuses ranked lists for better recall on keyword-heavy queries. + +In NeMo Retriever Library, use these resources: + +- [Concepts](concepts.md) for pipeline and search patterns +- [Vector databases](data-store.md) for LanceDB hybrid mode (dense, BM25, and RRF) and Milvus dense or sparse patterns +- [Environment variables](environment-config.md) for hybrid-related flags where documented +- [Custom metadata and filtering](custom-metadata.md) for filtering + +**Evaluation** + +For harnesses and metrics, refer to [Benchmarking](benchmarking.md). diff --git a/docs/docs/extraction/support-matrix.md b/docs/docs/extraction/support-matrix.md index b109be691..22ef162b9 100644 --- a/docs/docs/extraction/support-matrix.md +++ b/docs/docs/extraction/support-matrix.md @@ -4,7 +4,7 @@ Before you begin using [NeMo Retriever Library](overview.md), ensure that you ha !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. ## Core and Advanced Pipeline Features @@ -28,7 +28,7 @@ This includes the following: !!! 
note - While nemotron-nano-12b-v2-vl is the default VLM, you can configure and use other vision language models for image captioning based on your specific use case requirements. For more information, refer to [Extract Captions from Images](nv-ingest-python-api.md#extract-captions-from-images). + While nemotron-nano-12b-v2-vl is the default VLM, you can configure and use other vision language models for image captioning based on your specific use case requirements. For more information, refer to [Extract Captions from Images](python-api-reference.md#extract-captions-from-images). - Reranker — Use [llama-3.2-nv-rerankqa-1b-v2](https://build.nvidia.com/nvidia/llama-3.2-nv-rerankqa-1b-v2) for improved retrieval accuracy. @@ -77,6 +77,6 @@ and run only the embedder, reranker, and your vector database. ## Related Topics - [Prerequisites](prerequisites.md) -- [Release Notes](releasenotes-nv-ingest.md) +- [Release Notes](releasenotes.md) - [NVIDIA NIM for Vision Language Models Support Matrix](https://docs.nvidia.com/nim/vision-language-models/latest/support-matrix.html) - [NVIDIA Speech NIM Microservices](https://docs.nvidia.com/nim/speech/latest/reference/support-matrix/index.html) diff --git a/docs/docs/extraction/supported-file-types.md b/docs/docs/extraction/supported-file-types.md new file mode 100644 index 000000000..3a9dde0f8 --- /dev/null +++ b/docs/docs/extraction/supported-file-types.md @@ -0,0 +1,14 @@ +# Supported file types and formats + +!!! note + + This documentation describes NeMo Retriever Library. + + +NeMo Retriever Library accepts multiple document and media types. A current list (including PDF, Office formats, HTML, images, audio, and video, some early access) appears in [What is NeMo Retriever Library?](overview.md) under supported file types. 
+ +**Related** + +- [Troubleshoot](troubleshoot.md) for format-specific issues +- [Text and layout extraction](text-layout-extraction.md) +- [Speech and audio](audio.md) diff --git a/docs/docs/extraction/telemetry.md b/docs/docs/extraction/telemetry.md index b42c3b6aa..8e2f75063 100644 --- a/docs/docs/extraction/telemetry.md +++ b/docs/docs/extraction/telemetry.md @@ -4,7 +4,7 @@ You can view telemetry data for [NeMo Retriever Library](overview.md). !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. ## OpenTelemetry diff --git a/docs/docs/extraction/text-layout-extraction.md b/docs/docs/extraction/text-layout-extraction.md new file mode 100644 index 000000000..173b46521 --- /dev/null +++ b/docs/docs/extraction/text-layout-extraction.md @@ -0,0 +1,14 @@ +# Text and layout extraction + +!!! note + + This documentation describes NeMo Retriever Library. + + +For PDFs, NeMo Retriever Library typically uses **pdfium**-based extraction with configurable depth and paths. Scanned or mixed pages may use hybrid or OCR-oriented methods. For `extract_method` options such as `pdfium`, `pdfium_hybrid`, and `ocr`, refer to the [Python API reference](python-api-reference.md). + +**Related** + +- [What is NeMo Retriever Library?](overview.md) +- [OCR and scanned documents](extraction-ocr-scanned.md) +- [Chunking and splitting](chunking.md) diff --git a/docs/docs/extraction/throughput-is-dataset-dependent.md b/docs/docs/extraction/throughput-is-dataset-dependent.md index 399d116c8..dc3349e92 100644 --- a/docs/docs/extraction/throughput-is-dataset-dependent.md +++ b/docs/docs/extraction/throughput-is-dataset-dependent.md @@ -1,5 +1,10 @@ # Why Throughput Is Dataset-Dependent +!!! note + + This documentation describes NeMo Retriever Library. + + A single headline metric can drastically misrepresent system efficiency. 
The amount of compute that you need to process a dataset depends far more on its content and how your pipeline operates than on its disk size. This documentation explains why, and offers you better ways to measure and report throughput. diff --git a/docs/docs/extraction/troubleshoot.md b/docs/docs/extraction/troubleshoot.md index 6edc3c0a2..9d7835732 100644 --- a/docs/docs/extraction/troubleshoot.md +++ b/docs/docs/extraction/troubleshoot.md @@ -4,7 +4,7 @@ Use this documentation to troubleshoot issues that arise when you use [NeMo Retr !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. ## Can't process long, non-language text strings @@ -52,7 +52,7 @@ This happens because, by default, NeMo Retriever Library stores the results from If the total size of the results exceeds the available memory, the process fails. To resolve this issue, use the `save_to_disk` method. -For details, refer to [Working with Large Datasets: Saving to Disk](nv-ingest-python-api.md#work-with-large-datasets-save-to-disk). +For details, refer to [Working with Large Datasets: Saving to Disk](python-api-reference.md#work-with-large-datasets-save-to-disk). diff --git a/docs/docs/extraction/user-defined-functions.md b/docs/docs/extraction/user-defined-functions.md index aef2c09c3..fb6e5061a 100644 --- a/docs/docs/extraction/user-defined-functions.md +++ b/docs/docs/extraction/user-defined-functions.md @@ -5,7 +5,7 @@ This guide covers how to write, validate, and submit UDFs using both the CLI and !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. ## Quickstart @@ -40,7 +40,7 @@ The CLI supports all UDF function specification formats. 
Here are examples of ea #### Inline Function String ```bash # Submit inline UDF function -nv-ingest-cli \ +nemo-retriever \ --doc /path/to/document.pdf \ --output-directory ./output \ --task 'udf:{"udf_function": "def my_processor(control_message): print(\"Processing...\"); return control_message", "udf_function_name": "my_processor", "target_stage": "text_embedder", "run_before": true}' @@ -49,7 +49,7 @@ nv-ingest-cli \ #### Module Path with Colon (Recommended) ```bash # Submit UDF from importable module (preserves all imports and context) -nv-ingest-cli \ +nemo-retriever \ --doc /path/to/document.pdf \ --output-directory ./output \ --task 'udf:{"udf_function": "my_package.processors:enhance_metadata", "target_stage": "text_embedder", "run_after": true}' @@ -58,7 +58,7 @@ nv-ingest-cli \ #### File Path ```bash # Submit UDF from file path -nv-ingest-cli \ +nemo-retriever \ --doc /path/to/document.pdf \ --output-directory ./output \ --task 'udf:{"udf_function": "my_file.py:my_custom_processor", "target_stage": "text_embedder", "run_before": true}' @@ -67,7 +67,7 @@ nv-ingest-cli \ #### Legacy Import Path (Limited) ```bash # Submit UDF using legacy dot notation (function only, no imports) -nv-ingest-cli \ +nemo-retriever \ --doc /path/to/document.pdf \ --output-directory ./output \ --task 'udf:{"udf_function": "my_package.processors.basic_processor", "target_stage": "text_embedder", "run_after": true}' @@ -151,7 +151,7 @@ The DataFrame payload contains the extracted content and metadata for processing | `source_type` | `str` | Source type identifier | | `source_file` | `str` | Path or identifier of the source file | | `id` | `str` | Unique identifier for this content piece | -| `metadata` | `dict` | Rich metadata structure (see below) | +| `metadata` | `dict` | Rich metadata structure (refer to below) | | `content` | `str` | The actual extracted content | #### Example DataFrame Access @@ -264,7 +264,7 @@ def enhance_metadata(control_message: IngestControlMessage) -> 
IngestControlMess return control_message ``` -> **📖 For detailed metadata schema documentation, see:** [metadata_documentation.md](metadata_documentation.md) +> **📖 For detailed metadata schema documentation, refer to:** [Content metadata reference](content-metadata.md) and [Metadata and content schema](multimodal-metadata-schema.md). ### UDF Targeting @@ -304,15 +304,15 @@ UDFs can be executed at different stages of the pipeline by specifying the `targ - `broker_response` - Response message handling - `otel_tracer` - OpenTelemetry tracing -> **Note:** For the complete and up-to-date list of pipeline stages, see the [default_pipeline.yaml](../../../config/default_pipeline.yaml) configuration file. +> **Note:** For the complete and up-to-date list of pipeline stages, refer to the [`default_pipeline.yaml`](https://github.com/NVIDIA/NeMo-Retriever/blob/main/config/default_pipeline.yaml) configuration file in the NeMo Retriever repository. #### Target Stage Selection Examples ```bash # CLI examples for different target stages -nv-ingest-cli --doc file.pdf --task 'udf:{"udf_function": "processor.py:validate_input", "target_stage": "pdf_extractor", "run_before": true}' -nv-ingest-cli --doc file.pdf --task 'udf:{"udf_function": "processor.py:extract_custom", "target_stage": "text_embedder", "run_after": true}' -nv-ingest-cli --doc file.pdf --task 'udf:{"udf_function": "processor.py:enhance_output", "target_stage": "embedding_storage", "run_before": true}' +nemo-retriever --doc file.pdf --task 'udf:{"udf_function": "processor.py:validate_input", "target_stage": "pdf_extractor", "run_before": true}' +nemo-retriever --doc file.pdf --task 'udf:{"udf_function": "processor.py:extract_custom", "target_stage": "text_embedder", "run_after": true}' +nemo-retriever --doc file.pdf --task 'udf:{"udf_function": "processor.py:enhance_output", "target_stage": "embedding_storage", "run_before": true}' ``` ```python @@ -537,7 +537,7 @@ For detailed guidance on creating custom NIM 
integrations, including: - Error handling and debugging - Performance best practices -See the comprehensive [**NimClient Usage Guide**](nimclient_usage.md). +Refer to [**NimClient and custom NIM endpoints**](nimclient.md). ### Error Handling @@ -940,6 +940,6 @@ def debug_udf(control_message: IngestControlMessage) -> IngestControlMessage: ## Related Topics -- [NV-Ingest UDF Examples](https://github.com/NVIDIA/nv-ingest/blob/release/26.1.2/examples/udfs/README.md) +- [NeMo Retriever UDF examples](https://github.com/NVIDIA/NeMo-Retriever/blob/release/26.1.2/examples/udfs/README.md) - [User-Defined Stages for NeMo Retriever Library](user-defined-stages.md) - [NimClient Usage](nimclient.md) diff --git a/docs/docs/extraction/user-defined-stages.md b/docs/docs/extraction/user-defined-stages.md index 247a27eb0..4b041f5a3 100644 --- a/docs/docs/extraction/user-defined-stages.md +++ b/docs/docs/extraction/user-defined-stages.md @@ -8,7 +8,7 @@ and operate on a well-defined DataFrame payload and metadata structure. !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. To add user-defined stages to your pipeline, you need the following: diff --git a/docs/docs/extraction/v2-api-guide.md b/docs/docs/extraction/v2-api-guide.md index a51152f41..3b7b25643 100644 --- a/docs/docs/extraction/v2-api-guide.md +++ b/docs/docs/extraction/v2-api-guide.md @@ -1,5 +1,10 @@ # NeMo Retriever Library V2 API Guide: PDF Pre Splitting +!!! note + + This documentation describes NeMo Retriever Library. + + > **TL;DR:** V2 API automatically splits large PDFs into chunks for faster parallel processing. > > **Python:** Enable with `message_client_kwargs={"api_version": "v2"}` and configure chunk size with `.pdf_split_config(pages_per_chunk=64)`. 
@@ -51,7 +56,7 @@ print(f"Processed {results[0]['metadata']['total_pages']} pages") ### CLI Usage ```bash -nv-ingest-cli \ +nemo-retriever \ --api_version v2 \ --pdf_split_page_count 64 \ --doc large_document.pdf \ @@ -115,7 +120,7 @@ PDF_SPLIT_PAGE_COUNT=64 ```yaml # docker-compose.yaml (already configured) services: - nv-ingest-ms-runtime: + ingestion-ms-runtime: environment: - PDF_SPLIT_PAGE_COUNT=${PDF_SPLIT_PAGE_COUNT:-32} ``` diff --git a/docs/docs/extraction/vector-db-partners.md b/docs/docs/extraction/vector-db-partners.md new file mode 100644 index 000000000..1b7899fa8 --- /dev/null +++ b/docs/docs/extraction/vector-db-partners.md @@ -0,0 +1,13 @@ +# Vector database partners + +!!! note + + This documentation describes NeMo Retriever Library. + + +NeMo Retriever Library integrates with vector databases used for RAG collections. Documentation here focuses on stores used in the library and harnesses, such as LanceDB and Milvus, and cuVS where it applies. Refer to [Vector databases](data-store.md) and [Chunking and splitting](chunking.md). + +**Related** + +- [Embedding NIMs and models](embedding-nims-models.md) +- [NVIDIA NIM catalog](https://build.nvidia.com/) for embedding and retrieval-related NIMs diff --git a/docs/docs/extraction/vlm-embed.md b/docs/docs/extraction/vlm-embed.md index 03d089e8c..941b727ad 100644 --- a/docs/docs/extraction/vlm-embed.md +++ b/docs/docs/extraction/vlm-embed.md @@ -10,7 +10,7 @@ The model supports images that contain text, tables, charts, and infographics. !!! note - NVIDIA Ingest (nv-ingest) has been renamed NeMo Retriever Library. + This documentation describes NeMo Retriever Library. 
## Configure and Run the Multimodal NIM @@ -122,5 +122,5 @@ results = ingestor.ingest() - [Support Matrix](support-matrix.md) - [Troubleshoot Nemo Retriever Extraction](troubleshoot.md) -- [Use the Python API](nv-ingest-python-api.md) -- [Extract Captions from Images](nv-ingest-python-api.md#extract-captions-from-images) +- [Use the Python API](python-api-reference.md) +- [Extract Captions from Images](python-api-reference.md#extract-captions-from-images) diff --git a/docs/docs/extraction/workflow-agentic-retrieval.md b/docs/docs/extraction/workflow-agentic-retrieval.md new file mode 100644 index 000000000..b7aa51021 --- /dev/null +++ b/docs/docs/extraction/workflow-agentic-retrieval.md @@ -0,0 +1,19 @@ +# Workflow: Agentic retrieval + +!!! note + + This documentation describes NeMo Retriever Library. + + +**Agentic retrieval** describes patterns where a planner or tool-using agent queries retrieval systems in a loop (often combining multiple searches, filters, and rerankers) instead of sending a single static query. + +NeMo Retriever Library provides ingestion, embedding, storage, and retrieval building blocks (jobs, chunking, vector stores, hybrid search, reranking) that you orchestrate in application code or frameworks. 
+ +**Where to go next** + +Use these pages together with your orchestration layer: + +- [Semantic and hybrid retrieval](semantic-hybrid-retrieval.md), [Custom metadata and filtering](custom-metadata.md), and [Reranking](reranking.md) +- [Agentic retrieval (concept)](agentic-retrieval-concept.md) +- [Benchmarking](benchmarking.md), which includes retrieval evaluation harnesses +- [Release notes](releasenotes.md), which may mention agentic retrieval updates diff --git a/docs/docs/extraction/workflow-build-searchable-collection.md b/docs/docs/extraction/workflow-build-searchable-collection.md new file mode 100644 index 000000000..0bb409e87 --- /dev/null +++ b/docs/docs/extraction/workflow-build-searchable-collection.md @@ -0,0 +1,12 @@ +# Workflow: Build a searchable collection + +!!! note + + This documentation describes NeMo Retriever Library. + + +After [document ingestion](workflow-document-ingestion.md), configure [chunking](chunking.md) and your [vector database](data-store.md) so extracted content is embedded, indexed, and ready for search. + +Technical detail for storage and chunking is in the **Embedding, indexing, and storage** section of the navigation ([Vector databases](data-store.md), [Chunking and splitting](chunking.md)). + +**Next:** [Workflow: Query and rerank](workflow-query-rerank.md). diff --git a/docs/docs/extraction/workflow-document-ingestion.md b/docs/docs/extraction/workflow-document-ingestion.md new file mode 100644 index 000000000..55516fe4c --- /dev/null +++ b/docs/docs/extraction/workflow-document-ingestion.md @@ -0,0 +1,18 @@ +# Workflow: Document ingestion + +!!! note + + This documentation describes NeMo Retriever Library. + + +**Document ingestion** is the step where NeMo Retriever Library reads your files (PDFs, Office documents, images, and other [supported formats](supported-file-types.md)), runs extraction and optional enrichment, and returns structured content you can chunk, embed, and index. + +Follow these steps: + +1. 
**Choose how you call the library.** Use the [Python API](python-api-reference.md) or [CLI](cli-reference.md) from application code, or run a deployment (for example [Quickstart: NeMo Retriever Library (local)](quickstart-library-mode.md), [Deploy (Docker Compose)](quickstart-guide.md), or [Quickstart: Kubernetes (Helm)](helm.md)) and send jobs over the network. +2. **Use the V2 processing pipeline.** Parallel PDF handling and the default ingest path are described in the [HTTP API (V2)](v2-api-guide.md) and [API guide](nemo-retriever-api-reference.md). Set `message_client_kwargs={"api_version": "v2"}` when using the client if you need to be explicit. +3. **Tune extraction for your content.** Refer to [Multimodal extraction](supported-file-types.md) for formats, [Text and layout extraction](text-layout-extraction.md), [Tables](extraction-tables.md), [OCR](extraction-ocr-scanned.md), and related topics in the navigation. + +Pipeline concepts and stage overview appear in [Key concepts](concepts.md). + +**Next:** [Workflow: Build a searchable collection](workflow-build-searchable-collection.md). diff --git a/docs/docs/extraction/workflow-e2e-blueprints.md b/docs/docs/extraction/workflow-e2e-blueprints.md new file mode 100644 index 000000000..aa4d6c0f0 --- /dev/null +++ b/docs/docs/extraction/workflow-e2e-blueprints.md @@ -0,0 +1,8 @@ +# Workflow: End-to-end RAG with NVIDIA Blueprints + +!!! note + + This documentation describes NeMo Retriever Library. + + +For NVIDIA AI Blueprint links, solution cards, enterprise RAG resources, and related product landing pages, refer to [NVIDIA AI Blueprints and product links](resources-links.md). diff --git a/docs/docs/extraction/workflow-query-rerank.md b/docs/docs/extraction/workflow-query-rerank.md new file mode 100644 index 000000000..9f216c30f --- /dev/null +++ b/docs/docs/extraction/workflow-query-rerank.md @@ -0,0 +1,18 @@ +# Workflow: Query and rerank + +!!! note + + This documentation describes NeMo Retriever Library. 
+
+
+After documents are [ingested](workflow-document-ingestion.md), [chunked](chunking.md), and [indexed](data-store.md), applications run retrieval (semantic and optionally hybrid search) and optional reranking to improve top-K quality.
+
+Follow these steps:
+
+1. **Query.** Run searches against your vector store with filters as needed. Refer to [Semantic and hybrid retrieval](semantic-hybrid-retrieval.md) and [Custom metadata and filtering](custom-metadata.md).
+2. **Combine hybrid patterns.** Merge dense vectors with sparse or full-text signals where supported, such as LanceDB hybrid mode in [Vector databases](data-store.md).
+3. **Rerank.** Apply a reranker NIM for a second-stage score on candidates. Refer to [Reranking](reranking.md) and the [Support matrix](support-matrix.md) for reranker NIM options and GPU notes.
+
+**Bridge from extraction**
+
+Pipeline stages are summarized in [Concepts](concepts.md). Retrieval quality evaluation appears in [Benchmarking](benchmarking.md) and [Evaluate on your data](evaluate-on-your-data.md).
diff --git a/docs/docs/extraction/workflow-video-ocr.md b/docs/docs/extraction/workflow-video-ocr.md
new file mode 100644
index 000000000..583e9f7bd
--- /dev/null
+++ b/docs/docs/extraction/workflow-video-ocr.md
@@ -0,0 +1,16 @@
+# Workflow: Video processing with OCR
+
+!!! note
+
+    This documentation describes NeMo Retriever Library.
+
+
+For video assets, NeMo Retriever Library can combine audio or speech processing with visual text extraction when OCR applies to frames or derived images.
+
+For the audio and speech path, refer to [Speech and audio](audio.md) for Riva ASR and related ingestion paths.
+
+For visual text and OCR, scanned or image-heavy content often uses OCR-oriented extract methods. Refer to [OCR and scanned documents](extraction-ocr-scanned.md), [Text and layout extraction](text-layout-extraction.md), and [Nemotron Parse](nemoretriever-parse.md) for advanced visual parsing.
+ +Container formats and early-access video types are listed in [Supported file types and formats](supported-file-types.md). + +For end-to-end RAG stacks that include multimodal ingestion, refer to [End-to-end RAG with NVIDIA Blueprints](resources-links.md). diff --git a/docs/docs/license.md b/docs/docs/license.md index b50c196ac..8509b7e60 100644 --- a/docs/docs/license.md +++ b/docs/docs/license.md @@ -1,21 +1,21 @@ # License -This page summarizes where to find licensing terms for the nv-ingest project, third-party components, and related NVIDIA software. +This page summarizes where to find licensing terms for the NeMo Retriever project, third-party components, and related NVIDIA software. ## License -The nv-ingest source code and documentation in this repository are licensed under the **Apache License, Version 2.0**, unless otherwise noted. +The NeMo Retriever source code and documentation in this repository are licensed under the **Apache License, Version 2.0**, unless otherwise noted. Reference links to the main license file in the GitHub repository: -- [LICENSE](https://github.com/NVIDIA/nv-ingest/blob/main/LICENSE) (repository file on GitHub) -- [LICENSE (raw text)](https://raw.githubusercontent.com/NVIDIA/nv-ingest/main/LICENSE) (raw view for download or copy) +- [LICENSE](https://github.com/NVIDIA/NeMo-Retriever/blob/main/LICENSE) (repository file on GitHub) +- [LICENSE (raw text)](https://raw.githubusercontent.com/NVIDIA/NeMo-Retriever/main/LICENSE) (raw view for download or copy) ## Third-Party Licenses - **NeMo Retriever:** [Third Party License Notice](https://github.com/NVIDIA/NeMo-Retriever/blob/main/README.md#notices) in the NeMo Retriever README. -The authoritative third-party list for this repository is [THIRD_PARTY_LICENSES.md](https://github.com/NVIDIA/nv-ingest/blob/main/THIRD_PARTY_LICENSES.md). 
Contents: +The authoritative third-party list for this repository is [THIRD_PARTY_LICENSES.md](https://github.com/NVIDIA/NeMo-Retriever/blob/main/THIRD_PARTY_LICENSES.md). Contents: --8<-- "../THIRD_PARTY_LICENSES.md:3" @@ -25,4 +25,4 @@ The authoritative third-party list for this repository is [THIRD_PARTY_LICENSES. ## Contributing and license compatibility -Contribution guidelines and notes on license compatibility are in **[CONTRIBUTING.md](https://github.com/NVIDIA/nv-ingest/blob/main/CONTRIBUTING.md)** in the repository. +Contribution guidelines and notes on license compatibility are in **[CONTRIBUTING.md](https://github.com/NVIDIA/NeMo-Retriever/blob/main/CONTRIBUTING.md)** in the repository. diff --git a/docs/mkdocs.nrl-github-pages.yml b/docs/mkdocs.nrl-github-pages.yml new file mode 100644 index 000000000..b773cebe9 --- /dev/null +++ b/docs/mkdocs.nrl-github-pages.yml @@ -0,0 +1,194 @@ +# NeMo Retriever Library (NRL) only — GitHub Pages staging/nightly build. +# Does not include the broader NeMo Retriever suite landing page (docs/docs/index.md), +# full multi-package Sphinx API dumps, or legacy nv-ingest markdown aliases in the built site. 
+# +# Build: make nrl-github-pages OR mkdocs build -f mkdocs.nrl-github-pages.yml +# Optional: SITE_URL=https://owner.github.io/repo/ mkdocs build -f mkdocs.nrl-github-pages.yml + +site_name: NeMo Retriever (staging) +site_description: NeMo Retriever documentation — staging / nightly (not a production release) +site_url: !ENV [SITE_URL, 'https://example.invalid/'] + +theme: + name: material + custom_dir: overrides-nrl-staging + features: + - content.code.annotate + - navigation.tabs + - navigation.indexes + - navigation.instant + - navigation.path + - navigation.instant.prefetch + - navigation.top + - navigation.footer + - navigation.expand + - search.suggest + - search.highlight + - content.code.copy + font: + code: Roboto Mono + favicon: assets/images/favicon.png + language: en + # Visible on every page — staging / not a release build + announcement: | + **Staging (nightly):** NeMo Retriever documentation only. This site is not a production or release publication. + palette: + - media: "(prefers-color-scheme: light)" + scheme: light + primary: custom + accent: lime + toggle: + icon: material/weather-night + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + scheme: dark + primary: custom + accent: lime + toggle: + icon: material/weather-sunny + name: Switch to light mode + icon: + logo: nvidia/nvidia-logo + repo: fontawesome/brands/github + +extra_css: + - assets/css/color-schemes.css + - assets/css/fonts.css + - assets/css/custom-material.css + - assets/css/jupyter-themes.css + +# Broader suite overview and legacy duplicate pages are excluded from the build (see exclude_docs). +nav: + - NeMo Retriever: + - "1. Introduction": + - "What is NeMo Retriever?": extraction/overview.md + - Key features: extraction/key-features.md + - Key concepts: extraction/concepts.md + - How to use this documentation: extraction/how-to-use-this-documentation.md + - Release notes: extraction/releasenotes.md + - "2. 
Get started": + - About this section: extraction/getting-started-about.md + - Prerequisites: extraction/prerequisites.md + - "Hardware and support matrix": extraction/support-matrix.md + - "Quickstart: NeMo Retriever Library (local)": extraction/quickstart-library-mode.md + - "Quickstart: Kubernetes (Helm)": extraction/helm.md + - "Deploy (Docker Compose)": extraction/quickstart-guide.md + - "Authentication and API keys": extraction/ngc-api-key.md + - "3. Choose your deployment": + - Compare deployment options: extraction/choose-your-path.md + - When to use NVIDIA-hosted NIMs: extraction/hosted-nims-when-to-use.md + - When to self-host NIMs: extraction/self-host-nims-when-to-use.md + - "4. Core workflows": + - "Workflow: Document ingestion": extraction/workflow-document-ingestion.md + - "Workflow: Build a searchable collection": extraction/workflow-build-searchable-collection.md + - "Workflow: Query and rerank": extraction/workflow-query-rerank.md + - "Workflow: Agentic retrieval": extraction/workflow-agentic-retrieval.md + - "Workflow: Audio or video to text": extraction/audio.md + - "Workflow: Video processing with OCR": extraction/workflow-video-ocr.md + - "Workflow: End-to-end RAG with NVIDIA Blueprints": extraction/workflow-e2e-blueprints.md + - "5. Multimodal extraction": + - "Supported file types and formats": extraction/supported-file-types.md + - "Text and layout extraction": extraction/text-layout-extraction.md + - Tables: extraction/extraction-tables.md + - "Charts and infographics": extraction/extraction-charts-infographics.md + - "OCR and scanned documents": extraction/extraction-ocr-scanned.md + - "Nemotron Parse based parsing": extraction/nemoretriever-parse.md + - Image captioning: extraction/image-captioning.md + - "Metadata and content schema": extraction/multimodal-metadata-schema.md + - "Extraction limitations and quality": extraction/throughput-is-dataset-dependent.md + - "6. 
Embedding, indexing, and storage": + - "Embedding NIMs and models": extraction/embedding-nims-models.md + - "Multimodal embeddings (VLM)": extraction/vlm-embed.md + - Vector databases: extraction/data-store.md + - "Chunking and splitting": extraction/chunking.md + - "7. Retrieval and ranking": + - "Semantic and hybrid retrieval": extraction/semantic-hybrid-retrieval.md + - Reranking: extraction/reranking.md + - "Custom metadata and filtering": extraction/custom-metadata.md + - "Agentic retrieval (concept)": extraction/agentic-retrieval-concept.md + - "8. Deployment and operations": + - "Scaling: static and dynamic": extraction/scaling-modes.md + - "Ray and distributed ingest": extraction/ray-logging.md + - "Telemetry and observability": extraction/telemetry.md + - Production checklist: extraction/production-checklist.md + - "9. Customize and extend": + - User-defined functions: extraction/user-defined-functions.md + - User-defined stages: extraction/user-defined-stages.md + - "NimClient and custom NIM endpoints": extraction/nimclient.md + - "10. Integrations and ecosystem": + - "NVIDIA AI Blueprints": extraction/resources-links.md + - "Framework integrations": extraction/integrations-langchain-llamaindex-haystack.md + - "Vector database partners": extraction/vector-db-partners.md + - "Starter kits": extraction/notebooks.md + - "11. Evaluation and benchmarks": + - "Benchmarking methodology and evaluation harnesses": extraction/benchmarking.md + - "Evaluate on your own documents": extraction/evaluate-on-your-data.md + - "Published metrics and comparisons": extraction/published-metrics-comparisons.md + - "12. Reference": + - "API guide": extraction/nemo-retriever-api-reference.md + - "HTTP API (V2)": extraction/v2-api-guide.md + - "Python API": extraction/python-api-reference.md + - "CLI reference": extraction/cli-reference.md + - Environment variables: extraction/environment-config.md + - "Metadata reference": extraction/content-metadata.md + - "13. 
Support and community": + - Troubleshooting: extraction/troubleshoot.md + - FAQ: extraction/faq.md + - Contributing: extraction/contributing.md + - Additional resources: + - "OSS licences": license.md + +plugins: + - search + - macros + - mkdocstrings: + handlers: + python: + options: + docstring_style: google + show_source: true + show_if_no_docstring: true + paths: + - ../nemo_retriever/src + - mkdocs-jupyter: + theme: auto + highlight_extra_classes: "jupyter-notebook" + - redirects: + redirect_maps: + # Suite docs/docs/index.md is excluded; still emit site/index.html (redirect to Library overview) + index.md: extraction/overview.md + extraction/index.md: extraction/overview.md + extraction/nv-ingest_cli.md: extraction/cli-reference.md + extraction/nv-ingest-python-api.md: extraction/python-api-reference.md + - site-urls + +markdown_extensions: + - attr_list + - md_in_html + - pymdownx.details + - pymdownx.superfences + - pymdownx.snippets: + check_paths: true + restrict_base_path: false + base_path: "." + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + options: + custom_icons: + - overrides/.icons + - def_list + - admonition + - footnotes + +# MkDocs 1.6+: drop pages that are not part of the NRL-only site (still in repo for upstream parity). +exclude_docs: | + index.md + extraction/nv-ingest_cli.md + extraction/nv-ingest-python-api.md + +extra: + generator: false + +copyright: | + © Copyright 2023–2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. Staging documentation build. 
diff --git a/docs/overrides-nrl-staging/.icons/nvidia/nvidia-logo.svg b/docs/overrides-nrl-staging/.icons/nvidia/nvidia-logo.svg new file mode 100644 index 000000000..da235bf46 --- /dev/null +++ b/docs/overrides-nrl-staging/.icons/nvidia/nvidia-logo.svg @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/docs/overrides-nrl-staging/main.html b/docs/overrides-nrl-staging/main.html new file mode 100644 index 000000000..98138487f --- /dev/null +++ b/docs/overrides-nrl-staging/main.html @@ -0,0 +1,13 @@ +{# Staging GitHub Pages: no Adobe DTM / Hotjar; keep content and theme footer. #} +{% extends "base.html" %} + +{% block extrahead %} +{% endblock %} + +{% block content %} + {{ super() }} +{% endblock %} + +{% block footer %} + {{ super() }} +{% endblock %} diff --git a/docs/scripts/print_nrl_mkdocs_nav.py b/docs/scripts/print_nrl_mkdocs_nav.py new file mode 100644 index 000000000..e4c0a46ad --- /dev/null +++ b/docs/scripts/print_nrl_mkdocs_nav.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +"""Print the navigation tree from mkdocs.nrl-github-pages.yml for pre-deploy review.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +from mkdocs.utils import yaml_load + +CONFIG = Path(__file__).resolve().parents[1] / "mkdocs.nrl-github-pages.yml" + + +def _walk(node, indent: int = 0) -> None: + pad = " " * indent + if isinstance(node, str): + print(f"{pad}- {node}") + return + if isinstance(node, list): + for item in node: + _walk(item, indent) + return + if isinstance(node, dict): + for key, val in node.items(): + if isinstance(val, str): + print(f"{pad}- {key}: {val}") + else: + print(f"{pad}- {key}") + _walk(val, indent + 1) + return + print(f"{pad}- {node!r}") + + +def main() -> int: + if not CONFIG.is_file(): + print(f"Missing config: {CONFIG}", file=sys.stderr) + return 1 + data = yaml_load(CONFIG.read_text(encoding="utf-8")) + nav = data.get("nav") + print(f"Config: {CONFIG.name}") + print("Navigation hierarchy (NRL GitHub Pages):") + print() + 
_walk(nav) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/docs/scripts/scan_non_nrl_doc_references.py b/docs/scripts/scan_non_nrl_doc_references.py new file mode 100644 index 000000000..b22bbd74d --- /dev/null +++ b/docs/scripts/scan_non_nrl_doc_references.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Scan NRL doc sources for legacy naming and non-NRL references (manual review). + +This does not fail the build; it lists candidates for editorial cleanup. +""" + +from __future__ import annotations + +import re +import sys +from pathlib import Path + +DOCS_ROOT = Path(__file__).resolve().parents[1] / "docs" + +# Lines matching these patterns are reported (case-sensitive variants handled separately). +PATTERNS: list[tuple[str, re.Pattern[str]]] = [ + ("nv-ingest (substring)", re.compile(r"nv-ingest", re.IGNORECASE)), + ("NV-Ingest", re.compile(r"NV-Ingest")), + ("NVIDIA Ingest (rename note)", re.compile(r"NVIDIA Ingest")), + ("github.com/.../nv-ingest", re.compile(r"github\.com/[^/\s]+/nv-ingest")), + ("nvcr.io/.../nv-ingest", re.compile(r"nvcr\.io/[^\s)]+nv-ingest")), + ("nv_ingest (Python package)", re.compile(r"\bnv_ingest\b")), + ("NV_INGEST_", re.compile(r"NV_INGEST_")), + ("releasenotes-nv-ingest filename", re.compile(r"releasenotes-nv-ingest")), +] + + +def main() -> int: + if not DOCS_ROOT.is_dir(): + print(f"Missing docs tree: {DOCS_ROOT}", file=sys.stderr) + return 1 + + md_files = sorted(DOCS_ROOT.rglob("*.md")) + print("Non-NRL / legacy reference scan (informational)") + print(f"Root: {DOCS_ROOT}") + print(f"Files scanned: {len(md_files)}") + print() + + total = 0 + for path in md_files: + rel = path.relative_to(DOCS_ROOT) + try: + text = path.read_text(encoding="utf-8") + except OSError as e: + print(f"{rel}: read error: {e}", file=sys.stderr) + continue + lines = text.splitlines() + file_hits: list[str] = [] + for i, line in enumerate(lines, start=1): + for label, pat in PATTERNS: + if pat.search(line): + snippet = 
line.strip() + if len(snippet) > 160: + snippet = snippet[:157] + "..." + file_hits.append(f" L{i} [{label}] {snippet}") + break + if file_hits: + total += len(file_hits) + print(f"{rel}:") + print("\n".join(file_hits)) + print() + + print(f"Total flagged lines (may overlap patterns): {total}") + print() + print( + "Notes: Some hits are expected (historical rename notes, CLI compatibility, " + "environment variables, or container image names). Use this list for editorial review only." + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())