py-pdf · papametis · Jan 16, 2026 · Jan 16, 2026 · Jan 16, 2026 · Jan 19, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,7 @@
 
 ### New Features (ENH)
 - `pagemeta` now displays the name of a known page format that is close to the page dimensions
+- `extract-images`: added an optional `--output-dir` / `-o` option to choose the directory where the extracted images are written
 
 
 ## Version 0.5.1, 2025-10-13

diff --git a/docs/user/subcommand-extract-images.md b/docs/user/subcommand-extract-images.md
@@ -10,15 +10,15 @@ $ pdfly extract-images --help
  Extract images from PDF without resampling or altering.
 
  Adapted from work by Sylvain Pelissier
- http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-res
- ampling-in-python
+ http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python
 
-╭─ Arguments ──────────────────────────────────────────────────────────────────╮
-│ *    pdf      FILE  [default: None] [required]                               │
-╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --help          Show this message and exit.                                  │
-╰──────────────────────────────────────────────────────────────────────────────╯
+┌─ Arguments ───────────────────────────────────────────────────────────────────────────────────────┐
+│ *    pdf      FILE  [required]                                                                    │
+└───────────────────────────────────────────────────────────────────────────────────────────────────┘
+┌─ Options ─────────────────────────────────────────────────────────────────────────────────────────┐
+│ --output-dir  -o      DIRECTORY  Output directory. Defaults to the input's directory.             │
+│ --help                           Show this message and exit.                                      │
+└───────────────────────────────────────────────────────────────────────────────────────────────────┘
 
 ```
 
@@ -27,10 +27,20 @@ $ pdfly extract-images --help
 Extract the first page of `document.pdf` and extract the images present in it.
 
 ```
-pdfly cat document.pdf 9 -o page.pdf
+pdfly cat document.pdf 0 -o page.pdf
 
-pdfly extract-text page.pdf
+pdfly extract-images page.pdf
  Extracted 1 images:
- - 0-Im0.png
+ - 0000-Image0.png
+
+```
+
+Extract the images of `document.pdf` into the parent of the current working directory.
+
+```
+pdfly extract-images document.pdf -o ..
+ Extracted 1 images:
+ - <parent_directory>/0000-Image0.png
+ Stored in <parent_directory>
 
 ```
diff --git a/pdfly/cli.py b/pdfly/cli.py
@@ -214,8 +214,20 @@ def extract_images(
             resolve_path=True,
         ),
     ],
+    output_dir: Annotated[
+        Path | None,
+        typer.Option(
+            "--output-dir",
+            "-o",
+            file_okay=False,
+            exists=True,
+            resolve_path=True,
+            writable=True,
+            help="Output directory. Defaults to the input's directory.",
+        ),
+    ] = None,
 ) -> None:
-    pdfly.extract_images.main(pdf)
+    pdfly.extract_images.main(pdf, output_dir)
 
 
 @entry_point.command(name="extract-text")  # type: ignore[misc]

diff --git a/pdfly/extract_images.py b/pdfly/extract_images.py
@@ -10,12 +10,16 @@
 from pypdf import PdfReader
 
 
-def main(pdf: Path) -> None:
+def main(pdf: Path, output_dir: Path | None) -> None:
     reader = PdfReader(str(pdf))
+    if not output_dir:
+        output_dir = Path("")
     extracted_images = []
     for page_index, page0 in enumerate(reader.pages):
         for image_file_object in page0.images:
-            path = f"{page_index:04d}-{image_file_object.name}"
+            path = output_dir / Path(
+                f"{page_index:04d}-{image_file_object.name}"
+            )
             with open(path, "wb") as fp:
                 fp.write(image_file_object.data)
             extracted_images.append(path)
@@ -26,3 +30,5 @@ def main(pdf: Path) -> None:
         print(f"Extracted {len(extracted_images)} images:")
         for path in extracted_images:
             print(f"- {path}")
+        if str(output_dir) != ".":
+            print(f"Stored in {output_dir}")
diff --git a/tests/test_extract_images.py b/tests/test_extract_images.py
@@ -29,3 +29,22 @@ def test_extract_images_monochrome(
     captured = capsys.readouterr()
     assert not captured.err
     assert "Extracted 1 images" in captured.out
+
+
+def test_extract_images_specific_output_dir(
+    capsys: pytest.CaptureFixture,
+    tmp_path: Path,
+) -> None:
+    with chdir(tmp_path):
+        run_cli(
+            [
+                "extract-images",
+                str(RESOURCES_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"),
+                "--output-dir",
+                str(tmp_path),
+            ]
+        )
+    captured = capsys.readouterr()
+    assert not captured.err
+    assert "Extracted 3 images" in captured.out
+    assert f"Stored in {tmp_path}" in captured.out