Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,18 @@
## Version 0.6.0, not released yet


## Version 0.5.2, 2025-10-15

### New Features (ENH)
- `meta` command now displays PDF permissions in a compact, single-line format ([#19](https://github.com/py-pdf/pdfly/issues/19))
- Unencrypted PDFs show "n/a (unencrypted)"
- Encrypted PDFs show comma-separated list of allowed permissions (e.g., "print, modify, annotate, fill-forms")
- When no permissions are allowed, shows "none (all denied)"

### Bug Fixes (BUG)
- Fixed header reading for encrypted PDFs by seeking to position 0 before reading the %PDF-... version


## Version 0.5.1, 2025-10-13

### Bug Fixes (BUG)
Expand Down
85 changes: 68 additions & 17 deletions pdfly/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
from pathlib import Path
from typing import Optional

from pydantic import BaseModel
from pydantic import BaseModel, Field
from pypdf import PdfReader
from pypdf.constants import UserAccessPermissions as UAP # pypdf ≥ 4

from ._utils import OutputOptions

Expand All @@ -25,7 +26,10 @@ class MetaInfo(BaseModel):
attachments: str = "unknown"
id1: Optional[bytes] = None
id2: Optional[bytes] = None
images: list[int] = []
images: list[int] = Field(default_factory=list)

# Permissions (single-line, compact)
permissions: str = "unknown"

# PDF /Info dictionary
author: Optional[str] = None
Expand All @@ -44,10 +48,68 @@ class MetaInfo(BaseModel):
access_time: datetime


def _format_permissions(uap) -> str:
    """
    Return a compact, single-line summary of allowed permissions.

    uap may be None (unencrypted), an IntFlag, or an object with .to_dict().

    Returns:
        "n/a (unencrypted)" when uap is None; a comma-separated list of
        labels for the allowed permissions (e.g. "print, extract");
        "none (all denied)" when no known permission is allowed; or
        "unknown" when uap cannot be interpreted at all.
    """
    if uap is None:
        return "n/a (unencrypted)"

    # Flag name -> display label. Keys are upper-case; names coming from
    # to_dict() are upper-cased before the lookup so that both "PRINT" and
    # "print" spellings are recognised.
    label_map = {
        "PRINT": "print",
        "PRINT_TO_REPRESENTATION": "print-high",
        "MODIFY": "modify",
        "EXTRACT": "extract",
        "ADD_OR_MODIFY": "annotate",
        "FILL_FORM_FIELDS": "fill-forms",
        "EXTRACT_TEXT_AND_GRAPHICS": "accessibility-copy",
        "ASSEMBLE_DOC": "assemble",
    }

    # Prefer mapping via to_dict() if available
    to_dict = getattr(uap, "to_dict", None)
    if callable(to_dict):
        try:
            flags = to_dict()  # {"PRINT": True, ...} or {"print": True, ...}
            # Output order follows the mapping's own iteration order.
            items = [
                label_map[name.upper()]
                for name, allowed in flags.items()
                if allowed
                and isinstance(name, str)
                and name.upper() in label_map
            ]
            return ", ".join(items) if items else "none (all denied)"
        except Exception:
            # Best effort: to_dict() is foreign code and may misbehave or
            # return a non-mapping; fall through to the bitmask check.
            pass

    # Fallback: bitwise check if constants exist
    if UAP is not None:
        checks = [
            (UAP.PRINT, "print"),
            (UAP.PRINT_TO_REPRESENTATION, "print-high"),
            (UAP.MODIFY, "modify"),
            (UAP.EXTRACT, "extract"),
            (UAP.ADD_OR_MODIFY, "annotate"),
            (UAP.FILL_FORM_FIELDS, "fill-forms"),
            (UAP.EXTRACT_TEXT_AND_GRAPHICS, "accessibility-copy"),
            (UAP.ASSEMBLE_DOC, "assemble"),
        ]
        try:
            mask = int(uap)
        except (TypeError, ValueError):
            # Not int-castable: nothing more we can do with this object.
            return "unknown"
        items = [label for flag, label in checks if mask & int(flag)]
        return ", ".join(items) if items else "none (all denied)"

    return "unknown"


def main(pdf: Path, output: OutputOptions) -> None:
reader = PdfReader(str(pdf))

# Compute permissions string for both encrypted/unencrypted files
perm_str = _format_permissions(getattr(reader, "user_access_permissions", None))

if reader.is_encrypted:
pdf_stat = pdf.stat()
# read header
reader.stream.seek(0)
pdf_file_version = reader.stream.read(8).decode("utf-8")
meta = MetaInfo(
encryption=(
EncryptionData(
Expand All @@ -57,7 +119,8 @@ def main(pdf: Path, output: OutputOptions) -> None:
if reader.is_encrypted and reader._encryption
else None
),
pdf_file_version=reader.stream.read(8).decode("utf-8"),
pdf_file_version=pdf_file_version,
permissions=perm_str,
# OS Info
file_permissions=f"{stat.filemode(pdf_stat.st_mode)}",
file_size=pdf_stat.st_size,
Expand Down Expand Up @@ -88,6 +151,7 @@ def main(pdf: Path, output: OutputOptions) -> None:
attachments=str(list(reader.attachments.keys())),
id1=pdf_id[0] if pdf_id is not None else None,
id2=pdf_id[1] if pdf_id is not None and len(pdf_id) >= 2 else None,
permissions=perm_str,
# OS Info
file_permissions=f"{stat.filemode(pdf_stat.st_mode)}",
file_size=pdf_stat.st_size,
Expand Down Expand Up @@ -138,6 +202,7 @@ def main(pdf: Path, output: OutputOptions) -> None:
table.add_row("Keywords", meta.keywords)
table.add_row("Pages", f"{meta.pages:,}" if meta.pages else "unknown")
table.add_row("Encrypted", f"{meta.encryption}")
table.add_row("Permissions", meta.permissions)
table.add_row("PDF File Version", meta.pdf_file_version)
table.add_row("Page Layout", meta.page_layout)
table.add_row("Page Mode", meta.page_mode)
Expand All @@ -160,18 +225,6 @@ def main(pdf: Path, output: OutputOptions) -> None:
"Images", f"{len(meta.images)} images ({sum(meta.images):,} bytes)"
)

enc_table = Table(title="Encryption information")
enc_table.add_column(
"Attribute", justify="right", style="cyan", no_wrap=True
)
enc_table.add_column("Value", style="white")
if meta.encryption:
enc_table.add_row(
"Security Handler Revision Number",
str(meta.encryption.revision),
)
enc_table.add_row("V value", str(meta.encryption.v_value))

os_table = Table(title="Operating System Data")
os_table.add_column(
"Attribute", justify="right", style="cyan", no_wrap=True
Expand All @@ -193,8 +246,6 @@ def main(pdf: Path, output: OutputOptions) -> None:
console = Console()
console.print(os_table)
console.print(table)
if meta.encryption:
console.print(enc_table)
console.print(
"Use the 'pagemeta' subcommand to get details about a single page"
)
115 changes: 115 additions & 0 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""
Unit tests for metadata module.
Tests the _format_permissions function and the meta CLI command.
"""

import json
from pathlib import Path

import pytest

from pdfly.metadata import _format_permissions
from .conftest import RESOURCES_ROOT, run_cli

SAMPLE_FILES = RESOURCES_ROOT / "sample-files"

# Optional: exercise the bitmask fallback with real pypdf flags if present
try:
from pypdf.constants import UserAccessPermissions as UAP
except Exception: # pragma: no cover
UAP = None


class TestFormatPermissions:
    """Test the _format_permissions helper function."""

    def test_format_permissions_unencrypted(self):
        """None (no encryption) is reported as not applicable."""
        assert _format_permissions(None) == "n/a (unencrypted)"

    def test_format_permissions_encrypted_with_no_permissions(self, mocker):
        """A to_dict() mapping with every flag False yields the 'denied' text."""
        mock_uap = mocker.Mock()
        mock_uap.to_dict.return_value = {
            "PRINT": False,
            "PRINT_TO_REPRESENTATION": False,
            "MODIFY": False,
            "EXTRACT": False,
            "ADD_OR_MODIFY": False,
            "FILL_FORM_FIELDS": False,
            "EXTRACT_TEXT_AND_GRAPHICS": False,
            "ASSEMBLE_DOC": False,
        }
        assert _format_permissions(mock_uap) == "none (all denied)"

    def test_format_permissions_some_allowed_via_dict(self, mocker):
        """Only the flags set to True are listed, in mapping order."""
        mock_uap = mocker.Mock()
        # Order here controls output order
        mock_uap.to_dict.return_value = {
            "PRINT": True,
            "MODIFY": False,
            "EXTRACT": True,
            "ASSEMBLE_DOC": False,
        }
        formatted = _format_permissions(mock_uap)
        # Lower-case labels from label_map
        assert formatted == "print, extract"

    @pytest.mark.skipif(UAP is None, reason="pypdf flags not available")
    def test_format_permissions_some_allowed_bitmask_path(self):
        """A bare integer mask is decoded through the bitmask fallback."""
        # Cast to a plain int: ints have no to_dict(), so this is guaranteed
        # to exercise the bitmask fallback even if the flag type offers one.
        uap = int(UAP.PRINT | UAP.EXTRACT)
        formatted = _format_permissions(uap)
        assert formatted == "print, extract"

    def test_format_permissions_unknown_when_unhandled_obj(self):
        """Objects that are neither dict-convertible nor int-castable are unknown."""

        class Weird:  # no to_dict, not int-castable
            pass

        assert _format_permissions(Weird()) == "unknown"


class TestMetaCommand:
"""End-to-end tests for the meta CLI command."""

def test_meta_command_unencrypted_pdf(self, capsys):
rel = Path("002-trivial-libre-office-writer/002-trivial-libre-office-writer.pdf")
input_pdf = SAMPLE_FILES / rel
if not input_pdf.exists():
pytest.skip(f"Unencrypted PDF file not found: {input_pdf}")

exit_code = run_cli(["meta", str(input_pdf), "--output", "json"])
assert exit_code == 0

captured = capsys.readouterr()
metadata = json.loads(captured.out)
assert metadata["permissions"] == "n/a (unencrypted)"
# header fix: should read raw PDF header bytes as text
assert metadata["pdf_file_version"].startswith("%PDF-")

def test_meta_command_encrypted_pdf(self, capsys):
rel = Path("005-libreoffice-writer-password/libreoffice-writer-password.pdf")
input_pdf = SAMPLE_FILES / rel
if not input_pdf.exists():
pytest.skip(f"Encrypted PDF file not found: {input_pdf}")

exit_code = run_cli(["meta", str(input_pdf), "--output", "json"])
assert exit_code == 0

captured = capsys.readouterr()
metadata = json.loads(captured.out)

assert "permissions" in metadata
perms = metadata["permissions"]
assert perms not in {"n/a (unencrypted)", "unknown"}
# If not "all denied", check formatting invariants
Copy link
Copy Markdown
Member

@Lucas-C Lucas-C Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be known, given that we test a specific PDF file, right?

Maybe it would be better to have 2 distinct test cases/methods:

  • one that checks that none (all denied) is displayed
  • one that checks that the expected permissions are displayed

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So instead of running that test on only those two files, I could run it on all the files in sample-files/ — would that be cool?

Copy link
Copy Markdown
Member

@Lucas-C Lucas-C Oct 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be cool, yes.
However I tested your code, and the assert act_set == exp_set assertion is never evaluated.
Which means that we don't have any test covering this case...

With which PDF test file have you been able to test pdfly meta behaviour, where it displays a list of permissions?

Copy link
Copy Markdown
Author

@iamrishu11 iamrishu11 Oct 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, it seems like the test case isn't fully covered, and I don't have a PDF with the permissions metadata behavior to fully test it yet. I tried generating a sample PDF with permissions using PyPDF2, but I wasn't able to achieve the desired result. Honestly, I don't have a lot of experience with unittest and pytest yet, so I'm still getting familiar with some of the testing patterns. If you have any advice or pointers on how to create or detect PDFs with specific permissions metadata, it would be really helpful!

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sample-files/005-libreoffice-writer-password/libreoffice-writer-password.pdf has some permissions set:

$ qpdf --show-encryption sample-files/005-libreoffice-writer-password/libreoffice-writer-password.pdf --password=openpassword
R = 3
P = -1028
User password = openpassword
Supplied password is user password
extract for accessibility: allowed
extract for any purpose: allowed
print low resolution: allowed
print high resolution: allowed
modify document assembly: not allowed
modify forms: allowed
modify annotations: allowed
modify other: allowed
modify anything: not allowed

So there is probably an issue somewhere in the code...

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe the password needs to be provided to read the permissions, as with qpdf?

if perms != "none (all denied)":
parts = [p.strip() for p in perms.split(",")]
# lower-case labels
assert all(p == p.lower() for p in parts)
# only known labels
allowed = {
"print", "print-high", "modify", "extract",
"annotate", "fill-forms", "accessibility-copy", "assemble",
}
assert set(parts).issubset(allowed)

# header fix also applies on encrypted files
assert metadata["pdf_file_version"].startswith("%PDF-")