Skip to content

Commit ab0c646

Browse files
Improve SigV4 tests and add botocore version reference
1 parent b48917e commit ab0c646

2 files changed

Lines changed: 58 additions & 9 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,8 @@ def canonical_request(self, request: Any) -> str:
748748
# Reuses the logic from botocore's SigV4Auth.canonical_request
749749
# (https://github.com/boto/botocore/blob/develop/botocore/auth.py)
750750
# but always uses self.payload(request) for the body checksum.
751+
# Validated against botocore <= 1.42.x
752+
# (https://github.com/boto/botocore/blob/1.42.85/botocore/auth.py#L622-L637)
751753
cr = [request.method.upper()]
752754
path = self._normalize_url_path(parse.urlsplit(request.url).path)
753755
cr.append(path)

tests/catalog/test_rest.py

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -558,10 +558,9 @@ def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None:
558558
assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}"
559559
# Non-empty body should have base64-encoded SHA256
560560
content_sha256 = prepared.headers["x-amz-content-sha256"]
561-
assert content_sha256 == "nhKdVGKGU3IMGjYlod9xKUVc7/H5K6zTWj60yJOM80k="
562-
# Verify it's valid base64 and matches the body
563-
decoded = base64.b64decode(content_sha256)
564-
assert len(decoded) == 32 # SHA256 produces 32 bytes
561+
body_bytes = prepared.body.encode("utf-8") if isinstance(prepared.body, str) else prepared.body
562+
expected_sha256 = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode()
563+
assert content_sha256 == expected_sha256
565564
# x-amz-content-sha256 should be in signed headers
566565
assert "x-amz-content-sha256" in auth_header
567566

@@ -596,11 +595,8 @@ def test_sigv4_content_sha256_with_bytes_body(rest_mock: Mocker) -> None:
596595
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
597596
assert "SignedHeaders=" in prepared.headers["Authorization"]
598597
content_sha256 = prepared.headers["x-amz-content-sha256"]
599-
assert content_sha256 == "sD20bEQP+WnwKPT7jxn7PIACGciAeWjQPlzFCK5Fifo="
600-
# Verify it's valid base64 and matches the body
601-
decoded = base64.b64decode(content_sha256)
602-
assert len(decoded) == 32 # SHA256 produces 32 bytes
603-
assert decoded == hashlib.sha256(body_content).digest()
598+
expected_sha256 = base64.b64encode(hashlib.sha256(body_content).digest()).decode()
599+
assert content_sha256 == expected_sha256
604600

605601

606602
def test_sigv4_conflicting_sigv4_headers(rest_mock: Mocker) -> None:
@@ -634,6 +630,57 @@ def test_sigv4_conflicting_sigv4_headers(rest_mock: Mocker) -> None:
634630
assert "X-Amz-Date" in prepared.headers
635631

636632

633+
def test_sigv4_canonical_request_uses_hex_payload(rest_mock: Mocker) -> None:
    """Check that the canonical request carries the hex payload hash.

    The ``x-amz-content-sha256`` header holds the base64-encoded SHA-256 of the
    body, while the last line of the SigV4 canonical request must be the hex
    digest of that same body. Both encodings are asserted here.
    """
    from unittest.mock import patch

    from botocore.auth import SigV4Auth

    properties = {
        "uri": TEST_URI,
        "token": "token",
        "rest.sigv4-enabled": "true",
        "rest.signing-region": "us-west-2",
        "client.access-key-id": "id",
        "client.secret-access-key": "secret",
    }
    catalog = RestCatalog("rest", **properties)

    body_content = b'{"namespace": "test"}'
    post_request = Request("POST", f"{TEST_URI}v1/namespaces", data=body_content)
    prepared = catalog._session.prepare_request(post_request)

    adapter = catalog._session.adapters[catalog.uri]
    assert isinstance(adapter, HTTPAdapter)

    # Record the canonical request string for every signing call, then hand
    # over to the real add_auth so the request is still signed as usual.
    seen_canonical_requests: list[str] = []
    wrapped_add_auth = SigV4Auth.add_auth

    def recording_add_auth(self: Any, request: Any) -> None:
        seen_canonical_requests.append(self.canonical_request(request))
        wrapped_add_auth(self, request)

    with patch.object(SigV4Auth, "add_auth", recording_add_auth):
        adapter.add_headers(prepared)

    assert len(seen_canonical_requests) == 1

    # The payload hash is whatever follows the final newline of the canonical request.
    payload_hash = seen_canonical_requests[0].rsplit("\n", 1)[-1]
    body_digest = hashlib.sha256(body_content)

    # Hex encoding of SHA-256 is 64 characters long; base64 would be 44.
    assert len(payload_hash) == 64
    assert payload_hash == body_digest.hexdigest()

    # The header, by contrast, carries the base64 form of the same digest.
    expected_header = base64.b64encode(body_digest.digest()).decode()
    assert prepared.headers["x-amz-content-sha256"] == expected_header
682+
683+
637684
def test_sigv4_adapter_default_retry_config(rest_mock: Mocker) -> None:
638685
catalog = RestCatalog(
639686
"rest",

0 commit comments

Comments
 (0)