Skip to content

Commit 8c5af07

Browse files
Fix SigV4 auth to use base64-encoded content SHA256 and custom canonical request
1 parent 1a54e9c commit 8c5af07

2 files changed

Lines changed: 94 additions & 10 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 25 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -733,6 +733,8 @@ def _split_identifier_for_json(self, identifier: str | Identifier) -> dict[str,
733 733
return {"namespace": identifier_tuple[:-1], "name": identifier_tuple[-1]}
734 734

735 735
def _init_sigv4(self, session: Session) -> None:
736+
import base64
737+
import hashlib
736 738
from urllib import parse
737 739

738 740
import boto3
@@ -741,6 +743,12 @@ def _init_sigv4(self, session: Session) -> None:
741 743
from requests import PreparedRequest
742 744
from requests.adapters import HTTPAdapter
743 745

746+
class _IcebergSigV4Auth(SigV4Auth):
747+
def canonical_request(self, request: Any) -> str:
748+
cr = super().canonical_request(request)
749+
# Replace the last line (body_checksum) with hex-encoded payload hash.
750+
return cr.rsplit("\n", 1)[0] + "\n" + self.payload(request)
751+
744 752
class SigV4Adapter(HTTPAdapter):
745 753
def __init__(self, **properties: str):
746 754
self._properties = properties
@@ -767,17 +775,27 @@ def add_headers(self, request: PreparedRequest, **kwargs: Any) -> None: # pylin
767 775
# remove the connection header as it will be updated after signing
768 776
if "connection" in request.headers:
769 777
del request.headers["connection"]
770-
# For empty bodies, explicitly set the content hash header to the SHA256 of an empty string
771-
if not request.body:
772-
request.headers["x-amz-content-sha256"] = EMPTY_BODY_SHA256
778+
779+
# Compute the x-amz-content-sha256 header to match Iceberg Java SDK:
780+
# - empty body → hex (EMPTY_BODY_SHA256)
781+
# - non-empty body → base64
782+
if request.body:
783+
body_bytes = request.body.encode("utf-8") if isinstance(request.body, str) else request.body
784+
content_sha256_header = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode()
785+
else:
786+
content_sha256_header = EMPTY_BODY_SHA256
787+
788+
signing_headers = dict(request.headers)
789+
signing_headers["x-amz-content-sha256"] = content_sha256_header
773 790

774 791
aws_request = AWSRequest(
775-
method=request.method, url=url, params=params, data=request.body, headers=dict(request.headers)
792+
method=request.method, url=url, params=params, data=request.body, headers=signing_headers
776 793
)
777 794

778-
SigV4Auth(credentials, service, region).add_auth(aws_request)
779-
original_header = request.headers
780-
signed_headers = aws_request.headers
795+
_IcebergSigV4Auth(credentials, service, region).add_auth(aws_request)
796+
797+
original_header = dict(request.headers)
798+
signed_headers = dict(aws_request.headers)
781 799
relocated_headers = {}
782 800

783 801
# relocate headers if there is a conflict with signed headers

tests/catalog/test_rest.py

Lines changed: 69 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -512,9 +512,10 @@ def test_sigv4_sign_request_without_body(rest_mock: Mocker) -> None:
512 512
assert isinstance(adapter, HTTPAdapter)
513 513
adapter.add_headers(prepared)
514 514

515-
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256")
515+
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
516 516
assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}"
517 517
assert prepared.headers["x-amz-content-sha256"] == EMPTY_BODY_SHA256
518+
assert "SignedHeaders=" in prepared.headers["Authorization"]
518 519

519 520

520 521
def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None:
@@ -543,9 +544,74 @@ def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None:
543 544
assert isinstance(adapter, HTTPAdapter)
544 545
adapter.add_headers(prepared)
545 546

546-
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256")
547+
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
548+
assert "SignedHeaders=" in prepared.headers["Authorization"]
549+
# Conflicting Authorization header is relocated
547 550
assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}"
548-
assert prepared.headers.get("x-amz-content-sha256") != EMPTY_BODY_SHA256
551+
assert prepared.headers["x-amz-content-sha256"] == "nhKdVGKGU3IMGjYlod9xKUVc7/H5K6zTWj60yJOM80k="
552+
553+
554+
def test_sigv4_content_sha256_with_bytes_body(rest_mock: Mocker) -> None:
555+
existing_token = "existing_token"
556+
557+
catalog = RestCatalog(
558+
"rest",
559+
**{
560+
"uri": TEST_URI,
561+
"token": existing_token,
562+
"rest.sigv4-enabled": "true",
563+
"rest.signing-region": "us-west-2",
564+
"client.access-key-id": "id",
565+
"client.secret-access-key": "secret",
566+
},
567+
)
568+
569+
body_content = b'{"namespace": "test_namespace"}'
570+
prepared = catalog._session.prepare_request(
571+
Request(
572+
"POST",
573+
f"{TEST_URI}v1/namespaces",
574+
data=body_content,
575+
)
576+
)
577+
adapter = catalog._session.adapters[catalog.uri]
578+
assert isinstance(adapter, HTTPAdapter)
579+
adapter.add_headers(prepared)
580+
581+
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
582+
assert "SignedHeaders=" in prepared.headers["Authorization"]
583+
assert prepared.headers["x-amz-content-sha256"] == "sD20bEQP+WnwKPT7jxn7PIACGciAeWjQPlzFCK5Fifo="
584+
585+
586+
def test_sigv4_conflicting_sigv4_headers(rest_mock: Mocker) -> None:
587+
catalog = RestCatalog(
588+
"rest",
589+
**{
590+
"uri": TEST_URI,
591+
"rest.sigv4-enabled": "true",
592+
"rest.signing-region": "us-west-2",
593+
"client.access-key-id": "id",
594+
"client.secret-access-key": "secret",
595+
},
596+
)
597+
598+
prepared = catalog._session.prepare_request(Request("GET", f"{TEST_URI}v1/config"))
599+
adapter = catalog._session.adapters[catalog.uri]
600+
assert isinstance(adapter, HTTPAdapter)
601+
602+
# Inject conflicting SigV4 headers before signing
603+
prepared.headers["x-amz-content-sha256"] = "fake"
604+
prepared.headers["X-Amz-Date"] = "fake"
605+
606+
adapter.add_headers(prepared)
607+
608+
# Matching Java SDK: conflicting headers are relocated with "Original-" prefix
609+
assert prepared.headers.get("Original-x-amz-content-sha256") == "fake"
610+
assert prepared.headers.get("Original-X-Amz-Date") == "fake"
611+
# SigV4 headers are set correctly after signing
612+
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
613+
assert prepared.headers["x-amz-content-sha256"] == EMPTY_BODY_SHA256
614+
assert "X-Amz-Date" in prepared.headers
549 615

550 616

551 617
def test_sigv4_adapter_default_retry_config(rest_mock: Mocker) -> None:

0 commit comments

Comments
 (0)