Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion src/v/kafka/protocol/schemata/BUILD
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
load("@rules_python//python:defs.bzl", "py_binary")
load("@rules_python//python:defs.bzl", "py_binary", "py_test")
load("//src/v/kafka/protocol/schemata:generator.bzl", "generate_kafka_messages")

py_binary(
Expand All @@ -12,4 +12,23 @@ py_binary(
],
)

# Regression test for deterministic codegen: generator_reproducibility_test.py
# runs generator.py on each schema under different PYTHONHASHSEED values and
# fails if the emitted files differ. The test locates generator.py and the
# <schema>.json files next to itself, so the generator and every schema named
# in the test's _SCHEMATA tuple are listed in `data`.
py_test(
name = "generator_reproducibility_test",
size = "small",
srcs = ["generator_reproducibility_test.py"],
data = [
"create_topics_request.json",
"create_topics_response.json",
"describe_configs_response.json",
"fetch_request.json",
"fetch_response.json",
"generator.py",
"metadata_response.json",
],
# Third-party packages made available to the test target.
# NOTE(review): presumably these are the generator's own imports (jinja2 is
# used in generator.py) — confirm jsonschema is needed as well.
deps = [
"@python_deps//jinja2",
"@python_deps//jsonschema",
],
)

generate_kafka_messages()
4 changes: 2 additions & 2 deletions src/v/kafka/protocol/schemata/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -995,7 +995,7 @@ def type_headers(t):
h = h.get(which, ())
yield from maybe_strings(h)

return set(h for t in types for h in type_headers(t))
return sorted({h for t in types for h in type_headers(t)})

@property
def is_default_comparable(self):
Expand Down Expand Up @@ -2171,7 +2171,7 @@ def fail(msg):

src = jinja2.Template(COMBINED_SOURCE_TEMPLATE).render(
schema_headers=map(lambda p: p.name, headers),
extra_headers=extra_schema_headers,
extra_headers=sorted(extra_schema_headers),
sources=sources,
)

Expand Down
79 changes: 79 additions & 0 deletions src/v/kafka/protocol/schemata/generator_reproducibility_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Copyright 2026 Redpanda Data, Inc.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.md
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0
"""Hermeticity regression test for the kafka schemata codegen.

The generator emits C++ source from JSON schemas. Earlier versions iterated
Python sets when emitting #include lines, so the byte-level output varied with
the string hash seed (which CPython randomizes per process unless
PYTHONHASHSEED is set). Bazel keys its action cache on input content hashes,
so non-deterministic codegen invalidates
the cache for every downstream compile even when the result is
preprocessor-equivalent.

This test runs the generator with several PYTHONHASHSEED values and fails if
any output byte differs.
"""

import os
import subprocess
import sys
import tempfile
import unittest
from pathlib import Path

_HERE = Path(__file__).resolve().parent
_GENERATOR = _HERE / "generator.py"

_SEEDS = ("1", "4294967295")
_EXTS = ("h", "cc")

# Schemata chosen to exercise the codegen paths most likely to expose set-order
# bugs: ones whose fields pull in multiple `extra_headers` entries.
_SCHEMATA = (
"create_topics_request",
"create_topics_response",
"fetch_request",
"fetch_response",
"metadata_response",
"describe_configs_response",
)


def _run_generator(schema: str, seed: str, outdir: Path) -> dict[str, bytes]:
    """Generate one schema's sources under a fixed hash seed.

    Creates ``outdir``, runs the generator script on ``<schema>.json`` in a
    child interpreter whose ``PYTHONHASHSEED`` is ``seed``, and returns the
    raw contents of every produced file keyed by its extension.

    Raises:
        FileExistsError: if ``outdir`` already exists.
        subprocess.CalledProcessError: if the generator exits non-zero.
    """
    outdir.mkdir()
    targets = [outdir / f"{schema}.{ext}" for ext in _EXTS]

    # Command line: <python> generator.py <schema>.json <one output per ext>
    command = [sys.executable, str(_GENERATOR), str(_HERE / f"{schema}.json")]
    command.extend(str(target) for target in targets)

    # Inherit the caller's environment and pin only the hash seed.
    child_env = dict(os.environ)
    child_env["PYTHONHASHSEED"] = seed

    subprocess.run(command, check=True, env=child_env)

    return dict(zip(_EXTS, (target.read_bytes() for target in targets)))


class GeneratorReproducibilityTest(unittest.TestCase):
    """Checks that generated sources do not depend on PYTHONHASHSEED."""

    def test_codegen_is_hash_seed_independent(self) -> None:
        """Generate each schema once per seed and require identical bytes."""
        for schema in _SCHEMATA:
            # One sub-test per schema so a failure names the offending schema
            # and the remaining schemata still run.
            with self.subTest(schema=schema):
                with tempfile.TemporaryDirectory() as tmp:
                    scratch = Path(tmp)
                    outputs = []
                    for s in _SEEDS:
                        outputs.append(
                            _run_generator(schema, s, scratch / f"seed{s}")
                        )

                    for ext in _EXTS:
                        # All runs agree iff the set of distinct outputs
                        # collapses to just the first run's output.
                        distinct = {run[ext] for run in outputs}
                        reference = {outputs[0][ext]}
                        self.assertEqual(
                            distinct,
                            reference,
                            f"{schema}.{ext} varies across PYTHONHASHSEED values",
                        )


if __name__ == "__main__":
unittest.main()
Loading