diff --git a/clients/src/main/java/org/apache/kafka/common/Uuid.java b/clients/src/main/java/org/apache/kafka/common/Uuid.java index 6f7f09537f178..5463f7e4a7fcf 100644 --- a/clients/src/main/java/org/apache/kafka/common/Uuid.java +++ b/clients/src/main/java/org/apache/kafka/common/Uuid.java @@ -70,12 +70,12 @@ private static Uuid unsafeRandomUuid() { /** * Static factory to retrieve a type 4 (pseudo randomly generated) UUID. - * - * This will not generate a UUID equal to 0, 1, or one whose string representation starts with a dash ("-") + *

+ * This will not generate a UUID equal to 0, 1, or one whose string representation contains a dash ("-"). */ public static Uuid randomUuid() { Uuid uuid = unsafeRandomUuid(); - while (RESERVED.contains(uuid) || uuid.toString().startsWith("-")) { + while (RESERVED.contains(uuid) || uuid.toString().contains("-")) { uuid = unsafeRandomUuid(); } return uuid; diff --git a/clients/src/test/java/org/apache/kafka/common/UuidTest.java b/clients/src/test/java/org/apache/kafka/common/UuidTest.java index 9acc8145be84a..8b440d48491ca 100644 --- a/clients/src/test/java/org/apache/kafka/common/UuidTest.java +++ b/clients/src/test/java/org/apache/kafka/common/UuidTest.java @@ -83,7 +83,7 @@ public void testRandomUuid() { assertNotEquals(Uuid.ZERO_UUID, randomID); assertNotEquals(Uuid.METADATA_TOPIC_ID, randomID); - assertFalse(randomID.toString().startsWith("-")); + assertFalse(randomID.toString().contains("-")); } @Test diff --git a/docs/design/protocol.md b/docs/design/protocol.md index 31fa16b2a6635..62e450cf2f838 100644 --- a/docs/design/protocol.md +++ b/docs/design/protocol.md @@ -215,3 +215,19 @@ Others have asked if maybe we shouldn't support many different protocols. Prior Another question is why we don't adopt XMPP, STOMP, AMQP or an existing protocol. The answer to this varies by protocol, but in general the problem is that the protocol does determine large parts of the implementation and we couldn't do what we are doing if we didn't have control over the protocol. Our belief is that it is possible to do better than existing messaging systems have in providing a truly distributed messaging system, and to do this we need to build something that works differently. A final question is why we don't use a system like Protocol Buffers or Thrift to define our request messages. These packages excel at helping you to managing lots and lots of serialized messages. However we have only a few messages. Support across languages is somewhat spotty (depending on the package). 
Finally the mapping between binary log format and wire protocol is something we manage somewhat carefully and this would not be possible with these systems. Finally we prefer the style of versioning APIs explicitly and checking this to inferring new values as nulls as it allows more nuanced control of compatibility. + +## Recommendations for 3rd-party Clients: Member ID Format + +When a Kafka client participates in group protocols (e.g., the `ConsumerGroupHeartbeat` RPC), it must generate a **member ID** to identify itself to the broker. While the protocol does not strictly enforce the format of this ID, we strongly recommend the following: + +1. **Use a base64-encoded UUID** as the member ID: generate a random (version 4) UUID and encode its 16 raw bytes, not its 36-character text form. +2. **Encode the UUID using URL-safe base64** (`-` and `_` in place of `+` and `/`) without `=` padding, which always yields a 22-character string. +3. **Avoid hyphens** — URL-safe base64 output may contain `-`; if it does, generate a new UUID and encode it again rather than stripping the character, so that the member ID contains no `-` (e.g., `xhZIktFSTKiydbsDAAAAAA`). + +**Example** +The UUID `c6164892-d152-4ca8-b275-bb0300000000` is encoded from its 16 bytes into the URL-safe base64 string `xhZIktFSTKiydbsDAAAAAA`. + +*(Note: This is illustrative; the encoded value depends on the UUID bytes.)* + +**Important** +While this is a strong recommendation, the protocol does **not** reject member IDs that deviate from this format.