diff --git a/protobuf/runtime/src/com/google/protobuf/Descriptors_PackagePrivate.h b/protobuf/runtime/src/com/google/protobuf/Descriptors_PackagePrivate.h index 5829afda29..f248e2554c 100644 --- a/protobuf/runtime/src/com/google/protobuf/Descriptors_PackagePrivate.h +++ b/protobuf/runtime/src/com/google/protobuf/Descriptors_PackagePrivate.h @@ -264,7 +264,43 @@ CGP_ALWAYS_INLINE BOOL CGPJavaTypeIsEnum(CGPFieldJavaType type) { return type == ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_ENUM; } +// Gets the integer value of an enum. +// +// This function is updated to support preservation of unknown enum values in proto3. +// +// How it works: +// - Valid enums are pointers to static singleton objects. Since objects are aligned +// to at least 8 bytes on 64-bit systems, the lowest bit of a valid pointer is always 0. +// - Unrecognized enum values are stored by shifting the raw 32-bit value into the +// upper 32 bits and setting the lowest bit to 1 (tagging it). +// Representation: (raw_value << 32) | 0x1 +// +// Why this is safe: +// 1. ARC (Automatic Reference Counting): The J2ObjC runtime does not retain/release +// enum fields (see CGPIsRetainedType in Descriptors.m returning NO for enums). +// Therefore, storing a non-pointer value in the id field will not cause ARC to +// attempt to dereference it and crash. +// 2. PAC (Pointer Authentication Codes): PAC uses the upper bits of pointers on +// ARM64e to store a signature. We do not touch the upper bits of VALID pointers. +// We only use the upper bits when the value is NOT a pointer (indicated by the +// lowest bit being 1). Since we don't try to authenticate this tagged value as a +// pointer, PAC is not triggered. +// +// Why this is correct: +// - In proto3, the 0 value is always defined (required by spec). Thus, an unknown +// value can never be 0. 
The shifted raw value in the upper 32 bits will therefore +// always be non-zero for unknown values. The code below does not depend on that, +// however: a tagged value is recognized solely by its lowest bit being 1. CGP_ALWAYS_INLINE jint CGPEnumGetIntValue(CGPEnumDescriptor *descriptor, id enumObj) { + uintptr_t stored_val = (uintptr_t)(ARCBRIDGE void *)enumObj; + + // Check the lowest bit. If it is 1, it is an unrecognized tagged value. + if (stored_val & 0x1) { + // Return the raw value stored in the upper 32 bits. NOTE(review): a shift by 32 is only defined when uintptr_t is wider than 32 bits; confirm no 32-bit-pointer target builds this. + return (jint)(stored_val >> 32); + } + + // Otherwise, it is a valid pointer to a static enum singleton. return *(jint *)((char *)(ARCBRIDGE void *)enumObj + descriptor->valueOffset_); } diff --git a/protobuf/runtime/src/com/google/protobuf/GeneratedMessage.mm b/protobuf/runtime/src/com/google/protobuf/GeneratedMessage.mm index 0027d399ff..442593b655 100644 --- a/protobuf/runtime/src/com/google/protobuf/GeneratedMessage.mm +++ b/protobuf/runtime/src/com/google/protobuf/GeneratedMessage.mm @@ -374,6 +374,44 @@ static void SingularSetRetainable(id msg, TYPE_Retainable value, size_t offset, #undef REPEATED_GETTER_IMP +// Getter for singular enum fields. Intercepts tagged pointers for unknown values +// and returns the UNRECOGNIZED singleton (the `enum_` of the last entry in the enum type's values_) instead. When GetHas reports the field as unset, defaultValue is returned. 
+static IMP GetSingularGetterImpEnum(size_t offset, CGPHasLocator hasLoc, id defaultValue, + CGPFieldDescriptor *field) { + CGPEnumDescriptor *enumType = [field getEnumType]; + id unrecognizedSingleton = + ((CGPEnumValueDescriptor *)enumType->values_->buffer_[enumType->values_->size_ - 1])->enum_; + + return imp_implementationWithBlock(^id(id msg) { + if (GetHas(msg, hasLoc)) { + id value = *FIELD_PTR(id, msg, offset); + uintptr_t stored_val = (uintptr_t)(__bridge void *)value; + if (stored_val & 0x1) { + return unrecognizedSingleton; + } + return value; + } + return defaultValue; + }); +} + +// Getter for repeated enum fields. 
Intercepts tagged pointers for unknown values +// and returns the UNRECOGNIZED singleton (the `enum_` of the last entry in the enum type's values_) instead. +static IMP GetRepeatedGetterImpEnum(size_t offset, CGPFieldDescriptor *field) { + CGPEnumDescriptor *enumType = [field getEnumType]; + id unrecognizedSingleton = + ((CGPEnumValueDescriptor *)enumType->values_->buffer_[enumType->values_->size_ - 1])->enum_; + + return imp_implementationWithBlock(^id(id msg, jint idx) { + id value = CGPRepeatedFieldGetId(REPEATED_FIELD_PTR(msg, offset), idx); + uintptr_t stored_val = (uintptr_t)(__bridge void *)value; + if (stored_val & 0x1) { + return unrecognizedSingleton; + } + return value; + }); +} + static BOOL AddGetterMethod(Class cls, SEL sel, CGPFieldDescriptor *field) { BOOL repeated = CGPFieldIsRepeated(field); IMP imp = NULL; @@ -388,7 +426,30 @@ static BOOL AddGetterMethod(Class cls, SEL sel, CGPFieldDescriptor *field) { strcpy(encoding, @encode(TYPE_##NAME)); \ break; - SWITCH_TYPES_NO_ENUM(CGPFieldGetJavaType(field), ADD_GETTER_METHOD_CASE) + // We expand the switch manually instead of using SWITCH_TYPES_NO_ENUM + // to handle ENUM fields specially and avoid RETAIN_AND_AUTORELEASE on tagged pointers. 
+ switch (CGPFieldGetJavaType(field)) { + case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_INT: + ADD_GETTER_METHOD_CASE(Int) + case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_LONG: + ADD_GETTER_METHOD_CASE(Long) + case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_FLOAT: + ADD_GETTER_METHOD_CASE(Float) + case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_DOUBLE: + ADD_GETTER_METHOD_CASE(Double) + case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_BOOLEAN: + ADD_GETTER_METHOD_CASE(Bool) + case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_STRING: + case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_BYTE_STRING: + case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_MESSAGE: + ADD_GETTER_METHOD_CASE(Id) + case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_ENUM: + imp = repeated ? GetRepeatedGetterImpEnum(offset, field) + : GetSingularGetterImpEnum(offset, hasLoc, field->data_->defaultValue.valueId, + field); + strcpy(encoding, @encode(id)); + break; + } #undef ADD_GETTER_METHOD_CASE @@ -1579,11 +1640,37 @@ static inline BOOL ReadEnumValueDescriptor(CGPCodedInputStream *input, CGPEnumDe return YES; } +// Reads an enum value from the stream and resolves it to a Java enum instance. +// +// This function preserves unknown enum values in proto3 (open enums) +// by storing them as tagged integers instead of falling back to the UNRECOGNIZED +// singleton. +// +// See CGPEnumGetIntValue in Descriptors_PackagePrivate.h for details on the +// tagged representation and why it is safe with ARC and PAC. static BOOL ReadEnumJavaValue(CGPCodedInputStream *input, CGPEnumDescriptor *enumType, id *javaValue) { - CGPEnumValueDescriptor *valueDescriptor; - if (!ReadEnumValueDescriptor(input, enumType, &valueDescriptor)) return NO; - *javaValue = valueDescriptor == nil ? 
nil : valueDescriptor->enum_; + jint value; + if (!CGPReadEnum(input, &value)) return NO; + + CGPEnumValueDescriptor *valueDescriptor = CGPEnumValueDescriptorFromInt(enumType, value); + + if (valueDescriptor == nil) { + // Closed enum (proto2) and value was not found. We store nil. + *javaValue = nil; + } else if (!enumType->is_closed_ && + valueDescriptor == enumType->values_->buffer_[enumType->values_->size_ - 1]) { + // Open enum (proto3) and the value was not found, so CGPEnumValueDescriptorFromInt + // returned the UNRECOGNIZED descriptor (which is always the last element in values_). + // + // We store the raw value in the upper 32 bits and set the lowest bit to 1. NOTE(review): `<< 32` assumes uintptr_t is 64 bits wide; confirm for 32-bit-pointer targets. + // This preserves the value for serialization while remaining safe from ARC. + *javaValue = (id)(ARCBRIDGE void *)(((uintptr_t)value << 32) | 0x1); + } else { + // Found a valid known descriptor. Store the singleton pointer. + *javaValue = valueDescriptor->enum_; + } + return YES; } diff --git a/protobuf/tests/Proto3EnumTest.java b/protobuf/tests/Proto3EnumTest.java index acae257070..66b6b58765 100644 --- a/protobuf/tests/Proto3EnumTest.java +++ b/protobuf/tests/Proto3EnumTest.java @@ -101,4 +101,42 @@ public void testNegativeEnumNumber() throws Exception { Text text = Text.parseFrom(new byte[] {0x08, 0x7f}, ExtensionRegistry.getEmptyRegistry()); assertThat(text.getGreeting()).isSameInstanceAs(Greetings.UNRECOGNIZED); } + + public void testSingularParseUnknownEnumSerialization() throws Exception { + // field 1 (fruit), value 5 (unrecognized) + // Tag: (1 << 3) | 0 = 8 + // Value: 5 + byte[] bytes = new byte[] {0x08, 0x05}; + FruitBox box = FruitBox.parseFrom(bytes, ExtensionRegistry.getEmptyRegistry()); + + byte[] outputBytes = box.toByteArray(); + assertThat(outputBytes).isEqualTo(bytes); + } + + public void testRepeatedParseUnknownEnumSerialization() throws Exception { + // field 2 (fruits), repeated, packed + // Tag: (2 << 3) | 2 = 18 + // Length: 3 + // Values: 1 (APPLE), 2 (BANANA), 5 
(unrecognized) + byte[] bytes = new byte[] {0x12, 0x03, 0x01, 0x02, 0x05}; + FruitBox box = FruitBox.parseFrom(bytes, ExtensionRegistry.getEmptyRegistry()); + + byte[] outputBytes = box.toByteArray(); + assertThat(outputBytes).isEqualTo(bytes); + } + + public void testMapParseUnknownEnumSerialization() throws Exception { + // field 3 (fruit_map), map + // Tag: (3 << 3) | 2 = 26 + // Length: 4 + // Map Entry: + // key: field 1, value 1 -> 0x08 0x01 + // value: field 2, value 5 -> 0x10 0x05 + byte[] bytes = new byte[] {0x1a, 0x04, 0x08, 0x01, 0x10, 0x05}; + FruitBox box = FruitBox.parseFrom(bytes, ExtensionRegistry.getEmptyRegistry()); + + byte[] outputBytes = box.toByteArray(); + assertThat(outputBytes).isEqualTo(bytes); + } + } diff --git a/protobuf/tests/protos/proto3_enum.proto b/protobuf/tests/protos/proto3_enum.proto index 663c761f72..12a1f1d542 100644 --- a/protobuf/tests/protos/proto3_enum.proto +++ b/protobuf/tests/protos/proto3_enum.proto @@ -27,6 +27,8 @@ enum Fruit { message FruitBox { Fruit fruit = 1; + repeated Fruit fruits = 2; + map fruit_map = 3; } enum Greetings {