Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,15 @@ protected LogicalTypeAnnotation fromString(List<String> params) {
return listType();
}
},
VARIANT {
@Override
protected LogicalTypeAnnotation fromString(List<String> params) {
Preconditions.checkArgument(
params.isEmpty(), "Expecting 0 parameter for variant logical type, got %d", params.size());

return variantType();
Comment thread
aihuaxu marked this conversation as resolved.
Outdated
}
},
STRING {
@Override
protected LogicalTypeAnnotation fromString(List<String> params) {
Expand Down Expand Up @@ -269,6 +278,10 @@ public static ListLogicalTypeAnnotation listType() {
return ListLogicalTypeAnnotation.INSTANCE;
}

public static VariantLogicalTypeAnnotation variantType() {
return VariantLogicalTypeAnnotation.INSTANCE;
}

public static EnumLogicalTypeAnnotation enumType() {
return EnumLogicalTypeAnnotation.INSTANCE;
}
Expand Down Expand Up @@ -1128,6 +1141,28 @@ public int hashCode() {
}
}

public static class VariantLogicalTypeAnnotation extends LogicalTypeAnnotation {
private static final VariantLogicalTypeAnnotation INSTANCE = new VariantLogicalTypeAnnotation();

private VariantLogicalTypeAnnotation() {}

@Override
public OriginalType toOriginalType() {
// No OriginalType for Variant
return null;
}

@Override
public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T> logicalTypeAnnotationVisitor) {
return logicalTypeAnnotationVisitor.visit(this);
}

@Override
LogicalTypeToken getType() {
return LogicalTypeToken.VARIANT;
}
}

/**
* Implement this interface to visit a logical type annotation in the schema.
* The default implementation for each logical type specific visitor method is empty.
Expand All @@ -1152,6 +1187,10 @@ default Optional<T> visit(ListLogicalTypeAnnotation listLogicalType) {
return empty();
}

default Optional<T> visit(VariantLogicalTypeAnnotation variantLogicalType) {
return empty();
}

default Optional<T> visit(EnumLogicalTypeAnnotation enumLogicalType) {
return empty();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,23 @@ private static void addGroupType(Tokenizer st, Repetition r, GroupBuilder<?> bui
String name = st.nextToken();

// Read annotation, if any.
String annotation = null;
t = st.nextToken();
OriginalType originalType = null;
if (t.equalsIgnoreCase("(")) {
originalType = OriginalType.valueOf(st.nextToken());
childBuilder.as(originalType);
check(st.nextToken(), ")", "original type ended by )", st);
t = st.nextToken();
if (isLogicalType(t)) {
LogicalTypeAnnotation.LogicalTypeToken logicalType = LogicalTypeAnnotation.LogicalTypeToken.valueOf(t);
LogicalTypeAnnotation logicalTypeAnnotation = logicalType.fromString(new ArrayList<>());
childBuilder.as(logicalTypeAnnotation);
annotation = logicalTypeAnnotation.toString();
} else {
// Try to parse as OriginalType
OriginalType originalType = OriginalType.valueOf(t);
childBuilder.as(originalType);
annotation = originalType.toString();
}

check(st.nextToken(), ")", "logical type ended by )", st);
t = st.nextToken();
}
if (t.equals("=")) {
Expand All @@ -134,7 +145,7 @@ private static void addGroupType(Tokenizer st, Repetition r, GroupBuilder<?> bui
addGroupTypeFields(t, st, childBuilder);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(
"problem reading type: type = group, name = " + name + ", original type = " + originalType, e);
"problem reading type: type = group, name = " + name + ", annotation = " + annotation, e);
}

childBuilder.named(name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import static org.junit.Assert.assertEquals;

import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.schema.OriginalType;
Expand Down Expand Up @@ -447,4 +448,30 @@ public void testEmbeddedAnnotations() {
MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
assertEquals(expected, reparsed);
}

@Test
public void testVARIANTAnnotation() {
String message = "message Message {\n"
+ " required group aVariant (VARIANT) {\n"
+ " required binary metadata;\n"
+ " required binary value;\n"
+ " }\n"
+ "}\n";

MessageType expected = buildMessage()
.requiredGroup()
.as(LogicalTypeAnnotation.variantType())
.required(BINARY)
.named("metadata")
.required(BINARY)
.named("value")
.named("aVariant")
.named("Message");

MessageType parsed = parseMessageType(message);

assertEquals(expected, parsed);
MessageType reparsed = parseMessageType(parsed.toString());
assertEquals(expected, reparsed);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
import static org.apache.parquet.schema.Type.Repetition.REPEATED;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -1414,6 +1415,50 @@ public void testTimestampLogicalTypeWithUTCParameter() {
Assert.assertEquals(nonUtcMicrosExpected, nonUtcMicrosActual);
}

@Test
public void testVariantLogicalType() {
Comment thread
rdblue marked this conversation as resolved.
String name = "variant_field";
GroupType variantExpected = new GroupType(
REQUIRED,
name,
LogicalTypeAnnotation.variantType(),
new PrimitiveType(REQUIRED, BINARY, "metadata"),
new PrimitiveType(REQUIRED, BINARY, "value"));

GroupType variantActual = Types.buildGroup(REQUIRED)
.addFields(
Types.required(BINARY).named("metadata"),
Types.required(BINARY).named("value"))
.as(LogicalTypeAnnotation.variantType())
.named(name);

assertEquals(variantExpected, variantActual);
}

@Test
public void testVariantLogicalTypeWithShredded() {
String name = "variant_field";
GroupType variantExpected = new GroupType(
REQUIRED,
name,
LogicalTypeAnnotation.variantType(),
new PrimitiveType(REQUIRED, BINARY, "metadata"),
new PrimitiveType(OPTIONAL, BINARY, "value"),
new PrimitiveType(OPTIONAL, BINARY, "typed_value", LogicalTypeAnnotation.stringType()));

GroupType variantActual = Types.buildGroup(REQUIRED)
.addFields(
Types.required(BINARY).named("metadata"),
Types.optional(BINARY).named("value"),
Types.optional(BINARY)
.as(LogicalTypeAnnotation.stringType())
.named("typed_value"))
.as(LogicalTypeAnnotation.variantType())
.named(name);

assertEquals(variantExpected, variantActual);
}

@Test(expected = IllegalArgumentException.class)
public void testDecimalLogicalTypeWithDeprecatedScaleMismatch() {
Types.required(BINARY)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.util.concurrent.Callable;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
Expand Down Expand Up @@ -473,6 +475,53 @@ public void testFloat16LogicalType() {
.toString());
}

@Test
public void testVariantLogicalType() {
String name = "variant_field";
GroupType variant = new GroupType(
REQUIRED,
name,
LogicalTypeAnnotation.variantType(),
Types.required(BINARY).named("metadata"),
Types.required(BINARY).named("value"));

assertEquals(
"required group variant_field (VARIANT) {\n"
+ " required binary metadata;\n"
+ " required binary value;\n"
+ "}",
variant.toString());

LogicalTypeAnnotation annotation = variant.getLogicalTypeAnnotation();
assertEquals(LogicalTypeAnnotation.LogicalTypeToken.VARIANT, annotation.getType());
assertNull(annotation.toOriginalType());
assertTrue(annotation instanceof LogicalTypeAnnotation.VariantLogicalTypeAnnotation);
}

@Test
public void testVariantLogicalTypeWithShredded() {
String name = "variant_field";
GroupType variant = new GroupType(
REQUIRED,
name,
LogicalTypeAnnotation.variantType(),
Types.required(BINARY).named("metadata"),
Types.optional(BINARY).named("value"),
Types.optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("typed_value"));

assertEquals(
"required group variant_field (VARIANT) {\n"
+ " required binary metadata;\n"
+ " optional binary value;\n"
+ " optional binary typed_value (STRING);\n"
+ "}",
variant.toString());

LogicalTypeAnnotation annotation = variant.getLogicalTypeAnnotation();
assertEquals(LogicalTypeAnnotation.LogicalTypeToken.VARIANT, annotation.getType());
assertNull(annotation.toOriginalType());
assertTrue(annotation instanceof LogicalTypeAnnotation.VariantLogicalTypeAnnotation);
}
/**
* A convenience method to avoid a large number of @Test(expected=...) tests
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,11 @@ public static LogicalType DECIMAL(int scale, int precision) {
public static final LogicalType BSON = LogicalType.BSON(new BsonType());
public static final LogicalType FLOAT16 = LogicalType.FLOAT16(new Float16Type());
public static final LogicalType UUID = LogicalType.UUID(new UUIDType());
public static final LogicalType VARIANT = LogicalType.VARIANT(variant());

private static final VariantType variant() {
Comment thread
aihuaxu marked this conversation as resolved.
Outdated
VariantType type = new VariantType();
type.setSpecification_version((byte) 1);
return type;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.format;

import static org.junit.Assert.assertEquals;

import org.junit.Test;

public class TestLogicalTypes {
@Test
public void testVariantLogicalTypeVersion() {
LogicalType variant = LogicalTypes.VARIANT;
assertEquals(1, (variant.getVARIANT().getSpecification_version()));
Comment thread
aihuaxu marked this conversation as resolved.
Outdated
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
import org.apache.parquet.format.KeyValue;
import org.apache.parquet.format.LogicalType;
import org.apache.parquet.format.LogicalTypes;
import org.apache.parquet.format.MapType;
import org.apache.parquet.format.MicroSeconds;
import org.apache.parquet.format.MilliSeconds;
import org.apache.parquet.format.NanoSeconds;
Expand Down Expand Up @@ -514,6 +515,11 @@ public Optional<LogicalType> visit(LogicalTypeAnnotation.UnknownLogicalTypeAnnot
public Optional<LogicalType> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
return of(LogicalTypes.UNKNOWN);
}

@Override
public Optional<LogicalType> visit(LogicalTypeAnnotation.VariantLogicalTypeAnnotation variantLogicalType) {
return of(LogicalTypes.VARIANT);
}
}

private void addRowGroup(
Expand Down Expand Up @@ -1177,6 +1183,8 @@ LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) {
return LogicalTypeAnnotation.uuidType();
case FLOAT16:
return LogicalTypeAnnotation.float16Type();
case VARIANT:
return LogicalTypeAnnotation.variantType();
Comment thread
aihuaxu marked this conversation as resolved.
Outdated
default:
throw new RuntimeException("Unknown logical type " + type);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.variantType;
import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
Expand Down Expand Up @@ -1589,6 +1590,24 @@ public void testMapConvertedTypeReadWrite() throws Exception {
verifyMapMessageType(messageType, "map");
}

@Test
public void testVariantLogicalType() {
Comment thread
aihuaxu marked this conversation as resolved.
MessageType expected = Types.buildMessage()
.requiredGroup()
.as(variantType())
.required(PrimitiveTypeName.BINARY)
.named("metadata")
.required(PrimitiveTypeName.BINARY)
.named("value")
.named("v")
.named("example");

ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(expected);
MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
assertEquals(expected, schema);
}

private void verifyMapMessageType(final MessageType messageType, final String keyValueName) throws IOException {
Path file = new Path(temporaryFolder.newFolder("verifyMapMessageType").getPath(), keyValueName + ".parquet");

Expand Down