apache · caicancai · Jun 5, 2026 · Jun 5, 2026 · mihaibudiu · May 21, 2026
diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/AbstractArrowEnumerator.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/AbstractArrowEnumerator.java
@@ -16,6 +16,7 @@
  */
 package org.apache.calcite.adapter.arrow;
 
+import org.apache.calcite.avatica.util.ByteString;
 import org.apache.calcite.linq4j.Enumerator;
 import org.apache.calcite.util.ImmutableIntList;
 import org.apache.calcite.util.Util;
@@ -95,7 +96,11 @@ private static Object getValue(ValueVector vector, int index) {
           (ArrowType.Timestamp) vector.getField().getType();
       return toMillis(rawValue, tsType.getUnit());
     }
-    return vector.getObject(index);
+    final Object value = vector.getObject(index);
+    if (value instanceof byte[]) {
+      return new ByteString((byte[]) value);
+    }
+    return value;
   }
 
   /** Converts a raw timestamp value to milliseconds since epoch.

diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowDirectEnumerator.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowDirectEnumerator.java
@@ -26,6 +26,10 @@
 
 /**
  * Enumerator that reads projected Arrow value-vectors directly.
+ *
+ * <p>This path is used for identity projections that Gandiva cannot project
+ * through the existing {@code Projector} path, such as Arrow binary vectors.
+ * It is not a replacement for Gandiva expression evaluation.
  */
 class ArrowDirectEnumerator extends AbstractArrowEnumerator {
   private final Runnable onClose;

diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowEnumerable.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowEnumerable.java
@@ -56,6 +56,8 @@ class ArrowEnumerable extends AbstractEnumerable<Object> {
         return new ArrowFilterEnumerator(arrowFileReader, fields, filter,
             onClose);
       }
+      // No projector and no filter means the query is an identity projection
+      // that should read selected value-vectors directly.
       return new ArrowDirectEnumerator(arrowFileReader, fields, onClose);
     } catch (Exception e) {
       throw Util.toUnchecked(e);

diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowFieldTypeFactory.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowFieldTypeFactory.java
@@ -66,6 +66,12 @@ private static RelDataType of(Field field, JavaTypeFactory typeFactory) {
       return typeFactory.createSqlType(SqlTypeName.BOOLEAN);
     case Utf8:
       return typeFactory.createSqlType(SqlTypeName.VARCHAR);
+    case Binary:
+    case LargeBinary:
+      return typeFactory.createSqlType(SqlTypeName.VARBINARY);
+    case FixedSizeBinary:
+      return typeFactory.createSqlType(SqlTypeName.BINARY,
+          ((ArrowType.FixedSizeBinary) arrowType).getByteWidth());
     case FloatingPoint:
       FloatingPointPrecision precision = ((ArrowType.FloatingPoint) arrowType).getPrecision();
       switch (precision) {

diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java
@@ -203,10 +203,10 @@ private static RelDataType deduceRowType(Schema schema,
   }
 
   private @Nullable Projector makeProjector(ImmutableIntList fields) {
-    if (containsListField(fields)) {
+    if (requiresDirectVectorProjection(fields)) {
       // Returning null selects ArrowEnumerable's direct vector-read path.
-      // Use that path for list fields because Gandiva does not support identity
-      // projection expressions over Arrow List vectors.
+      // Use that path because Gandiva does not support identity projection
+      // expressions over Arrow List and binary vectors.
       return null;
     }
 
@@ -223,11 +223,24 @@ private static RelDataType deduceRowType(Schema schema,
     }
   }
 
-  private boolean containsListField(ImmutableIntList fields) {
+  /** Returns whether selected fields should be projected by reading Arrow
+   * value-vectors directly rather than by creating a Gandiva projector.
+   *
+   * <p>CALCITE-7541 extends this direct projection path for Arrow binary vector
+   * families because Gandiva cannot project them through the existing identity
+   * projection path. Queries with filters still use Gandiva filters; this direct
+   * path only applies to no-filter projections.
+   */
+  private boolean requiresDirectVectorProjection(ImmutableIntList fields) {
     for (int fieldOrdinal : fields) {
-      if (schema.getFields().get(fieldOrdinal).getType().getTypeID()
-          == ArrowType.ArrowTypeID.List) {
+      switch (schema.getFields().get(fieldOrdinal).getType().getTypeID()) {
+      case List:
+      case Binary:
+      case LargeBinary:
+      case FixedSizeBinary:
         return true;
+      default:
+        break;
       }
     }
     return false;

diff --git a/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterDataTypesTest.java b/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterDataTypesTest.java
@@ -65,6 +65,10 @@ static void initializeArrowState(@TempDir Path sharedTempDir)
     ArrowDataTest arrowListDataGenerator = new ArrowDataTest();
     arrowListDataGenerator.writeArrowListData(listDataLocationFile);
 
+    File binaryDataLocationFile = arrowFilesDirectory.resolve("arrowbinary.arrow").toFile();
+    ArrowDataTest arrowBinaryDataGenerator = new ArrowDataTest();
+    arrowBinaryDataGenerator.writeArrowBinaryData(binaryDataLocationFile);
+
     arrow = ImmutableMap.of("model", modelFileTarget.toAbsolutePath().toString());
   }
 
@@ -82,6 +86,24 @@ static void initializeArrowState(@TempDir Path sharedTempDir)
         .explainContains(plan);
   }
 
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-7541">[CALCITE-7541]
+   * Support Binary Arrow types in Arrow adapter</a>. */
+  @Test void testBinaryProject() {
+    String sql = "select \"binaryField\", \"largeBinaryField\", \"fixedSizeBinaryField\" "
+        + "from arrowbinary";
+    String plan = "PLAN=ArrowToEnumerableConverter\n"
+        + "  ArrowTableScan(table=[[ARROW, ARROWBINARY]], fields=[[0, 1, 2]])\n\n";
+    String result = "binaryField=0001; largeBinaryField=0a0b; fixedSizeBinaryField=141516\n"
+        + "binaryField=null; largeBinaryField=null; fixedSizeBinaryField=null\n"
+        + "binaryField=020304; largeBinaryField=0c0d0e; fixedSizeBinaryField=171819\n";
+    CalciteAssert.that()
+        .with(arrow)
+        .query(sql)
+        .returns(result)
+        .explainContains(plan);
+  }
+
   @Test void testTinyIntProject() {
     String sql = "select \"tinyIntField\" from arrowdatatype";
     String plan = "PLAN=ArrowToEnumerableConverter\n"

diff --git a/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowDataTest.java b/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowDataTest.java
@@ -29,16 +29,19 @@
 import org.apache.arrow.vector.DateDayVector;
 import org.apache.arrow.vector.DecimalVector;
 import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
 import org.apache.arrow.vector.Float8Vector;
 import org.apache.arrow.vector.FloatingPointVector;
 import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.LargeVarBinaryVector;
 import org.apache.arrow.vector.SmallIntVector;
 import org.apache.arrow.vector.TimeSecVector;
 import org.apache.arrow.vector.TimeStampMicroVector;
 import org.apache.arrow.vector.TimeStampMilliVector;
 import org.apache.arrow.vector.TimeStampNanoVector;
 import org.apache.arrow.vector.TimeStampSecVector;
 import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VarBinaryVector;
 import org.apache.arrow.vector.VarCharVector;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.complex.ListVector;
@@ -179,6 +182,19 @@ private Schema makeArrowSchema() {
     return new Schema(childrenBuilder.build(), null);
   }
 
+  private Schema makeArrowBinarySchema() {
+    ImmutableList.Builder<Field> childrenBuilder = ImmutableList.builder();
+    FieldType binaryType = FieldType.nullable(new ArrowType.Binary());
+    FieldType largeBinaryType = FieldType.nullable(new ArrowType.LargeBinary());
+    FieldType fixedSizeBinaryType =
+        FieldType.nullable(new ArrowType.FixedSizeBinary(3));
+
+    childrenBuilder.add(new Field("binaryField", binaryType, null));
+    childrenBuilder.add(new Field("largeBinaryField", largeBinaryType, null));
+    childrenBuilder.add(new Field("fixedSizeBinaryField", fixedSizeBinaryType, null));
+
+    return new Schema(childrenBuilder.build(), null);
+  }
 
   public void writeScottEmpData(Path arrowDataDirectory) throws IOException, SQLException {
     List<String> tableNames = ImmutableList.of("EMP", "DEPT", "SALGRADE");
@@ -265,6 +281,26 @@ public void writeArrowData(File file) throws IOException {
     fileOutputStream.close();
   }
 
+  public void writeArrowBinaryData(File file) throws IOException {
+    FileOutputStream fileOutputStream = new FileOutputStream(file);
+    Schema arrowSchema = makeArrowBinarySchema();
+    VectorSchemaRoot vectorSchemaRoot =
+        VectorSchemaRoot.create(arrowSchema, new RootAllocator(Integer.MAX_VALUE));
+    ArrowFileWriter arrowFileWriter =
+        new ArrowFileWriter(vectorSchemaRoot, null, fileOutputStream.getChannel());
+
+    arrowFileWriter.start();
+    vectorSchemaRoot.setRowCount(3);
+    binaryField(vectorSchemaRoot.getVector("binaryField"));
+    largeBinaryField(vectorSchemaRoot.getVector("largeBinaryField"));
+    fixedSizeBinaryField(vectorSchemaRoot.getVector("fixedSizeBinaryField"));
+    arrowFileWriter.writeBatch();
+    arrowFileWriter.end();
+    arrowFileWriter.close();
+    fileOutputStream.flush();
+    fileOutputStream.close();
+  }
+
   public void writeArrowDataType(File file) throws IOException {
     FileOutputStream fileOutputStream = new FileOutputStream(file);
     Schema arrowSchema = makeArrowDateTypeSchema();
@@ -342,7 +378,6 @@ public void writeArrowDataType(File file) throws IOException {
     fileOutputStream.close();
   }
 
-
   public void writeArrowListData(File file) throws IOException {
     Schema arrowSchema = makeArrowListSchema();
     try (RootAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
@@ -361,6 +396,36 @@ public void writeArrowListData(File file) throws IOException {
     }
   }
 
+  private void binaryField(FieldVector fieldVector) {
+    VarBinaryVector binaryVector = (VarBinaryVector) fieldVector;
+    binaryVector.setInitialCapacity(3);
+    binaryVector.allocateNew();
+    binaryVector.setSafe(0, new byte[] {0, 1});
+    binaryVector.setNull(1);
+    binaryVector.setSafe(2, new byte[] {2, 3, 4});
+    fieldVector.setValueCount(3);
+  }
+
+  private void largeBinaryField(FieldVector fieldVector) {
+    LargeVarBinaryVector largeBinaryVector = (LargeVarBinaryVector) fieldVector;
+    largeBinaryVector.setInitialCapacity(3);
+    largeBinaryVector.allocateNew();
+    largeBinaryVector.setSafe(0, new byte[] {10, 11});
+    largeBinaryVector.setNull(1);
+    largeBinaryVector.setSafe(2, new byte[] {12, 13, 14});
+    fieldVector.setValueCount(3);
+  }
+
+  private void fixedSizeBinaryField(FieldVector fieldVector) {
+    FixedSizeBinaryVector fixedSizeBinaryVector = (FixedSizeBinaryVector) fieldVector;
+    fixedSizeBinaryVector.setInitialCapacity(3);
+    fixedSizeBinaryVector.allocateNew();
+    fixedSizeBinaryVector.setSafe(0, new byte[] {20, 21, 22});
+    fixedSizeBinaryVector.setNull(1);
+    fixedSizeBinaryVector.setSafe(2, new byte[] {23, 24, 25});
+    fieldVector.setValueCount(3);
+  }
+
   private void tinyIntField(FieldVector fieldVector, int rowCount) {
     TinyIntVector tinyIntVector = (TinyIntVector) fieldVector;
     tinyIntVector.setInitialCapacity(rowCount);