diff --git a/go/go.mod b/go/go.mod index c351887f..0197ccab 100644 --- a/go/go.mod +++ b/go/go.mod @@ -90,3 +90,6 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect gotest.tools/gotestsum v1.8.2 // indirect ) + +// Use spiceai fork that adds Spark:DataType:SqlName metadata fix when server omits ArrowSchema bytes +replace github.com/databricks/databricks-sql-go => github.com/spiceai/databricks-sql-go v0.0.0-20260314093348-8baf54cdcd47 diff --git a/go/go.sum b/go/go.sum index 065bd902..a97d3ee6 100644 --- a/go/go.sum +++ b/go/go.sum @@ -24,8 +24,6 @@ github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL github.com/coreos/go-oidc/v3 v3.5.0 h1:VxKtbccHZxs8juq7RdJntSqtXFtde9YpNpGn0yqgEHw= github.com/coreos/go-oidc/v3 v3.5.0/go.mod h1:ecXRtV4romGPeO6ieExAsUK9cb/3fp9hXNz1tlv8PIM= github.com/coreos/go-systemd/v22 v22.3.3-0.20220203105225-a9a7ef127534/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/databricks/databricks-sql-go v1.9.0 h1:h5w5E3FDMFXHqV7d5w5q3HCq1MVQswjSQfGx+43ThcI= -github.com/databricks/databricks-sql-go v1.9.0/go.mod h1:TGAVzvXadeKI8me3nKBa/2phLNnyWR6OolYq6iYbN3E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -115,6 +113,8 @@ github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.28.0 h1:MirSo27VyNi7RJYP3078AA1+Cyzd2GB66qy3aUHvsWY= github.com/rs/zerolog v1.28.0/go.mod h1:NILgTygv/Uej1ra5XxGf82ZFSLk58MFGAUS2o6usyD0= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spiceai/databricks-sql-go v0.0.0-20260314093348-8baf54cdcd47 h1:cWKWiSSm5gZL7+mzFtcx81OUpDLlImuYsXinCRNKdvI= +github.com/spiceai/databricks-sql-go v0.0.0-20260314093348-8baf54cdcd47/go.mod h1:TGAVzvXadeKI8me3nKBa/2phLNnyWR6OolYq6iYbN3E= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= diff --git a/go/ipc_reader_adapter.go b/go/ipc_reader_adapter.go index 72f7fc99..63daafad 100644 --- a/go/ipc_reader_adapter.go +++ b/go/ipc_reader_adapter.go @@ -130,6 +130,11 @@ func newIPCReaderAdapter(ctx context.Context, rows driver.Rows) (array.RecordRea reader.Release() } + // The server non-deterministically omits Spark:DataType:SqlName + // metadata from the IPC schema. Ensure all fields have metadata + // using driver.Rows column type info (always available via thrift). + adapter.schema = ensureSchemaMetadata(adapter.schema, rows) + if adapter.schema == nil { return nil, adbc.Error{ Code: adbc.StatusInternal, @@ -165,6 +170,11 @@ func schemaFromRowsMetadata(rows driver.Rows) (*arrow.Schema, error) { Name: name, Type: databricksTypeToArrow(dbType), Nullable: nullable, + // Spark:DataType:SqlName metadata for consistency with the + // server-provided schema and the databricks-sql-go logic. + Metadata: arrow.MetadataFrom(map[string]string{ + "Spark:DataType:SqlName": sparkSqlNameFromDBType(dbType), + }), } } return arrow.NewSchema(fields, nil), nil @@ -202,6 +212,54 @@ func databricksTypeToArrow(dbType string) arrow.DataType { } } +// sparkSqlNameFromDBType converts a database type name to a Spark SQL type name +// suitable for Spark:DataType:SqlName metadata. For DECIMAL, the precision and +// scale (38,18) are a default placeholder because ColumnTypeDatabaseTypeName +// returns just "DECIMAL" with no way to determine the actual precision or scale. +// Consumers that convert Utf8 to Decimal infer the actual precision and scale +// from the data values at query time. +func sparkSqlNameFromDBType(dbType string) string { + upper := strings.ToUpper(dbType) + if upper == "DECIMAL" { + return "DECIMAL(38,18)" + } + return upper +} + +// ensureSchemaMetadata adds Spark:DataType:SqlName metadata to schema fields +// that are missing it. The Databricks server non-deterministically omits +// metadata from the Arrow IPC schema. We use driver.Rows column type info +// (always available via thrift) to fill in the gaps. +func ensureSchemaMetadata(schema *arrow.Schema, rows driver.Rows) *arrow.Schema { + typed, ok := rows.(driver.RowsColumnTypeDatabaseTypeName) + if !ok { + return schema + } + + fields := schema.Fields() + changed := false + newFields := make([]arrow.Field, len(fields)) + + for i, f := range fields { + newFields[i] = f + if _, ok := f.Metadata.GetValue("Spark:DataType:SqlName"); ok { + continue + } + if dbType := typed.ColumnTypeDatabaseTypeName(i); dbType != "" { + newFields[i].Metadata = arrow.MetadataFrom(map[string]string{ + "Spark:DataType:SqlName": sparkSqlNameFromDBType(dbType), + }) + changed = true + } + } + + if !changed { + return schema + } + md := schema.Metadata() + return arrow.NewSchema(newFields, &md) +} + func (r *ipcReaderAdapter) loadNextReader() error { if r.currentReader != nil { r.currentReader.Release()