Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions go/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,6 @@ require (
gopkg.in/yaml.v3 v3.0.1 // indirect
gotest.tools/gotestsum v1.8.2 // indirect
)

// Use spiceai fork that adds Spark:DataType:SqlName metadata fix when server omits ArrowSchema bytes
replace github.com/databricks/databricks-sql-go => github.com/spiceai/databricks-sql-go v0.0.0-20260314093348-8baf54cdcd47
4 changes: 2 additions & 2 deletions go/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL
github.com/coreos/go-oidc/v3 v3.5.0 h1:VxKtbccHZxs8juq7RdJntSqtXFtde9YpNpGn0yqgEHw=
github.com/coreos/go-oidc/v3 v3.5.0/go.mod h1:ecXRtV4romGPeO6ieExAsUK9cb/3fp9hXNz1tlv8PIM=
github.com/coreos/go-systemd/v22 v22.3.3-0.20220203105225-a9a7ef127534/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/databricks/databricks-sql-go v1.9.0 h1:h5w5E3FDMFXHqV7d5w5q3HCq1MVQswjSQfGx+43ThcI=
github.com/databricks/databricks-sql-go v1.9.0/go.mod h1:TGAVzvXadeKI8me3nKBa/2phLNnyWR6OolYq6iYbN3E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down Expand Up @@ -115,6 +113,8 @@ github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.28.0 h1:MirSo27VyNi7RJYP3078AA1+Cyzd2GB66qy3aUHvsWY=
github.com/rs/zerolog v1.28.0/go.mod h1:NILgTygv/Uej1ra5XxGf82ZFSLk58MFGAUS2o6usyD0=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spiceai/databricks-sql-go v0.0.0-20260314093348-8baf54cdcd47 h1:cWKWiSSm5gZL7+mzFtcx81OUpDLlImuYsXinCRNKdvI=
github.com/spiceai/databricks-sql-go v0.0.0-20260314093348-8baf54cdcd47/go.mod h1:TGAVzvXadeKI8me3nKBa/2phLNnyWR6OolYq6iYbN3E=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
Expand Down
58 changes: 58 additions & 0 deletions go/ipc_reader_adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ func newIPCReaderAdapter(ctx context.Context, rows driver.Rows) (array.RecordRea
reader.Release()
}

// The server non-deterministically omits Spark:DataType:SqlName
// metadata from the IPC schema. Ensure all fields have metadata
// using driver.Rows column type info (always available via thrift).
adapter.schema = ensureSchemaMetadata(adapter.schema, rows)

if adapter.schema == nil {
return nil, adbc.Error{
Code: adbc.StatusInternal,
Expand Down Expand Up @@ -165,6 +170,11 @@ func schemaFromRowsMetadata(rows driver.Rows) (*arrow.Schema, error) {
Name: name,
Type: databricksTypeToArrow(dbType),
Nullable: nullable,
// Spark:DataType:SqlName metadata for consistency with the
// server-provided schema and the databricks-sql-go logic.
Metadata: arrow.MetadataFrom(map[string]string{
"Spark:DataType:SqlName": sparkSqlNameFromDBType(dbType),
}),
}
}
return arrow.NewSchema(fields, nil), nil
Expand Down Expand Up @@ -202,6 +212,54 @@ func databricksTypeToArrow(dbType string) arrow.DataType {
}
}

// sparkSqlNameFromDBType converts a database type name to a Spark SQL type name
// suitable for Spark:DataType:SqlName metadata. For DECIMAL, the precision and
// scale (38,18) are a default placeholder because ColumnTypeDatabaseTypeName
// returns just "DECIMAL" with no way to determine the actual precision or scale.
// Consumers that convert Utf8 to Decimal infer the actual precision and scale
// from the data values at query time.
func sparkSqlNameFromDBType(dbType string) string {
upper := strings.ToUpper(dbType)
if upper == "DECIMAL" {
return "DECIMAL(38,18)"
}
return upper
}

// ensureSchemaMetadata adds Spark:DataType:SqlName metadata to schema fields
// that are missing it. The Databricks server non-deterministically omits
// metadata from the Arrow IPC schema. We use driver.Rows column type info
// (always available via thrift) to fill in the gaps.
func ensureSchemaMetadata(schema *arrow.Schema, rows driver.Rows) *arrow.Schema {
typed, ok := rows.(driver.RowsColumnTypeDatabaseTypeName)
if !ok {
return schema
}

fields := schema.Fields()
changed := false
newFields := make([]arrow.Field, len(fields))

for i, f := range fields {
newFields[i] = f
if _, ok := f.Metadata.GetValue("Spark:DataType:SqlName"); ok {
continue
}
if dbType := typed.ColumnTypeDatabaseTypeName(i); dbType != "" {
newFields[i].Metadata = arrow.MetadataFrom(map[string]string{
"Spark:DataType:SqlName": sparkSqlNameFromDBType(dbType),
})
changed = true
}
}

if !changed {
return schema
}
md := schema.Metadata()
return arrow.NewSchema(newFields, &md)
}

func (r *ipcReaderAdapter) loadNextReader() error {
if r.currentReader != nil {
r.currentReader.Release()
Expand Down