Skip to content
Merged
70 changes: 70 additions & 0 deletions internal/translate/databrickspyspark/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

import (
"fmt"
"math"
"strconv"
"strings"

"github.com/dacolabs/cli/internal/translate"
)
Expand Down Expand Up @@ -56,8 +58,76 @@
}

func (r *resolver) EnrichField(f *translate.Field) {
c := f.Constraints

switch f.Type {
case "T.DoubleType()":
if c.MultipleOf != nil {
if scale := computeDecimalScale(*c.MultipleOf); scale > 0 {
precision := computeDecimalPrecision(c.Maximum, scale)
f.Type = fmt.Sprintf("T.DecimalType(%d, %d)", precision, scale)
Comment thread
gummiorri marked this conversation as resolved.
Outdated
}
} else if c.Minimum != nil && c.Maximum != nil {
f.Type = inferNumberType(*c.Minimum, *c.Maximum)
}

case "T.LongType()":
if c.Minimum != nil && c.Maximum != nil {
f.Type = inferIntegerType(*c.Minimum, *c.Maximum)
}
}

if f.Description != "" {
escaped := strconv.Quote(f.Description)
f.Tag = `, metadata={"comment": ` + escaped + `}`
}
}

// computeDecimalPrecision derives a decimal precision (total digit count) from
// maximum and scale.
//
// The precision is the number of integer digits in |maximum| plus scale. When
// |maximum| is below 1 there are no integer digits and the precision equals
// scale. The result is clamped to 38, the largest precision Spark's
// DecimalType accepts. 38 is also returned when maximum is nil, NaN or
// infinite, because no digit count can be derived in those cases.
//
// Only maximum is considered; a minimum with a larger magnitude is not visible
// to this function.
func computeDecimalPrecision(maximum *float64, scale int) int {
	const sparkMaxPrecision = 38

	if maximum == nil {
		return sparkMaxPrecision
	}
	bound := math.Abs(*maximum)
	if math.IsNaN(bound) || math.IsInf(bound, 0) {
		// Formatting these would yield "NaN"/"+Inf", whose length is not a
		// digit count.
		return sparkMaxPrecision
	}

	precision := scale
	if bound >= 1 {
		precision += len(strconv.FormatFloat(math.Floor(bound), 'f', 0, 64))
	}
	if precision > sparkMaxPrecision {
		return sparkMaxPrecision
	}
	return precision
}

// computeDecimalScale reports how many digits follow the decimal point in the
// shortest decimal rendering of multipleOf.
//
// It returns -1 when multipleOf is not a positive fraction below 1 (that is,
// when it is <= 0 or >= 1), or when its rendering has no decimal point.
func computeDecimalScale(multipleOf float64) int {
	if !(multipleOf > 0 && multipleOf < 1) {
		return -1
	}

	rendered := strconv.FormatFloat(multipleOf, 'f', -1, 64)
	dot := strings.Index(rendered, ".")
	if dot < 0 {
		return -1
	}
	// Everything after the dot is a fractional digit.
	return len(rendered) - (dot + 1)
}

// inferIntegerType returns the narrowest Spark integer type whose value range
// contains the inclusive interval [lower, upper].
//
// The candidates, in order, are T.ByteType() (int8), T.ShortType() (int16) and
// T.IntegerType() (int32). T.LongType() is returned when none of them fits.
func inferIntegerType(lower, upper float64) string {
	switch {
	case lower >= math.MinInt8 && upper <= math.MaxInt8:
		return "T.ByteType()"
	case lower >= math.MinInt16 && upper <= math.MaxInt16:
		return "T.ShortType()"
	case lower >= math.MinInt32 && upper <= math.MaxInt32:
		return "T.IntegerType()"
	default:
		return "T.LongType()"
	}
}

// inferNumberType returns "T.FloatType()" when the inclusive interval
// [lower, upper] lies within the float32 range, and "T.DoubleType()"
// otherwise.
//
// Only the magnitude of the bounds is checked; the number of significant
// digits a value needs is not taken into account.
func inferNumberType(lower, upper float64) string {
	if lower >= -math.MaxFloat32 && upper <= math.MaxFloat32 {
		return "T.FloatType()"
	}
	return "T.DoubleType()"
}
97 changes: 97 additions & 0 deletions internal/translate/databrickspyspark/translator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,103 @@ func TestTranslate_MetadataComment(t *testing.T) {
assert.NotContains(t, result, `"name", T.StringType(), nullable=True, metadata=`)
}

// TestTranslate_DecimalType checks that a number property constrained only by
// multipleOf 0.01 is rendered as T.DecimalType(38, 2).
func TestTranslate_DecimalType(t *testing.T) {
	step := 0.01
	price := &jsonschema.Schema{Type: "number", MultipleOf: &step}
	root := &jsonschema.Schema{
		Type:       "object",
		Properties: map[string]*jsonschema.Schema{"price": price},
	}

	tr := &Translator{}
	generated, err := tr.Translate("order", root, "schemas")

	require.NoError(t, err)
	assert.Contains(t, string(generated), "T.DecimalType(38, 2)")
}

func TestTranslate_DecimalTypeWithMaximum(t *testing.T) {
multipleOf := 0.01
max := 99999.99
schema := &jsonschema.Schema{
Type: "object",
Properties: map[string]*jsonschema.Schema{
"price": {Type: "number", MultipleOf: &multipleOf, Maximum: &max},
},
}

translator := &Translator{}
output, err := translator.Translate("order", schema, "schemas")
require.NoError(t, err)
assert.Contains(t, string(output), "T.DecimalType(7, 2)")
Comment thread
gummiorri marked this conversation as resolved.
}

func TestTranslate_FloatType(t *testing.T) {
min := -1000.0
max := 1000.0
schema := &jsonschema.Schema{
Type: "object",
Properties: map[string]*jsonschema.Schema{
"temperature": {Type: "number", Minimum: &min, Maximum: &max},
},
}

translator := &Translator{}
output, err := translator.Translate("sensor", schema, "schemas")
require.NoError(t, err)
assert.Contains(t, string(output), "T.FloatType()")
Comment thread
gummiorri marked this conversation as resolved.
Outdated
Comment thread
gummiorri marked this conversation as resolved.
Outdated
}

// TestTranslate_IntegerType checks that an integer property bounded to
// [0, 2147483647] is rendered as T.IntegerType().
func TestTranslate_IntegerType(t *testing.T) {
	lower := 0.0
	upper := 2147483647.0
	count := &jsonschema.Schema{Type: "integer", Minimum: &lower, Maximum: &upper}
	root := &jsonschema.Schema{
		Type:       "object",
		Properties: map[string]*jsonschema.Schema{"count": count},
	}

	tr := &Translator{}
	generated, err := tr.Translate("data", root, "schemas")

	require.NoError(t, err)
	assert.Contains(t, string(generated), "T.IntegerType()")
}

// TestTranslate_ByteType checks that an integer property bounded to [0, 127]
// is rendered as T.ByteType().
func TestTranslate_ByteType(t *testing.T) {
	lower := 0.0
	upper := 127.0
	flags := &jsonschema.Schema{Type: "integer", Minimum: &lower, Maximum: &upper}
	root := &jsonschema.Schema{
		Type:       "object",
		Properties: map[string]*jsonschema.Schema{"flags": flags},
	}

	tr := &Translator{}
	generated, err := tr.Translate("data", root, "schemas")

	require.NoError(t, err)
	assert.Contains(t, string(generated), "T.ByteType()")
}

// TestTranslate_DecimalWithMetadata checks that a described number property
// with multipleOf 0.01 yields both the decimal type and the metadata comment.
func TestTranslate_DecimalWithMetadata(t *testing.T) {
	step := 0.01
	price := &jsonschema.Schema{
		Type:        "number",
		MultipleOf:  &step,
		Description: "Product price",
	}
	root := &jsonschema.Schema{
		Type:       "object",
		Properties: map[string]*jsonschema.Schema{"price": price},
	}

	tr := &Translator{}
	generated, err := tr.Translate("order", root, "schemas")
	require.NoError(t, err)

	rendered := string(generated)
	assert.Contains(t, rendered, "T.DecimalType(38, 2)")
	assert.Contains(t, rendered, `metadata={"comment": "Product price"}`)
}

func TestFileExtension(t *testing.T) {
translator := &Translator{}
assert.Equal(t, ".py", translator.FileExtension())
Expand Down
75 changes: 73 additions & 2 deletions internal/translate/pyspark/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

import (
"fmt"
"math"
"strconv"
"strings"

"github.com/dacolabs/cli/internal/translate"
)
Expand Down Expand Up @@ -53,6 +56,74 @@
return portName + "_schema"
}

func (r *resolver) EnrichField(_ *translate.Field) {
// PySpark needs no field enrichment.
func (r *resolver) EnrichField(f *translate.Field) {
c := f.Constraints

switch f.Type {
case "T.DoubleType()":
if c.MultipleOf != nil {
if scale := computeDecimalScale(*c.MultipleOf); scale > 0 {
precision := computeDecimalPrecision(c.Maximum, scale)
Comment thread
gummiorri marked this conversation as resolved.
Outdated
f.Type = fmt.Sprintf("T.DecimalType(%d, %d)", precision, scale)
return
}
}
if c.Minimum != nil && c.Maximum != nil {
f.Type = inferNumberType(*c.Minimum, *c.Maximum)
}

case "T.LongType()":
if c.Minimum != nil && c.Maximum != nil {
f.Type = inferIntegerType(*c.Minimum, *c.Maximum)
}
}
}

// computeDecimalPrecision derives a decimal precision (total digit count) from
// maximum and scale.
//
// The precision is the number of integer digits in |maximum| plus scale. When
// |maximum| is below 1 there are no integer digits and the precision equals
// scale. The result is clamped to 38, the largest precision Spark's
// DecimalType accepts. 38 is also returned when maximum is nil, NaN or
// infinite, because no digit count can be derived in those cases.
//
// Only maximum is considered; a minimum with a larger magnitude is not visible
// to this function.
func computeDecimalPrecision(maximum *float64, scale int) int {
	const sparkMaxPrecision = 38

	if maximum == nil {
		return sparkMaxPrecision
	}
	limit := math.Abs(*maximum)
	if math.IsNaN(limit) || math.IsInf(limit, 0) {
		// Formatting these would yield "NaN"/"+Inf", whose length is not a
		// digit count.
		return sparkMaxPrecision
	}

	total := scale
	if limit >= 1 {
		total += len(strconv.FormatFloat(math.Floor(limit), 'f', 0, 64))
	}
	if total > sparkMaxPrecision {
		return sparkMaxPrecision
	}
	return total
}

// computeDecimalScale counts the fractional digits in the shortest decimal
// rendering of multipleOf.
//
// It returns -1 when multipleOf is <= 0 or >= 1 (not a positive decimal
// fraction), or when the rendering contains no decimal point.
func computeDecimalScale(multipleOf float64) int {
	switch {
	case multipleOf <= 0, multipleOf >= 1:
		return -1
	}

	text := strconv.FormatFloat(multipleOf, 'f', -1, 64)
	pos := strings.Index(text, ".")
	if pos >= 0 {
		// Length of the substring that follows the decimal point.
		return len(text[pos+1:])
	}
	return -1
}

// inferIntegerType returns the narrowest Spark integer type whose value range
// contains the inclusive interval [lower, upper].
//
// The candidates, in order, are T.ByteType() (int8), T.ShortType() (int16) and
// T.IntegerType() (int32). T.LongType() is returned when none of them fits.
func inferIntegerType(lower, upper float64) string {
	switch {
	case lower >= math.MinInt8 && upper <= math.MaxInt8:
		return "T.ByteType()"
	case lower >= math.MinInt16 && upper <= math.MaxInt16:
		return "T.ShortType()"
	case lower >= math.MinInt32 && upper <= math.MaxInt32:
		return "T.IntegerType()"
	default:
		return "T.LongType()"
	}
}

// inferNumberType returns "T.FloatType()" when the inclusive interval
// [lower, upper] lies within the float32 range, and "T.DoubleType()"
// otherwise.
//
// Only the magnitude of the bounds is checked; the number of significant
// digits a value needs is not taken into account.
func inferNumberType(lower, upper float64) string {
	if lower >= -math.MaxFloat32 && upper <= math.MaxFloat32 {
		return "T.FloatType()"
	}
	return "T.DoubleType()"
}
Loading
Loading