Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
87b438d
add json_to_variant
harshmotw-db Jun 25, 2025
a946ac6
comment fix
harshmotw-db Jun 25, 2025
bca3b81
Added another sample buffer managger
harshmotw-db Jun 25, 2025
339e880
minor refactoring
harshmotw-db Jun 25, 2025
fe798c3
Merge branch 'main' of https://github.com/harshmotw-db/arrow-rs into …
harshmotw-db Jun 25, 2025
882d3a7
Merge branch 'main' of https://github.com/harshmotw-db/arrow-rs into …
harshmotw-db Jun 25, 2025
3c18fdf
incorporated new changes
harshmotw-db Jun 25, 2025
67a83fe
minor changes
harshmotw-db Jun 26, 2025
c9aa519
Merge branch 'main' of https://github.com/harshmotw-db/arrow-rs into …
harshmotw-db Jun 26, 2025
dede88d
test fix based on recent commit
harshmotw-db Jun 26, 2025
fa3befc
constant fix
harshmotw-db Jun 26, 2025
38bac59
fix
harshmotw-db Jun 26, 2025
cd530ee
addressed comments
harshmotw-db Jun 26, 2025
57b3eb0
fix
harshmotw-db Jun 26, 2025
71b7d6f
fixed VariantBufferManager
harshmotw-db Jun 26, 2025
031c916
deduped a bit of code
harshmotw-db Jun 26, 2025
d4fc876
deduplicated code
harshmotw-db Jun 26, 2025
c41af4e
moved serde code out of variant.rs
harshmotw-db Jun 26, 2025
ecaf557
incorporated Ryan's latest comments
harshmotw-db Jun 26, 2025
4abc598
add more object tests
harshmotw-db Jun 26, 2025
0842ef8
fix
harshmotw-db Jun 26, 2025
94531af
doc fix
harshmotw-db Jun 26, 2025
28d0012
Merge branch 'main' into harsh-motwani_data/from_json
harshmotw-db Jun 27, 2025
e2788f5
Removed dependency on VariantBufferManager and leave that to later
harshmotw-db Jun 27, 2025
3178449
Merge branch 'harsh-motwani_data/from_json' of https://github.com/har…
harshmotw-db Jun 27, 2025
0455685
fix
harshmotw-db Jun 27, 2025
cc0b66e
fix
harshmotw-db Jun 30, 2025
d2a7516
resolved test comment
harshmotw-db Jun 30, 2025
a29b5c3
fixed more comments
harshmotw-db Jun 30, 2025
7f23cf5
fix decimal to string
harshmotw-db Jul 1, 2025
50f4b25
fmt and clippy
harshmotw-db Jul 1, 2025
560e430
fix
harshmotw-db Jul 1, 2025
3249d93
Merge remote-tracking branch 'apache/main' into harsh-motwani_data/fr…
alamb Jul 1, 2025
07d5688
clippy
alamb Jul 1, 2025
af937ac
Split tests into separate functions
alamb Jul 1, 2025
388f188
partially removed decimal dependency
harshmotw-db Jul 2, 2025
7407776
merge
harshmotw-db Jul 2, 2025
3b42d91
removed decimal type from variant json parsing
harshmotw-db Jul 2, 2025
43d6ea5
comment fix
harshmotw-db Jul 2, 2025
e9deda9
refined lifetimes
harshmotw-db Jul 3, 2025
eb11890
Fix clippy, remove unused dependency
alamb Jul 3, 2025
3531540
Merge remote-tracking branch 'apache/main' into harsh-motwani_data/fr…
alamb Jul 3, 2025
ea5b573
Update parquet-variant/src/from_json.rs
alamb Jul 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion parquet-variant/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ arrow-schema = { workspace = true }
chrono = { workspace = true }
# "arbitrary_precision" allows us to manually parse numbers. "preserve_order" does not automatically
# sort object keys
serde_json = { version = "1.0", features = ["arbitrary_precision", "preserve_order"] }
serde_json = { version = "1.0" }
rust_decimal = "1.37.2"
base64 = "0.22"

Expand Down
39 changes: 9 additions & 30 deletions parquet-variant/src/from_json.rs
Comment thread
alamb marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -89,36 +89,15 @@ fn variant_from_number<'a, 'b>(n: &Number) -> Result<Variant<'a, 'b>, ArrowError
Ok(i.into())
}
} else {
// Try decimal
// TODO: Replace with custom decimal parsing as the rust_decimal library only supports
// a max unscaled value of 2^96.
match Decimal::from_str_exact(n.as_str()) {
Ok(dec) => {
let unscaled: i128 = dec.mantissa();
let scale = dec.scale() as u8;
if unscaled.abs() <= VariantDecimal4::MAX_UNSCALED_VALUE as i128
&& scale <= VariantDecimal4::MAX_PRECISION as u8
{
(unscaled as i32, scale).try_into()
} else if unscaled.abs() <= VariantDecimal8::MAX_UNSCALED_VALUE as i128
&& scale <= VariantDecimal8::MAX_PRECISION as u8
{
(unscaled as i64, scale).try_into()
} else {
(unscaled, scale).try_into()
}
}
Err(_) => {
// Try double
match n.as_f64() {
Some(f) => return Ok(f.into()),
None => Err(ArrowError::InvalidArgumentError(format!(
"Failed to parse {} as number",
n.as_str()
))),
}?
}
}
// Todo: Try decimal once we implement custom JSON parsing where we have access to strings
// Try double - currently json_to_variant does not produce decimal
match n.as_f64() {
Some(f) => return Ok(f.into()),
None => Err(ArrowError::InvalidArgumentError(format!(
"Failed to parse {} as number",
n.to_string()
))),
}?
}
}

Expand Down
86 changes: 56 additions & 30 deletions parquet-variant/src/to_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,10 @@
//! Module for converting Variant data to JSON format
use arrow_schema::ArrowError;
use base64::{engine::general_purpose, Engine as _};
use rust_decimal::prelude::*;
use serde_json::{Number, Value};
use serde_json::Value;
use std::io::Write;

use crate::{
variant::{Variant, VariantList, VariantObject},
VariantDecimal,
};
use crate::variant::{Variant, VariantList, VariantObject};

// Format string constants to avoid duplication and reduce errors
const DATE_FORMAT: &str = "%Y-%m-%d";
Expand Down Expand Up @@ -249,27 +245,6 @@ pub fn variant_to_json_string(variant: &Variant) -> Result<String, ArrowError> {
.map_err(|e| ArrowError::InvalidArgumentError(format!("UTF-8 conversion error: {e}")))
}

fn variant_decimal_to_json_value(decimal: &impl VariantDecimal) -> Result<Value, ArrowError> {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like a macro could avoid both overhead and duplication? The only part that needs to change (besides the data types) is Decimal16 has a more complicated way of handling the integer value produced at the end.

let integer = decimal.integer();
let scale = decimal.scale();
if scale == 0 {
return Ok(Value::from(integer));
}
let divisor = 10_i128.pow(scale);
if integer % divisor != 0 {
// try decimal
if let Ok(dec) = Decimal::try_from_i128_with_scale(integer, scale) {
if let Ok(value) = serde_json::from_str::<Number>(&dec.to_string()) {
return Ok(serde_json::Value::Number(value));
}
}
// fall back to floating point
return Ok(Value::from(integer as f64 / divisor as f64));
}
// integer perfect division
Ok(Value::from(integer / divisor))
}

/// Convert [`Variant`] to [`serde_json::Value`]
///
/// This function converts a Variant to a [`serde_json::Value`], which is useful
Expand Down Expand Up @@ -311,9 +286,60 @@ pub fn variant_to_json_value(variant: &Variant) -> Result<Value, ArrowError> {
Variant::Double(f) => serde_json::Number::from_f64(*f)
.map(Value::Number)
.ok_or_else(|| ArrowError::InvalidArgumentError("Invalid double value".to_string())),
Variant::Decimal4(decimal4) => variant_decimal_to_json_value(decimal4),
Variant::Decimal8(decimal8) => variant_decimal_to_json_value(decimal8),
Variant::Decimal16(decimal16) => variant_decimal_to_json_value(decimal16),
Variant::Decimal4(decimal4) => {
let scale = decimal4.scale();
let integer = decimal4.integer();

let integer = if scale == 0 {
integer
} else {
let divisor = 10_i32.pow(scale as u32);
if integer % divisor != 0 {
// fall back to floating point
return Ok(Value::from(integer as f64 / divisor as f64));
}
integer / divisor
};
Ok(Value::from(integer))
}
Variant::Decimal8(decimal8) => {
let scale = decimal8.scale();
let integer = decimal8.integer();

let integer = if scale == 0 {
integer
} else {
let divisor = 10_i64.pow(scale as u32);
if integer % divisor != 0 {
// fall back to floating point
return Ok(Value::from(integer as f64 / divisor as f64));
}
integer / divisor
};
Ok(Value::from(integer))
}
Variant::Decimal16(decimal16) => {
let scale = decimal16.scale();
let integer = decimal16.integer();

let integer = if scale == 0 {
integer
} else {
let divisor = 10_i128.pow(scale as u32);
if integer % divisor != 0 {
// fall back to floating point
return Ok(Value::from(integer as f64 / divisor as f64));
}
integer / divisor
};
// i128 has higher precision than any 64-bit type. Try a lossless narrowing cast to
// i64 or u64 first, falling back to a lossy narrowing cast to f64 if necessary.
let value = i64::try_from(integer)
.map(Value::from)
.or_else(|_| u64::try_from(integer).map(Value::from))
.unwrap_or_else(|_| Value::from(integer as f64));
Ok(value)
}
Variant::Date(date) => Ok(Value::String(format_date_string(date))),
Variant::TimestampMicros(ts) => Ok(Value::String(ts.to_rfc3339())),
Variant::TimestampNtzMicros(ts) => Ok(Value::String(format_timestamp_ntz_string(ts))),
Expand Down
2 changes: 1 addition & 1 deletion parquet-variant/src/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::ops::Deref;
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
pub use self::decimal::{VariantDecimal, VariantDecimal16, VariantDecimal4, VariantDecimal8};
pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
pub use self::list::VariantList;
pub use self::metadata::VariantMetadata;
pub use self::object::VariantObject;
Expand Down
36 changes: 0 additions & 36 deletions parquet-variant/src/variant/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,6 @@ macro_rules! format_decimal {
}};
}

// Common trait to classify VariantDecimalXXX types
pub trait VariantDecimal {
fn integer(&self) -> i128;
fn scale(&self) -> u32;
}

/// Represents a 4-byte decimal value in the Variant format.
///
/// This struct stores a decimal number using a 32-bit signed integer for the coefficient
Expand Down Expand Up @@ -107,16 +101,6 @@ impl VariantDecimal4 {
}
}

impl VariantDecimal for VariantDecimal4 {
fn integer(&self) -> i128 {
self.integer() as i128
}

fn scale(&self) -> u32 {
self.scale() as u32
}
}

impl fmt::Display for VariantDecimal4 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
format_decimal!(f, self.integer, self.scale, i32)
Expand Down Expand Up @@ -185,16 +169,6 @@ impl VariantDecimal8 {
}
}

impl VariantDecimal for VariantDecimal8 {
fn integer(&self) -> i128 {
self.integer() as i128
}

fn scale(&self) -> u32 {
self.scale() as u32
}
}

impl fmt::Display for VariantDecimal8 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
format_decimal!(f, self.integer, self.scale, i64)
Expand Down Expand Up @@ -263,16 +237,6 @@ impl VariantDecimal16 {
}
}

impl VariantDecimal for VariantDecimal16 {
fn integer(&self) -> i128 {
self.integer()
}

fn scale(&self) -> u32 {
self.scale() as u32
}
}

impl fmt::Display for VariantDecimal16 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
format_decimal!(f, self.integer, self.scale, i128)
Expand Down