-
Notifications
You must be signed in to change notification settings - Fork 1.2k
[VARIANT] Add support for the json_to_variant API #7783
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
87b438d
a946ac6
bca3b81
339e880
fe798c3
882d3a7
3c18fdf
67a83fe
c9aa519
dede88d
fa3befc
38bac59
cd530ee
57b3eb0
71b7d6f
031c916
d4fc876
c41af4e
ecaf557
4abc598
0842ef8
94531af
28d0012
e2788f5
3178449
0455685
cc0b66e
d2a7516
a29b5c3
7f23cf5
50f4b25
560e430
3249d93
07d5688
af937ac
388f188
7407776
3b42d91
43d6ea5
e9deda9
eb11890
3531540
ea5b573
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,20 +19,26 @@ | |
|
|
||
| use parquet_variant::{ | ||
| json_to_variant, variant_to_json, variant_to_json_string, variant_to_json_value, | ||
| SampleBoxBasedVariantBufferManager, SampleVecBasedVariantBufferManager, VariantBufferManager, | ||
| SampleVecBasedVariantBufferManager, | ||
| }; | ||
|
|
||
| fn from_json_example<T: VariantBufferManager>( | ||
| variant_buffer_manager: &mut T, | ||
| ) -> Result<(), Box<dyn std::error::Error>> { | ||
| fn main() -> Result<(), Box<dyn std::error::Error>> { | ||
| // The caller must provide an object implementing the `VariantBufferManager` trait to the library. | ||
| // This allows the library to write the constructed variant to buffers provided by the caller. | ||
| // This way, the caller has direct control over the output buffers. | ||
| let mut variant_buffer_manager = SampleVecBasedVariantBufferManager { | ||
| value_buffer: vec![0u8; 1], | ||
| metadata_buffer: vec![0u8; 1], | ||
| }; | ||
|
|
||
| let person_string = "{\"name\":\"Alice\", \"age\":30, ".to_string() | ||
| + "\"email\":\"[email protected]\", \"is_active\": true, \"score\": 95.7," | ||
| + "\"additional_info\": null}"; | ||
| let (metadata_size, value_size) = json_to_variant(&person_string, variant_buffer_manager)?; | ||
| let (metadata_size, value_size) = json_to_variant(&person_string, &mut variant_buffer_manager)?; | ||
|
|
||
| let variant = parquet_variant::Variant::try_new( | ||
| &variant_buffer_manager.get_immutable_metadata_buffer()[..metadata_size], | ||
| &variant_buffer_manager.get_immutable_value_buffer()[..value_size], | ||
| &variant_buffer_manager.metadata_buffer[..metadata_size], | ||
| &variant_buffer_manager.value_buffer[..value_size], | ||
| )?; | ||
|
|
||
| let json_string = variant_to_json_string(&variant)?; | ||
|
|
@@ -50,22 +56,3 @@ fn from_json_example<T: VariantBufferManager>( | |
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| fn main() -> Result<(), Box<dyn std::error::Error>> { | ||
| // The caller must provide an object implementing the `VariantBufferManager` trait to the library. | ||
| // This allows the library to write the constructed variant to buffers provided by the caller. | ||
| // This way, the caller has direct control over the output buffers. | ||
| let mut box_based_buffer_manager = SampleBoxBasedVariantBufferManager { | ||
| value_buffer: vec![0u8; 1].into_boxed_slice(), | ||
| metadata_buffer: vec![0u8; 1].into_boxed_slice(), | ||
| }; | ||
|
|
||
| let mut vec_based_buffer_manager = SampleVecBasedVariantBufferManager { | ||
| value_buffer: vec![0u8; 1], | ||
| metadata_buffer: vec![0u8; 1], | ||
| }; | ||
|
|
||
| from_json_example(&mut box_based_buffer_manager)?; | ||
| from_json_example(&mut vec_based_buffer_manager)?; | ||
| Ok(()) | ||
| } | ||
|
alamb marked this conversation as resolved.
|
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,14 +1,16 @@ | ||||||||||||||
| pub use crate::variant::{VariantDecimal4, VariantDecimal8}; | ||||||||||||||
| use crate::variant_buffer_manager::VariantBufferManager; | ||||||||||||||
| use crate::{ListBuilder, ObjectBuilder, Variant, VariantBuilder}; | ||||||||||||||
| use crate::{AppendVariantHelper, ListBuilder, ObjectBuilder, Variant, VariantBuilder}; | ||||||||||||||
| use arrow_schema::ArrowError; | ||||||||||||||
| use serde_json::{Map, Value}; | ||||||||||||||
| use rust_decimal::prelude::*; | ||||||||||||||
| use serde_json::{Map, Number, Value}; | ||||||||||||||
|
|
||||||||||||||
| /// Eventually, internal writes should also be performed using VariantBufferManager instead of | ||||||||||||||
| /// ValueBuffer and MetadataBuffer so the caller has control of the memory. | ||||||||||||||
| /// Returns a pair <metadata_size, value_size> | ||||||||||||||
| pub fn json_to_variant<T: VariantBufferManager>( | ||||||||||||||
| pub fn json_to_variant( | ||||||||||||||
| json: &str, | ||||||||||||||
| variant_buffer_manager: &mut T, | ||||||||||||||
| variant_buffer_manager: &mut impl VariantBufferManager, | ||||||||||||||
| ) -> Result<(usize, usize), ArrowError> { | ||||||||||||||
| let mut builder = VariantBuilder::new(); | ||||||||||||||
| let json: Value = serde_json::from_str(json) | ||||||||||||||
|
|
@@ -21,32 +23,81 @@ pub fn json_to_variant<T: VariantBufferManager>( | |||||||||||||
|
|
||||||||||||||
| // Write to caller's buffers - Remove this when the library internally writes to the caller's | ||||||||||||||
| // buffers anyway | ||||||||||||||
| variant_buffer_manager.ensure_metadata_buffer_size(metadata_size)?; | ||||||||||||||
| variant_buffer_manager.ensure_value_buffer_size(value_size)?; | ||||||||||||||
|
|
||||||||||||||
| let caller_metadata_buffer = variant_buffer_manager.borrow_metadata_buffer(); | ||||||||||||||
| let caller_metadata_buffer = | ||||||||||||||
| variant_buffer_manager.ensure_size_and_borrow_metadata_buffer(metadata_size)?; | ||||||||||||||
| caller_metadata_buffer[..metadata_size].copy_from_slice(metadata.as_slice()); | ||||||||||||||
| let caller_value_buffer = variant_buffer_manager.borrow_value_buffer(); | ||||||||||||||
| let caller_value_buffer = | ||||||||||||||
| variant_buffer_manager.ensure_size_and_borrow_value_buffer(value_size)?; | ||||||||||||||
| caller_value_buffer[..value_size].copy_from_slice(value.as_slice()); | ||||||||||||||
| Ok((metadata_size, value_size)) | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| fn build_json(json: &Value, builder: &mut VariantBuilder) -> Result<(), ArrowError> { | ||||||||||||||
| append_json(json, builder)?; | ||||||||||||||
| Ok(()) | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| fn variant_from_number<'a, 'b>(n: &Number) -> Result<Variant<'a, 'b>, ArrowError> { | ||||||||||||||
|
harshmotw-db marked this conversation as resolved.
Outdated
|
||||||||||||||
| if let Some(i) = n.as_i64() { | ||||||||||||||
| // Find minimum Integer width to fit | ||||||||||||||
| if i as i8 as i64 == i { | ||||||||||||||
| Ok((i as i8).into()) | ||||||||||||||
| } else if i as i16 as i64 == i { | ||||||||||||||
| Ok((i as i16).into()) | ||||||||||||||
| } else if i as i32 as i64 == i { | ||||||||||||||
| Ok((i as i32).into()) | ||||||||||||||
| } else { | ||||||||||||||
| Ok(i.into()) | ||||||||||||||
| } | ||||||||||||||
| } else { | ||||||||||||||
| // Try decimal | ||||||||||||||
| // TODO: Replace with custom decimal parsing as the rust_decimal library only supports | ||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should look at what arrow-json does for this. |
||||||||||||||
| // a max unscaled value of 2^96. | ||||||||||||||
| match Decimal::from_str_exact(n.as_str()) { | ||||||||||||||
| Ok(dec) => { | ||||||||||||||
| let unscaled: i128 = dec.mantissa(); | ||||||||||||||
| let scale = dec.scale() as u8; | ||||||||||||||
| if unscaled.abs() <= VariantDecimal4::MAX_UNSCALED_VALUE as i128 | ||||||||||||||
| && scale <= VariantDecimal4::MAX_PRECISION as u8 | ||||||||||||||
| { | ||||||||||||||
| (unscaled as i32, scale).try_into() | ||||||||||||||
| } else if unscaled.abs() <= VariantDecimal8::MAX_UNSCALED_VALUE as i128 | ||||||||||||||
| && scale <= VariantDecimal8::MAX_PRECISION as u8 | ||||||||||||||
| { | ||||||||||||||
| (unscaled as i64, scale).try_into() | ||||||||||||||
| } else { | ||||||||||||||
| (unscaled, scale).try_into() | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| Err(_) => { | ||||||||||||||
| // Try double | ||||||||||||||
| match n.as_f64() { | ||||||||||||||
| Some(f) => return Ok(f.into()), | ||||||||||||||
| None => Err(ArrowError::InvalidArgumentError(format!( | ||||||||||||||
| "Failed to parse {} as number", | ||||||||||||||
| n.as_str() | ||||||||||||||
| ))), | ||||||||||||||
| }? | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| fn append_json(json: &Value, builder: &mut impl AppendVariantHelper) -> Result<(), ArrowError> { | ||||||||||||||
| match json { | ||||||||||||||
| Value::Null => builder.append_value(Variant::Null), | ||||||||||||||
| Value::Bool(b) => builder.append_value(*b), | ||||||||||||||
| Value::Null => builder.append_value_helper(Variant::Null), | ||||||||||||||
| Value::Bool(b) => builder.append_value_helper(*b), | ||||||||||||||
| Value::Number(n) => { | ||||||||||||||
| let v: Variant = n.try_into()?; | ||||||||||||||
| builder.append_value(v) | ||||||||||||||
| builder.append_value_helper(variant_from_number(n)?); | ||||||||||||||
| } | ||||||||||||||
|
harshmotw-db marked this conversation as resolved.
|
||||||||||||||
| Value::String(s) => builder.append_value(s.as_str()), | ||||||||||||||
| Value::String(s) => builder.append_value_helper(s.as_str()), | ||||||||||||||
| Value::Array(arr) => { | ||||||||||||||
| let mut list_builder = builder.new_list(); | ||||||||||||||
| let mut list_builder = builder.new_list_helper(); | ||||||||||||||
| build_list(arr, &mut list_builder)?; | ||||||||||||||
|
harshmotw-db marked this conversation as resolved.
Outdated
|
||||||||||||||
| list_builder.finish(); | ||||||||||||||
| } | ||||||||||||||
| Value::Object(obj) => { | ||||||||||||||
| let mut obj_builder = builder.new_object(); | ||||||||||||||
| let mut obj_builder = builder.new_object_helper(); | ||||||||||||||
| build_object(obj, &mut obj_builder)?; | ||||||||||||||
|
harshmotw-db marked this conversation as resolved.
Outdated
|
||||||||||||||
| obj_builder.finish(); | ||||||||||||||
| } | ||||||||||||||
|
|
@@ -56,22 +107,7 @@ fn build_json(json: &Value, builder: &mut VariantBuilder) -> Result<(), ArrowErr | |||||||||||||
|
|
||||||||||||||
| fn build_list(arr: &[Value], builder: &mut ListBuilder) -> Result<(), ArrowError> { | ||||||||||||||
| for val in arr { | ||||||||||||||
| match val { | ||||||||||||||
| Value::Null => builder.append_value(Variant::Null), | ||||||||||||||
| Value::Bool(b) => builder.append_value(*b), | ||||||||||||||
| Value::Number(n) => builder.append_value(Variant::try_from(n)?), | ||||||||||||||
| Value::String(s) => builder.append_value(s.as_str()), | ||||||||||||||
| Value::Array(arr) => { | ||||||||||||||
| let mut list_builder = builder.new_list(); | ||||||||||||||
| build_list(arr, &mut list_builder)?; | ||||||||||||||
| list_builder.finish() | ||||||||||||||
| } | ||||||||||||||
| Value::Object(obj) => { | ||||||||||||||
| let mut obj_builder = builder.new_object(); | ||||||||||||||
| build_object(obj, &mut obj_builder)?; | ||||||||||||||
| obj_builder.finish(); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| append_json(val, builder)?; | ||||||||||||||
| } | ||||||||||||||
| Ok(()) | ||||||||||||||
| } | ||||||||||||||
|
|
@@ -81,22 +117,27 @@ fn build_object<'a, 'b>( | |||||||||||||
| builder: &mut ObjectBuilder<'a, 'b>, | ||||||||||||||
| ) -> Result<(), ArrowError> { | ||||||||||||||
| for (key, value) in obj.iter() { | ||||||||||||||
| match value { | ||||||||||||||
| Value::Null => builder.insert(key, Variant::Null), | ||||||||||||||
| Value::Bool(b) => builder.insert(key, *b), | ||||||||||||||
| Value::Number(n) => builder.insert(key, Variant::try_from(n)?), | ||||||||||||||
| Value::String(s) => builder.insert(key, s.as_str()), | ||||||||||||||
| Value::Array(arr) => { | ||||||||||||||
| let mut list_builder = builder.new_list(key); | ||||||||||||||
| build_list(arr, &mut list_builder)?; | ||||||||||||||
| list_builder.finish() | ||||||||||||||
| } | ||||||||||||||
| Value::Object(obj) => { | ||||||||||||||
| let mut obj_builder = builder.new_object(key); | ||||||||||||||
| build_object(obj, &mut obj_builder)?; | ||||||||||||||
| obj_builder.finish(); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| let mut field_builder = ObjectFieldBuilder { key, builder }; | ||||||||||||||
| append_json(value, &mut field_builder)?; | ||||||||||||||
| } | ||||||||||||||
| Ok(()) | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| struct ObjectFieldBuilder<'a, 'b, 'c> { | ||||||||||||||
| key: &'a str, | ||||||||||||||
| builder: &'b mut ObjectBuilder<'c, 'a>, | ||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is hard to interpret... can we use
Suggested change
(here,
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've changed it to 's, 'o and 'v, where 's is the lifetime of the string, 'o is the lifetime of [ |
||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| impl AppendVariantHelper for ObjectFieldBuilder<'_, '_, '_> { | ||||||||||||||
| fn append_value_helper<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) { | ||||||||||||||
| self.builder.insert(self.key, value); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| fn new_list_helper(&mut self) -> ListBuilder { | ||||||||||||||
| self.builder.new_list(self.key) | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| fn new_object_helper(&mut self) -> ObjectBuilder { | ||||||||||||||
| self.builder.new_object(self.key) | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
Uh oh!
There was an error while loading. Please reload this page.