Skip to content

Commit 9bb416b

Browse files
Append complex variants
1 parent 7b219f9 commit 9bb416b

File tree

2 files changed

+124
-10
lines changed

2 files changed

+124
-10
lines changed

parquet-variant/src/builder.rs

Lines changed: 123 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717
use crate::decoder::{VariantBasicType, VariantPrimitiveType};
18-
use crate::{ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8};
18+
use crate::{
19+
ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantList,
20+
VariantObject,
21+
};
1922
use arrow_schema::ArrowError;
2023
use indexmap::{IndexMap, IndexSet};
2124
use std::collections::HashSet;
@@ -192,8 +195,7 @@ impl ValueBuffer {
192195
self.0.len()
193196
}
194197

195-
fn append_non_nested_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
196-
let variant = value.into();
198+
fn append_variant<'m, 'd>(&mut self, variant: Variant<'m, 'd>) {
197199
match variant {
198200
Variant::Null => self.append_null(),
199201
Variant::BooleanTrue => self.append_bool(true),
@@ -213,14 +215,14 @@ impl ValueBuffer {
213215
Variant::Binary(v) => self.append_binary(v),
214216
Variant::String(s) => self.append_string(s),
215217
Variant::ShortString(s) => self.append_short_string(s),
216-
Variant::Object(_) | Variant::List(_) => {
217-
unreachable!(
218-
"Nested values are handled specially by ObjectBuilder and ListBuilder"
219-
);
220-
}
218+
_ => unreachable!("Objects and lists must be appended using VariantBuilder::append_object and VariantBuilder::append_list"),
221219
}
222220
}
223221

222+
/// Converts `value` into a [`Variant`] and forwards it to `append_variant`.
fn append_non_nested_value<'m, 'd, T>(&mut self, value: T)
where
    T: Into<Variant<'m, 'd>>,
{
    let variant: Variant<'m, 'd> = value.into();
    self.append_variant(variant)
}
225+
224226
/// Writes out the header byte for a variant object or list
225227
fn append_header(&mut self, header_byte: u8, is_large: bool, num_items: usize) {
226228
let buf = self.inner_mut();
@@ -697,6 +699,71 @@ impl VariantBuilder {
697699
ObjectBuilder::new(parent_state, validate_unique_fields)
698700
}
699701

702+
/// Appends a [`VariantObject`] to the builder.
703+
fn append_object<'m, 'v>(&mut self, object: VariantObject<'m, 'v>) {
704+
let (parent_state, validate_unique_fields) = self.parent_state();
705+
706+
let mut obj_builder = ObjectBuilder::new(parent_state, validate_unique_fields);
707+
708+
for (field_name, variant) in object.iter() {
709+
obj_builder.insert(field_name, variant);
710+
}
711+
712+
obj_builder.finish().unwrap();
713+
}
714+
715+
/// Appends a [`VariantObject`] to the builder with full validation during iteration.
716+
///
717+
/// Recursively validates all nested variants in the object during iteration.
718+
fn try_append_object<'m, 'v>(
719+
&mut self,
720+
object: VariantObject<'m, 'v>,
721+
) -> Result<(), ArrowError> {
722+
let (parent_state, validate_unique_fields) = self.parent_state();
723+
724+
let mut obj_builder = ObjectBuilder::new(parent_state, validate_unique_fields);
725+
726+
for res in object.iter_try() {
727+
let (field_name, variant) = res?;
728+
729+
obj_builder.insert(field_name, variant);
730+
}
731+
732+
obj_builder.finish()?;
733+
734+
Ok(())
735+
}
736+
737+
/// Appends a [`VariantList`] to the builder.
738+
fn append_list<'m, 'v>(&mut self, list: VariantList<'m, 'v>) {
739+
let (parent_state, validate_unique_fields) = self.parent_state();
740+
741+
let mut list_builder = ListBuilder::new(parent_state, validate_unique_fields);
742+
743+
for variant in list.iter() {
744+
list_builder.append_value(variant);
745+
}
746+
747+
list_builder.finish();
748+
}
749+
750+
/// Appends a [`VariantList`] to the builder with full validation during iteration.
751+
///
752+
/// Recursively validates all nested variants in the list during iteration.
753+
fn try_append_list<'m, 'v>(&mut self, list: VariantList<'m, 'v>) -> Result<(), ArrowError> {
754+
let (parent_state, validate_unique_fields) = self.parent_state();
755+
756+
let mut list_builder = ListBuilder::new(parent_state, validate_unique_fields);
757+
758+
for variant in list.iter_try() {
759+
list_builder.append_value(variant?);
760+
}
761+
762+
list_builder.finish();
763+
764+
Ok(())
765+
}
766+
700767
/// Append a value to the builder; objects and lists are copied through nested builders.
701768
///
702769
/// # Example
@@ -707,7 +774,13 @@ impl VariantBuilder {
707774
/// builder.append_value(42i8);
708775
/// ```
709776
pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
710-
self.buffer.append_non_nested_value(value);
777+
let variant = value.into();
778+
779+
match variant {
780+
Variant::Object(obj) => self.append_object(obj),
781+
Variant::List(list) => self.append_list(list),
782+
primitive => self.buffer.append_variant(primitive),
783+
}
711784
}
712785

713786
/// Finish the builder and return the metadata and value buffers.
@@ -2170,4 +2243,45 @@ mod tests {
21702243
let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
21712244
assert_eq!(variant, Variant::Int8(2));
21722245
}
2246+
2247+
/// Appending an object variant to a fresh builder must reproduce an equal variant.
#[test]
fn test_append_object() {
    let (src_metadata, src_value) = make_object();
    let original = Variant::new(&src_metadata, &src_value);

    let (metadata, value) = {
        let mut builder = VariantBuilder::new();
        builder.append_value(original.clone());
        builder.finish()
    };

    let round_tripped = Variant::new(&metadata, &value);
    assert_eq!(original, round_tripped);
}
2257+
2258+
/// make an object variant
2259+
fn make_object() -> (Vec<u8>, Vec<u8>) {
2260+
let mut builder = VariantBuilder::new();
2261+
2262+
let mut obj = builder.new_object();
2263+
obj.insert("a", true);
2264+
obj.finish().unwrap();
2265+
builder.finish()
2266+
}
2267+
2268+
/// Appending a list variant to a fresh builder must reproduce an equal variant.
#[test]
fn test_append_list() {
    let (src_metadata, src_value) = make_list();
    let original = Variant::new(&src_metadata, &src_value);

    let (metadata, value) = {
        let mut builder = VariantBuilder::new();
        builder.append_value(original.clone());
        builder.finish()
    };

    let round_tripped = Variant::new(&metadata, &value);
    assert_eq!(original, round_tripped);
}
2277+
2278+
/// make a simple List variant
2279+
fn make_list() -> (Vec<u8>, Vec<u8>) {
2280+
let mut builder = VariantBuilder::new();
2281+
let mut list = builder.new_list();
2282+
list.append_value(1234);
2283+
list.append_value("a string value");
2284+
list.finish();
2285+
builder.finish()
2286+
}
21732287
}

parquet-variant/src/variant/metadata.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ impl VariantMetadataHeader {
127127
/// [Variant Spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#metadata-encoding
128128
#[derive(Debug, Clone, PartialEq)]
129129
pub struct VariantMetadata<'m> {
130-
bytes: &'m [u8],
130+
pub(crate) bytes: &'m [u8],
131131
header: VariantMetadataHeader,
132132
dictionary_size: u32,
133133
first_value_byte: u32,

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy