Continued: Use Arc<[Buffer]> instead of raw Vec<Buffer> in GenericByteViewArray for faster slice #7773

Open: ctsk wants to merge 12 commits into main.
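For context, here is a minimal sketch of why the change should make slice cheaper. It uses hypothetical stand-in types (FakeBuffer, BeforeArray, AfterArray), not the crate's real ones: slicing a GenericByteViewArray clones its buffer list, and cloning a Vec<Buffer> allocates a new Vec and bumps one refcount per buffer, while cloning an Arc<[Buffer]> is a single refcount bump.

use std::sync::Arc;

// Stand-in for arrow_buffer::Buffer (refcounted bytes); illustrative only.
#[derive(Clone)]
struct FakeBuffer(Arc<Vec<u8>>);

// Before this PR: the array owns a Vec of buffers.
struct BeforeArray {
    buffers: Vec<FakeBuffer>,
}

// After this PR: the array owns an Arc'd slice of buffers.
struct AfterArray {
    buffers: Arc<[FakeBuffer]>,
}

fn slice_before(a: &BeforeArray) -> BeforeArray {
    // Allocates a new Vec and refcount-bumps every buffer it contains.
    BeforeArray { buffers: a.buffers.clone() }
}

fn slice_after(a: &AfterArray) -> AfterArray {
    // A single atomic refcount bump, no allocation, regardless of buffer count.
    AfterArray { buffers: a.buffers.clone() }
}

fn main() {
    let bufs = vec![FakeBuffer(Arc::new(vec![0u8; 16])); 8];
    let before = BeforeArray { buffers: bufs.clone() };
    let after = AfterArray { buffers: bufs.into() };
    let _ = slice_before(&before);
    let _ = slice_after(&after);
}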
6 changes: 6 additions & 0 deletions arrow-array/benches/view_types.rs
@@ -48,6 +48,12 @@ fn criterion_benchmark(c: &mut Criterion) {
black_box(array.slice(0, 100_000 / 2));
});
});

c.bench_function("view types slice", |b| {
b.iter(|| {
black_box(array.slice(0, 100_000 / 2));
});
});
}

criterion_group!(benches, criterion_benchmark);
45 changes: 30 additions & 15 deletions arrow-array/src/array/byte_view_array.rs
@@ -20,7 +20,9 @@ use crate::builder::{ArrayBuilder, GenericByteViewBuilder};
use crate::iterator::ArrayIter;
use crate::types::bytes::ByteArrayNativeType;
use crate::types::{BinaryViewType, ByteViewType, StringViewType};
use crate::{Array, ArrayAccessor, ArrayRef, GenericByteArray, OffsetSizeTrait, Scalar};
use crate::{
Array, ArrayAccessor, ArrayRef, GenericByteArray, OffsetSizeTrait, Scalar, ViewBuffers,
};
use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, ScalarBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
use arrow_schema::{ArrowError, DataType};
@@ -164,7 +166,7 @@ use super::ByteArrayType;
pub struct GenericByteViewArray<T: ByteViewType + ?Sized> {
data_type: DataType,
views: ScalarBuffer<u128>,
buffers: Vec<Buffer>,
buffers: ViewBuffers,
phantom: PhantomData<T>,
nulls: Option<NullBuffer>,
}
@@ -187,7 +189,11 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// # Panics
///
/// Panics if [`GenericByteViewArray::try_new`] returns an error
pub fn new(views: ScalarBuffer<u128>, buffers: Vec<Buffer>, nulls: Option<NullBuffer>) -> Self {
pub fn new(
views: ScalarBuffer<u128>,
buffers: impl Into<ViewBuffers>,
nulls: Option<NullBuffer>,
) -> Self {
Self::try_new(views, buffers, nulls).unwrap()
}

@@ -199,9 +205,11 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// * [ByteViewType::validate] fails
pub fn try_new(
views: ScalarBuffer<u128>,
buffers: Vec<Buffer>,
buffers: impl Into<ViewBuffers>,
nulls: Option<NullBuffer>,
) -> Result<Self, ArrowError> {
let buffers = buffers.into();

T::validate(&views, &buffers)?;

if let Some(n) = nulls.as_ref() {
@@ -231,7 +239,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// Safe if [`Self::try_new`] would not error
pub unsafe fn new_unchecked(
views: ScalarBuffer<u128>,
buffers: Vec<Buffer>,
buffers: impl Into<ViewBuffers>,
nulls: Option<NullBuffer>,
) -> Self {
if cfg!(feature = "force_validate") {
@@ -242,7 +250,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
data_type: T::DATA_TYPE,
phantom: Default::default(),
views,
buffers,
buffers: buffers.into(),
nulls,
}
}
@@ -252,7 +260,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
Self {
data_type: T::DATA_TYPE,
views: vec![0; len].into(),
buffers: vec![],
buffers: vec![].into(),
nulls: Some(NullBuffer::new_null(len)),
phantom: Default::default(),
}
@@ -278,7 +286,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
}

/// Deconstruct this array into its constituent parts
pub fn into_parts(self) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
pub fn into_parts(self) -> (ScalarBuffer<u128>, ViewBuffers, Option<NullBuffer>) {
(self.views, self.buffers, self.nulls)
}

@@ -609,8 +617,9 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {

fn shrink_to_fit(&mut self) {
self.views.shrink_to_fit();
self.buffers.iter_mut().for_each(|b| b.shrink_to_fit());
self.buffers.shrink_to_fit();
if let Some(buffers) = Arc::get_mut(&mut self.buffers.0) {
Contributor comment:

I think this changes the semantics slightly -- it now only shrinks the buffers if they aren't shared.

Maybe it should be using Arc::make_mut 🤔

Contributor Author (@ctsk, Jun 28, 2025) replied:

The underlying Buffers are reference counted too, and only shrink themselves if the reference count is 1. So while this does change the semantics slightly, I don't think it changes much in practice: when the Arc is shared, the (current) alternative would be to store references to the same buffers in another Vec, thus incrementing the reference counts on the underlying buffers and making their shrink_to_fit a no-op.

That's also why make_mut won't lead to more shrinking.

buffers.iter_mut().for_each(|b| b.shrink_to_fit());
}
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
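To make the semantic point in the thread above concrete, here is a minimal standalone sketch. FakeBuffer is a hypothetical stand-in for Buffer, and Arc::make_mut on Arc<[T]> assumes a recent Rust toolchain (it was stabilized for slices around 1.81): get_mut shrinks nothing while the slice is shared, and make_mut would clone the slice, but each cloned element still shares its allocation, so its shrink_to_fit stays a no-op.

use std::sync::Arc;

// Hypothetical stand-in for arrow_buffer::Buffer: cloning only bumps a
// refcount, and shrinking is a no-op while the allocation is shared.
#[derive(Clone)]
struct FakeBuffer(Arc<Vec<u8>>);

impl FakeBuffer {
    fn shrink_to_fit(&mut self) {
        if let Some(v) = Arc::get_mut(&mut self.0) {
            v.shrink_to_fit();
        }
    }
}

fn main() {
    let buffers: Arc<[FakeBuffer]> =
        vec![FakeBuffer(Arc::new(Vec::with_capacity(1024)))].into();
    let mut mine = buffers.clone(); // the Arc<[FakeBuffer]> is now shared

    // What the PR does: get_mut returns None while shared, so nothing shrinks.
    assert!(Arc::get_mut(&mut mine).is_none());

    // The make_mut alternative clones the slice, but each cloned FakeBuffer
    // still shares its allocation with `buffers`, so shrinking remains a
    // no-op (the point made in the reply above).
    for b in Arc::make_mut(&mut mine) {
        b.shrink_to_fit();
    }
    assert!(buffers[0].0.capacity() >= 1024);
}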
@@ -668,11 +677,11 @@ impl<T: ByteViewType + ?Sized> From<ArrayData> for GenericByteViewArray<T> {
fn from(value: ArrayData) -> Self {
let views = value.buffers()[0].clone();
let views = ScalarBuffer::new(views, value.offset(), value.len());
let buffers = value.buffers()[1..].to_vec();
let buffers = &value.buffers()[1..];
Self {
data_type: T::DATA_TYPE,
views,
buffers,
buffers: buffers.into(),
nulls: value.nulls().cloned(),
phantom: Default::default(),
}
@@ -736,12 +745,18 @@ where
}

impl<T: ByteViewType + ?Sized> From<GenericByteViewArray<T>> for ArrayData {
fn from(mut array: GenericByteViewArray<T>) -> Self {
fn from(array: GenericByteViewArray<T>) -> Self {
let len = array.len();
array.buffers.insert(0, array.views.into_inner());
let new_buffers = {
Contributor comment:

I wonder if this code is doing an extra allocation (namely, it is now making a new Vec::with_capacity rather than reusing the previous buffers Vec as it did before).

Contributor Author (@ctsk, Jun 28, 2025) replied:

I can reproduce the slowdown for the concat kernel on my laptop.

For this allocation, I believe we in turn save doing this allocation during clone() when using Arc<ViewBuffers>...

Contributor Author (@ctsk) followed up:

Nevermind, it appears that I can't consistently reproduce the slowdown.

Contributor replied:

Yeah, it is really strange.

let mut buffers = Vec::with_capacity(array.buffers.len() + 1);
buffers.push(array.views.into_inner());
buffers.extend_from_slice(&array.buffers);
buffers
};

let builder = ArrayDataBuilder::new(T::DATA_TYPE)
.len(len)
.buffers(array.buffers)
.buffers(new_buffers)
.nulls(array.nulls);

unsafe { builder.build_unchecked() }
2 changes: 2 additions & 0 deletions arrow-array/src/lib.rs
@@ -259,6 +259,8 @@ pub mod temporal_conversions;
pub mod timezone;
mod trusted_len;
pub mod types;
mod view_buffers;
pub use view_buffers::ViewBuffers;

#[cfg(test)]
mod tests {
52 changes: 52 additions & 0 deletions arrow-array/src/view_buffers.rs
@@ -0,0 +1,52 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::{ops::Deref, sync::Arc};

use arrow_buffer::Buffer;

/// A cheaply cloneable, owned slice of [`Buffer`]
///
/// Similar to `Arc<Vec<Buffer>>` or `Arc<[Buffer]>`
#[derive(Clone, Debug)]
pub struct ViewBuffers(pub(crate) Arc<[Buffer]>);
Contributor comment:

One idea I had: instead of Arc<[Buffer]>, what if we left it as Arc<Vec<Buffer>> so converting back and forth to Vec wasn't as costly 🤔


impl FromIterator<Buffer> for ViewBuffers {
fn from_iter<T: IntoIterator<Item = Buffer>>(iter: T) -> Self {
Self(iter.into_iter().collect())
}
}

impl From<Vec<Buffer>> for ViewBuffers {
fn from(value: Vec<Buffer>) -> Self {
Self(value.into())
}
}

impl From<&[Buffer]> for ViewBuffers {
fn from(value: &[Buffer]) -> Self {
Self(value.into())
}
}

impl Deref for ViewBuffers {
type Target = [Buffer];

fn deref(&self) -> &Self::Target {
self.0.as_ref()
}
}
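A brief usage sketch of the new type follows (not part of the PR; it assumes the arrow-array and arrow-buffer crates from this branch). Cloning ViewBuffers is one refcount bump, Deref keeps slice-style access working, and converting back to an owned Vec<Buffer> is where the cost raised in the comment above shows up.

use arrow_array::ViewBuffers;
use arrow_buffer::Buffer;

fn main() {
    // Build from a Vec<Buffer>; this is the one allocation for the Arc<[Buffer]>.
    let buffers: ViewBuffers =
        vec![Buffer::from(&b"hello"[..]), Buffer::from(&b"view"[..])].into();

    // Cloning bumps one refcount; no new Vec, no per-Buffer clone.
    let shared = buffers.clone();

    // Deref to [Buffer] keeps slice-style access working.
    assert_eq!(shared.len(), 2);
    assert_eq!(shared[0].as_slice(), b"hello");

    // Going back to an owned Vec<Buffer> (for APIs that still take one)
    // allocates a Vec and clones each Buffer again.
    let owned: Vec<Buffer> = shared.to_vec();
    assert_eq!(owned.len(), 2);
}

The Arc<Vec<Buffer>> alternative suggested above would presumably make that last conversion free when the Arc is uniquely owned (via Arc::try_unwrap), at the price of an extra pointer indirection on every access.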
48 changes: 28 additions & 20 deletions arrow/benches/concatenate_kernel.rs
@@ -39,18 +39,20 @@ fn bench_concat_arrays(arrays: &[&dyn Array]) {
fn add_benchmark(c: &mut Criterion) {
let v1 = create_primitive_array::<Int32Type>(1024, 0.0);
let v2 = create_primitive_array::<Int32Type>(1024, 0.0);
c.bench_function("concat i32 1024", |b| b.iter(|| bench_concat(&v1, &v2)));
c.bench_function("concat i32 1024", |b| {
b.iter_with_large_drop(|| bench_concat(&v1, &v2))
});

let v1 = create_primitive_array::<Int32Type>(1024, 0.5);
let v2 = create_primitive_array::<Int32Type>(1024, 0.5);
c.bench_function("concat i32 nulls 1024", |b| {
b.iter(|| bench_concat(&v1, &v2))
b.iter_with_large_drop(|| bench_concat(&v1, &v2))
});

let small_array = create_primitive_array::<Int32Type>(4, 0.0);
let arrays: Vec<_> = (0..1024).map(|_| &small_array as &dyn Array).collect();
c.bench_function("concat 1024 arrays i32 4", |b| {
b.iter(|| bench_concat_arrays(&arrays))
b.iter_with_large_drop(|| bench_concat_arrays(&arrays))
});

{
Expand All @@ -59,7 +61,7 @@ fn add_benchmark(c: &mut Criterion) {
.collect::<Vec<_>>();
let arrays: Vec<_> = input.iter().map(|arr| arr as &dyn Array).collect();
c.bench_function("concat i32 8192 over 100 arrays", |b| {
b.iter(|| bench_concat_arrays(&arrays))
b.iter_with_large_drop(|| bench_concat_arrays(&arrays))
});
}

@@ -69,24 +71,26 @@ fn add_benchmark(c: &mut Criterion) {
.collect::<Vec<_>>();
let arrays: Vec<_> = input.iter().map(|arr| arr as &dyn Array).collect();
c.bench_function("concat i32 nulls 8192 over 100 arrays", |b| {
b.iter(|| bench_concat_arrays(&arrays))
b.iter_with_large_drop(|| bench_concat_arrays(&arrays))
});
}

let v1 = create_boolean_array(1024, 0.0, 0.5);
let v2 = create_boolean_array(1024, 0.0, 0.5);
c.bench_function("concat boolean 1024", |b| b.iter(|| bench_concat(&v1, &v2)));
c.bench_function("concat boolean 1024", |b| {
b.iter_with_large_drop(|| bench_concat(&v1, &v2))
});

let v1 = create_boolean_array(1024, 0.5, 0.5);
let v2 = create_boolean_array(1024, 0.5, 0.5);
c.bench_function("concat boolean nulls 1024", |b| {
b.iter(|| bench_concat(&v1, &v2))
b.iter_with_large_drop(|| bench_concat(&v1, &v2))
});

let small_array = create_boolean_array(4, 0.0, 0.5);
let arrays: Vec<_> = (0..1024).map(|_| &small_array as &dyn Array).collect();
c.bench_function("concat 1024 arrays boolean 4", |b| {
b.iter(|| bench_concat_arrays(&arrays))
b.iter_with_large_drop(|| bench_concat_arrays(&arrays))
});

{
Expand All @@ -95,7 +99,7 @@ fn add_benchmark(c: &mut Criterion) {
.collect::<Vec<_>>();
let arrays: Vec<_> = input.iter().map(|arr| arr as &dyn Array).collect();
c.bench_function("concat boolean 8192 over 100 arrays", |b| {
b.iter(|| bench_concat_arrays(&arrays))
b.iter_with_large_drop(|| bench_concat_arrays(&arrays))
});
}

@@ -105,24 +109,26 @@ fn add_benchmark(c: &mut Criterion) {
.collect::<Vec<_>>();
let arrays: Vec<_> = input.iter().map(|arr| arr as &dyn Array).collect();
c.bench_function("concat boolean nulls 8192 over 100 arrays", |b| {
b.iter(|| bench_concat_arrays(&arrays))
b.iter_with_large_drop(|| bench_concat_arrays(&arrays))
});
}

let v1 = create_string_array::<i32>(1024, 0.0);
let v2 = create_string_array::<i32>(1024, 0.0);
c.bench_function("concat str 1024", |b| b.iter(|| bench_concat(&v1, &v2)));
c.bench_function("concat str 1024", |b| {
b.iter_with_large_drop(|| bench_concat(&v1, &v2))
});

let v1 = create_string_array::<i32>(1024, 0.5);
let v2 = create_string_array::<i32>(1024, 0.5);
c.bench_function("concat str nulls 1024", |b| {
b.iter(|| bench_concat(&v1, &v2))
b.iter_with_large_drop(|| bench_concat(&v1, &v2))
});

let small_array = create_string_array::<i32>(4, 0.0);
let arrays: Vec<_> = (0..1024).map(|_| &small_array as &dyn Array).collect();
c.bench_function("concat 1024 arrays str 4", |b| {
b.iter(|| bench_concat_arrays(&arrays))
b.iter_with_large_drop(|| bench_concat_arrays(&arrays))
});

{
Expand All @@ -131,7 +137,7 @@ fn add_benchmark(c: &mut Criterion) {
.collect::<Vec<_>>();
let arrays: Vec<_> = input.iter().map(|arr| arr as &dyn Array).collect();
c.bench_function("concat str 8192 over 100 arrays", |b| {
b.iter(|| bench_concat_arrays(&arrays))
b.iter_with_large_drop(|| bench_concat_arrays(&arrays))
});
}

@@ -141,7 +147,7 @@ fn add_benchmark(c: &mut Criterion) {
.collect::<Vec<_>>();
let arrays: Vec<_> = input.iter().map(|arr| arr as &dyn Array).collect();
c.bench_function("concat str nulls 8192 over 100 arrays", |b| {
b.iter(|| bench_concat_arrays(&arrays))
b.iter_with_large_drop(|| bench_concat_arrays(&arrays))
});
}

@@ -155,7 +161,9 @@ fn add_benchmark(c: &mut Criterion) {
let id = format!(
"concat utf8_view {name} max_str_len={str_len} null_density={null_density}"
);
c.bench_function(&id, |b| b.iter(|| bench_concat_arrays(&arrays)));
c.bench_function(&id, |b| {
b.iter_with_large_drop(|| bench_concat_arrays(&arrays))
});
}
}

@@ -164,15 +172,15 @@ fn add_benchmark(c: &mut Criterion) {
let v2 = create_string_array_with_len::<i32>(10, 0.0, 20);
let v2 = create_dict_from_values::<Int32Type>(1024, 0.0, &v2);
c.bench_function("concat str_dict 1024", |b| {
b.iter(|| bench_concat(&v1, &v2))
b.iter_with_large_drop(|| bench_concat(&v1, &v2))
});

let v1 = create_string_array_with_len::<i32>(1024, 0.0, 20);
let v1 = create_sparse_dict_from_values::<Int32Type>(1024, 0.0, &v1, 10..20);
let v2 = create_string_array_with_len::<i32>(1024, 0.0, 20);
let v2 = create_sparse_dict_from_values::<Int32Type>(1024, 0.0, &v2, 30..40);
c.bench_function("concat str_dict_sparse 1024", |b| {
b.iter(|| bench_concat(&v1, &v2))
b.iter_with_large_drop(|| bench_concat(&v1, &v2))
});

let v1 = FixedSizeListArray::try_new(
Expand All @@ -190,7 +198,7 @@ fn add_benchmark(c: &mut Criterion) {
)
.unwrap();
c.bench_function("concat fixed size lists", |b| {
b.iter(|| bench_concat(&v1, &v2))
b.iter_with_large_drop(|| bench_concat(&v1, &v2))
});

{
@@ -233,7 +241,7 @@ fn add_benchmark(c: &mut Criterion) {

c.bench_function(
&format!("concat struct with int32 and dicts size={batch_size} count={batch_count}"),
|b| b.iter(|| bench_concat_arrays(&array_refs)),
|b| b.iter_with_large_drop(|| bench_concat_arrays(&array_refs)),
);
}
}