core/portable-simd/crates/core_simd/src/ops.rs

use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount, cmp::SimdPartialEq};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
use core::ops::{Shl, Shr};

mod assign;
mod deref;
mod shift_scalar;
mod unary;

impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    type Output = I::Output;
    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_array()[index]
    }
}

impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.as_mut_array()[index]
    }
}
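
// A hedged usage sketch (illustrative only, not part of this module's code):
// the `SliceIndex` bound means both single-lane and subslice indexing work,
// reading through `as_array`/`as_mut_array`.
//
//     let v = Simd::from_array([10, 20, 30, 40]);
//     assert_eq!(v[2], 30);          // one lane, via Index
//     let mut w = v;
//     w[0] = 5;                      // via IndexMut
//     assert_eq!(&w[..2], &[5, 20]); // subslice, also via Index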

macro_rules! unsafe_base {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
        // Safety: $lhs and $rhs are vectors
        unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) }
    };
}
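
// For illustration only: when `for_base_types!` below invokes
// `unsafe_base { simd_add }`, this expands (roughly) to
//
//     unsafe { core::intrinsics::simd::simd_add(self, rhs) }
//
// with `self`/`rhs` substituted for `$lhs`/`$rhs`; the trailing `$($_:tt)*`
// swallows the scalar type argument, which this macro doesn't need.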

/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
/// It handles performing a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr when `rhs >= <Int>::BITS`.
/// At worst, this will maybe add another instruction and cycle,
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
        #[allow(clippy::suspicious_arithmetic_impl)]
        // Safety: $lhs and the bitand result are vectors
        unsafe {
            core::intrinsics::simd::$simd_call(
                $lhs,
                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
            )
        }
    };
}
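
// A hedged semantics sketch (illustrative only): the bitand masks the shift
// amount modulo the bit width, so for u8 (BITS == 8, mask == 7):
//
//     let x = Simd::<u8, 4>::splat(1);
//     assert_eq!(x << Simd::splat(8u8), Simd::splat(1)); // 8 & 7 == 0
//     assert_eq!(x << Simd::splat(9u8), Simd::splat(2)); // 9 & 7 == 1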

/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a Rust value returns instead.
///
/// |                  | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0        | UB   | panic!()
/// | <$int>::MIN / -1 | UB   | <$int>::MIN
/// | <$int>::MIN % -1 | UB   | 0
///
macro_rules! int_divrem_guard {
    (   $lhs:ident,
        $rhs:ident,
        {   const PANIC_ZERO: &'static str = $zero:literal;
            $simd_call:ident, $op:tt
        },
        $int:ident ) => {
        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
            panic!($zero);
        } else {
            // Prevent otherwise-UB overflow on the MIN / -1 case.
            let rhs = if <$int>::MIN != 0 {
                // This should, at worst, optimize to a few branchless logical ops
                // Ideally, this entire conditional should evaporate
                // Fire LLVM and implement those manually if it doesn't get the hint
                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                // type inference can break here, so cut an SInt to size
                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
                .select(Simd::splat(1 as _), $rhs)
            } else {
                // Nice base case to make it easy to const-fold away the other branch.
                $rhs
            };

            // aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two
            // these operations aren't vectorized on aarch64 anyway
            #[cfg(target_arch = "aarch64")]
            {
                let mut out = Simd::splat(0 as _);
                for i in 0..Self::LEN {
                    out[i] = $lhs[i] $op rhs[i];
                }
                out
            }

            #[cfg(not(target_arch = "aarch64"))]
            {
                // Safety: $lhs and rhs are vectors
                unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
            }
        }
    };
}
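
// A hedged behavior sketch matching the table above (illustrative only):
//
//     let min = Simd::<i32, 4>::splat(i32::MIN);
//     let neg1 = Simd::splat(-1i32);
//     assert_eq!(min / neg1, min);            // guard swaps rhs to 1 first
//     assert_eq!(min % neg1, Simd::splat(0)); // likewise
//     // Simd::<i32, 4>::splat(1) / Simd::splat(0) panics, as in scalar code.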

macro_rules! for_base_types {
    (   T = ($($scalar:ident),*);
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ty;

        impl $op:ident::$call:ident {
            $macro_impl:ident $inner:tt
        }) => {
            $(
                impl<const N: usize> $op<Self> for Simd<$scalar, N>
                where
                    $scalar: SimdElement,
                    LaneCount<N>: SupportedLaneCount,
                {
                    type Output = $out;

                    #[inline]
                    // TODO: only useful for int Div::div, but we hope that this
                    // will essentially always get inlined anyway.
                    #[track_caller]
                    fn $call(self, rhs: Self) -> Self::Output {
                        $macro_impl!(self, rhs, $inner, $scalar)
                    }
                }
            )*
    }
}
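
// For illustration only: a single step of the expansion. With `T = (i32)`
// and `impl Add::add { unsafe_base { simd_add } }`, the macro emits roughly
//
//     impl<const N: usize> Add<Self> for Simd<i32, N>
//     where
//         i32: SimdElement,
//         LaneCount<N>: SupportedLaneCount,
//     {
//         type Output = Self;
//         #[inline]
//         #[track_caller]
//         fn add(self, rhs: Self) -> Self::Output {
//             unsafe { core::intrinsics::simd::simd_add(self, rhs) }
//         }
//     }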

// A "TokenTree muncher": takes a set of scalar types `T = {};`
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
    (
        T = $types:tt;
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ident;
        impl $op:ident::$call:ident
            $inner:tt
        $($rest:tt)*
    ) => {
        for_base_types! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            impl $op::$call
                $inner
        }
        for_base_ops! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            $($rest)*
        }
    };
    ($($done:tt)*) => {
        // Done.
    }
}

// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl BitAnd::bitand {
        unsafe_base { simd_and }
    }

    impl BitOr::bitor {
        unsafe_base { simd_or }
    }

    impl BitXor::bitxor {
        unsafe_base { simd_xor }
    }

    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
            simd_div, /
        }
    }

    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
            simd_rem, %
        }
    }

    // The only question is how to handle shifts >= <Int>::BITS?
    // Our current solution uses wrapping logic.
    impl Shl::shl {
        wrap_bitshift { simd_shl }
    }

    impl Shr::shr {
        wrap_bitshift {
            // This automatically monomorphizes to lshr or ashr, depending,
            // so it's fine to use it for both UInts and SInts.
            simd_shr
        }
    }
}
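
// A hedged usage sketch of the wrapping behavior noted above (illustrative
// only): unlike scalar `+`, which panics on overflow in debug builds, the
// vector op wraps.
//
//     let x = Simd::<u8, 4>::splat(u8::MAX);
//     assert_eq!(x + Simd::splat(1), Simd::splat(0));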

// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
for_base_ops! {
    T = (f32, f64);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl Div::div {
        unsafe_base { simd_div }
    }

    impl Rem::rem {
        unsafe_base { simd_rem }
    }
}
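
// A hedged usage sketch (illustrative only): float ops never panic; IEEE 754
// handles the edge cases, so a zero divisor yields an infinity or NaN per lane.
//
//     let x = Simd::from_array([1.0f32, -1.0, 0.0, 2.0]);
//     let q = x / Simd::splat(0.0);
//     assert_eq!(q.to_array()[0], f32::INFINITY);
//     assert!(q.to_array()[2].is_nan());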