core/portable-simd/crates/core_simd/src/ops.rs

use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount, cmp::SimdPartialEq};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
use core::ops::{Shl, Shr};

mod assign;
mod deref;
mod shift_scalar;
mod unary;

impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    type Output = I::Output;
    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_array()[index]
    }
}

impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.as_mut_array()[index]
    }
}
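
// A hedged usage sketch (illustrative only, not part of this module's code):
// the `SliceIndex` bound means both single-lane and subslice indexing work,
// reading through `as_array`/`as_mut_array`.
//
//     let v = Simd::from_array([10, 20, 30, 40]);
//     assert_eq!(v[2], 30);          // one lane, via Index
//     let mut w = v;
//     w[0] = 5;                      // via IndexMut
//     assert_eq!(&w[..2], &[5, 20]); // subslice, also via Index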

macro_rules! unsafe_base {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
        // Safety: $lhs and $rhs are vectors
        unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) }
    };
}
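
// For illustration only: when `for_base_types!` below invokes
// `unsafe_base { simd_add }`, this expands (roughly) to
//
//     unsafe { core::intrinsics::simd::simd_add(self, rhs) }
//
// with `self`/`rhs` substituted for `$lhs`/`$rhs`; the trailing `$($_:tt)*`
// swallows the scalar type argument, which this macro doesn't need.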

/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
/// It handles performing a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr when `rhs >= <Int>::BITS`.
/// At worst, this will maybe add another instruction and cycle,
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
        #[allow(clippy::suspicious_arithmetic_impl)]
        // Safety: $lhs and the bitand result are vectors
        unsafe {
            core::intrinsics::simd::$simd_call(
                $lhs,
                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
            )
        }
    };
}
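
// A hedged semantics sketch (illustrative only): the bitand masks the shift
// amount modulo the bit width, so for u8 (BITS == 8, mask == 7):
//
//     let x = Simd::<u8, 4>::splat(1);
//     assert_eq!(x << Simd::splat(8u8), Simd::splat(1)); // 8 & 7 == 0
//     assert_eq!(x << Simd::splat(9u8), Simd::splat(2)); // 9 & 7 == 1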

/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a Rust value returns instead.
///
/// |                  | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0        | UB   | panic!()
/// | <$int>::MIN / -1 | UB   | <$int>::MIN
/// | <$int>::MIN % -1 | UB   | 0
///
macro_rules! int_divrem_guard {
    (   $lhs:ident,
        $rhs:ident,
        {   const PANIC_ZERO: &'static str = $zero:literal;
            $simd_call:ident, $op:tt
        },
        $int:ident ) => {
        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
            panic!($zero);
        } else {
            // Prevent otherwise-UB overflow on the MIN / -1 case.
            let rhs = if <$int>::MIN != 0 {
                // This should, at worst, optimize to a few branchless logical ops
                // Ideally, this entire conditional should evaporate
                // Fire LLVM and implement those manually if it doesn't get the hint
                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                // type inference can break here, so cut an SInt to size
                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
                .select(Simd::splat(1 as _), $rhs)
            } else {
                // Nice base case to make it easy to const-fold away the other branch.
                $rhs
            };

            // aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two
            // these operations aren't vectorized on aarch64 anyway
            #[cfg(target_arch = "aarch64")]
            {
                let mut out = Simd::splat(0 as _);
                for i in 0..Self::LEN {
                    out[i] = $lhs[i] $op rhs[i];
                }
                out
            }

            #[cfg(not(target_arch = "aarch64"))]
            {
                // Safety: $lhs and rhs are vectors
                unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
            }
        }
    };
}
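
// A hedged behavior sketch matching the table above (illustrative only):
//
//     let min = Simd::<i32, 4>::splat(i32::MIN);
//     let neg1 = Simd::splat(-1i32);
//     assert_eq!(min / neg1, min);            // guard swaps rhs to 1 first
//     assert_eq!(min % neg1, Simd::splat(0)); // likewise
//     // Simd::<i32, 4>::splat(1) / Simd::splat(0) panics, as in scalar code.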

macro_rules! for_base_types {
    (   T = ($($scalar:ident),*);
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ty;

        impl $op:ident::$call:ident {
            $macro_impl:ident $inner:tt
        }) => {
            $(
                impl<const N: usize> $op<Self> for Simd<$scalar, N>
                where
                    $scalar: SimdElement,
                    LaneCount<N>: SupportedLaneCount,
                {
                    type Output = $out;

                    #[inline]
                    // TODO: only useful for int Div::div, but we hope that this
                    // will essentially always get inlined anyway.
                    #[track_caller]
                    fn $call(self, rhs: Self) -> Self::Output {
                        $macro_impl!(self, rhs, $inner, $scalar)
                    }
                }
            )*
    }
}
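
// For illustration only: a single step of the expansion. With `T = (i32)`
// and `impl Add::add { unsafe_base { simd_add } }`, the macro emits roughly
//
//     impl<const N: usize> Add<Self> for Simd<i32, N>
//     where
//         i32: SimdElement,
//         LaneCount<N>: SupportedLaneCount,
//     {
//         type Output = Self;
//         #[inline]
//         #[track_caller]
//         fn add(self, rhs: Self) -> Self::Output {
//             unsafe { core::intrinsics::simd::simd_add(self, rhs) }
//         }
//     }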

// A "TokenTree muncher": takes a set of scalar types `T = {};`
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
    (
        T = $types:tt;
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ident;
        impl $op:ident::$call:ident
            $inner:tt
        $($rest:tt)*
    ) => {
        for_base_types! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            impl $op::$call
                $inner
        }
        for_base_ops! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            $($rest)*
        }
    };
    ($($done:tt)*) => {
        // Done.
    }
}

// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl BitAnd::bitand {
        unsafe_base { simd_and }
    }

    impl BitOr::bitor {
        unsafe_base { simd_or }
    }

    impl BitXor::bitxor {
        unsafe_base { simd_xor }
    }

    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
            simd_div, /
        }
    }

    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
            simd_rem, %
        }
    }

    // The only question is how to handle shifts >= <Int>::BITS?
    // Our current solution uses wrapping logic.
    impl Shl::shl {
        wrap_bitshift { simd_shl }
    }

    impl Shr::shr {
        wrap_bitshift {
            // This automatically monomorphizes to lshr or ashr, depending,
            // so it's fine to use it for both UInts and SInts.
            simd_shr
        }
    }
}
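
// A hedged usage sketch of the wrapping behavior noted above (illustrative
// only): unlike scalar `+`, which panics on overflow in debug builds, the
// vector op wraps.
//
//     let x = Simd::<u8, 4>::splat(u8::MAX);
//     assert_eq!(x + Simd::splat(1), Simd::splat(0));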

// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
for_base_ops! {
    T = (f32, f64);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl Div::div {
        unsafe_base { simd_div }
    }

    impl Rem::rem {
        unsafe_base { simd_rem }
    }
}
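
// A hedged usage sketch (illustrative only): float ops never panic; IEEE 754
// handles the edge cases, so a zero divisor yields an infinity or NaN per lane.
//
//     let x = Simd::from_array([1.0f32, -1.0, 0.0, 2.0]);
//     let q = x / Simd::splat(0.0);
//     assert_eq!(q.to_array()[0], f32::INFINITY);
//     assert!(q.to_array()[2].is_nan());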