
Commit e751c6e

RalfJung authored and tgross35 committed
turn load_prefix macro into helper function
1 parent bec25a4 · commit e751c6e

File tree

1 file changed: +37 -30 lines


compiler-builtins/src/mem/impls.rs

Lines changed: 37 additions & 30 deletions
@@ -41,30 +41,43 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     core::mem::transmute(x_read)
 }
 
+/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
+/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
+/// chunk size if a load happened.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_chunk_aligned<T: Copy>(
+    src: *const usize,
+    dst: *mut usize,
+    load_sz: usize,
+    offset: usize,
+) -> usize {
+    let chunk_sz = core::mem::size_of::<T>();
+    if (load_sz & chunk_sz) != 0 {
+        *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
+        offset | chunk_sz
+    } else {
+        offset
+    }
+}
+
 /// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
 /// read with the out-of-bounds part filled with 0s.
 /// `load_sz` must be strictly less than `WORD_SIZE`.
 #[cfg(not(feature = "mem-unaligned"))]
 #[inline(always)]
 unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
 
     let mut i = 0;
     let mut out = 0usize;
-    macro_rules! load_prefix {
-        ($($ty:ty)+) => {$(
-            let chunk_sz = core::mem::size_of::<$ty>();
-            if (load_sz & chunk_sz) != 0 {
-                // Since we are doing the large reads first, this must still be aligned to `chunk_sz`.
-                *(&raw mut out).wrapping_byte_add(i).cast::<$ty>() = *src.wrapping_byte_add(i).cast::<$ty>();
-                i |= chunk_sz;
-            }
-        )+};
-    }
-    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
-    // (since `load_size < WORD_SIZE`).
-    const { assert!(WORD_SIZE <= 8) };
-    load_prefix!(u32 u16 u8);
+    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
+    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
     debug_assert!(i == load_sz);
     out
 }
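
The trick shared by the removed macro and the new helper deserves a note: the chunk sizes 4, 2, and 1 are distinct powers of two, so `load_sz & chunk_sz` tests whether that chunk occurs in the binary decomposition of `load_sz`, and because the bits already accumulated in the offset never overlap the current chunk bit, `offset | chunk_sz` is the same as `offset + chunk_sz`. Visiting chunks largest-first also keeps the raw pointers aligned: `src` is word-aligned, and after the `u32` step the offset is a multiple of 4, which is more than enough for the following `u16` and `u8` loads. Below is a minimal safe model of this decomposition, an illustration rather than the crate's code: `WORD_SIZE`, `load_chunk_model`, `load_aligned_partial_model`, and the `main` harness are all invented for the demo, which checks the result against a plain byte-wise copy.

// Minimal safe model of the prefix load (illustration only; all names here
// are invented for the demo and are not part of compiler-builtins).
const WORD_SIZE: usize = core::mem::size_of::<usize>();

// Models `load_chunk_aligned`: copy a CHUNK_SZ-byte chunk at `offset` iff the
// CHUNK_SZ bit is set in `load_sz`, returning the advanced offset.
fn load_chunk_model<const CHUNK_SZ: usize>(
    src: &[u8],
    dst: &mut [u8; WORD_SIZE],
    load_sz: usize,
    offset: usize,
) -> usize {
    if (load_sz & CHUNK_SZ) != 0 {
        dst[offset..offset + CHUNK_SZ].copy_from_slice(&src[offset..offset + CHUNK_SZ]);
        // Bits at and below CHUNK_SZ are still zero in `offset` (largest
        // chunks are consumed first), so `|` acts as `+`.
        offset | CHUNK_SZ
    } else {
        offset
    }
}

// Models `load_aligned_partial`: read the first `load_sz` bytes, zero-fill the rest.
fn load_aligned_partial_model(src: &[u8], load_sz: usize) -> usize {
    assert!(load_sz < WORD_SIZE);
    let mut out = [0u8; WORD_SIZE];
    let mut i = 0;
    // Largest chunk first, mirroring the u32/u16/u8 order in the diff above.
    i = load_chunk_model::<4>(src, &mut out, load_sz, i);
    i = load_chunk_model::<2>(src, &mut out, load_sz, i);
    i = load_chunk_model::<1>(src, &mut out, load_sz, i);
    assert_eq!(i, load_sz);
    usize::from_ne_bytes(out)
}

fn main() {
    let src = [0x11u8, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88];
    for load_sz in 0..WORD_SIZE {
        // Reference semantics: first `load_sz` bytes of the word, rest zeroed.
        let mut want = [0u8; WORD_SIZE];
        want[..load_sz].copy_from_slice(&src[..load_sz]);
        assert_eq!(load_aligned_partial_model(&src, load_sz), usize::from_ne_bytes(want));
    }
}

The second hunk applies the same refactor to the suffix loader, `load_aligned_end_partial`: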
@@ -77,25 +90,19 @@ unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
 #[inline(always)]
 unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
 
     let mut i = 0;
     let mut out = 0usize;
-    let start_shift = WORD_SIZE - load_sz;
-    macro_rules! load_prefix {
-        ($($ty:ty)+) => {$(
-            let chunk_sz = core::mem::size_of::<$ty>();
-            if (load_sz & chunk_sz) != 0 {
-                // Since we are doing the small reads first, `start_shift + i` has in the mean
-                // time become aligned to `chunk_sz`.
-                *(&raw mut out).wrapping_byte_add(start_shift + i).cast::<$ty>() = *src.wrapping_byte_add(start_shift + i).cast::<$ty>();
-                i |= chunk_sz;
-            }
-        )+};
-    }
-    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
-    // (since `load_size < WORD_SIZE`).
-    const { assert!(WORD_SIZE <= 8) };
-    load_prefix!(u8 u16 u32);
+    // Obtain pointers pointing to the beginning of the range we want to load.
+    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
+    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
+    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
+    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
     debug_assert!(i == load_sz);
     out
 }
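
The suffix variant mirrors the prefix one: the last `load_sz` bytes of the word must land at the end of the output, so both pointers are advanced by `WORD_SIZE - load_sz` up front and the chunks are visited smallest-first; once the `u8` and then `u16` steps have run, the running offset has become 2- and then 4-aligned, as the old macro comment explained. Here is a self-contained safe model of the same logic, again with invented names (`load_aligned_end_partial_model`) and a byte-wise reference check; it is a sketch of the technique, not the crate's actual implementation.

// Self-contained safe model of the suffix load (names invented for the demo).
const WORD_SIZE: usize = core::mem::size_of::<usize>();

// Models `load_aligned_end_partial`: read the *last* `load_sz` bytes of the
// word, acting as if the leading out-of-bounds part were zero.
fn load_aligned_end_partial_model(src: &[u8], load_sz: usize) -> usize {
    assert!(load_sz < WORD_SIZE);
    let mut out = [0u8; WORD_SIZE];
    // Both buffers are advanced to the start of the wanted range.
    let start = WORD_SIZE - load_sz;
    let mut i = 0;
    // Smallest chunk first: after the 1-byte step `start + i` is 2-aligned,
    // and after the 2-byte step it is 4-aligned.
    for chunk_sz in [1usize, 2, 4] {
        if (load_sz & chunk_sz) != 0 {
            out[start + i..start + i + chunk_sz]
                .copy_from_slice(&src[start + i..start + i + chunk_sz]);
            i |= chunk_sz;
        }
    }
    assert_eq!(i, load_sz);
    usize::from_ne_bytes(out)
}

fn main() {
    // Word-sized test buffer: 0x11, 0x22, ... up to WORD_SIZE bytes.
    let src: [u8; WORD_SIZE] = core::array::from_fn(|k| (k as u8 + 1) * 0x11);
    for load_sz in 0..WORD_SIZE {
        // Reference semantics: last `load_sz` bytes of the word, front zeroed.
        let mut want = [0u8; WORD_SIZE];
        want[WORD_SIZE - load_sz..].copy_from_slice(&src[WORD_SIZE - load_sz..]);
        assert_eq!(load_aligned_end_partial_model(&src, load_sz), usize::from_ne_bytes(want));
    }
}

As in the prefix model, `i |= chunk_sz` coincides with addition: when chunks are visited in increasing order, every bit already set in `i` is strictly smaller than `chunk_sz`.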

0 commit comments
