core/hash/
sip.rs

1//! An implementation of SipHash.
2
3#![allow(deprecated)] // the types in this module are deprecated
4
5use crate::marker::PhantomData;
6use crate::{cmp, ptr};
7
/// An implementation of SipHash 1-3.
///
/// This is currently the default hashing function used by the standard library
/// (e.g., `collections::HashMap` uses it by default).
///
/// The type is a thin wrapper around the module-private `Hasher` state
/// machine, instantiated with the `Sip13Rounds` round schedule (one
/// compression round per absorbed word, three finalization rounds).
/// The derived `Default` produces a hasher whose two keys are both 0.
///
/// See: <https://131002.net/siphash>
#[unstable(feature = "hashmap_internals", issue = "none")]
#[deprecated(since = "1.13.0", note = "use `std::hash::DefaultHasher` instead")]
#[derive(Debug, Clone, Default)]
#[doc(hidden)]
pub struct SipHasher13 {
    // Complete hashing state; `Sip13Rounds` selects the 1-3 round counts.
    hasher: Hasher<Sip13Rounds>,
}
21
/// An implementation of SipHash 2-4.
///
/// This private type is the payload of the public `SipHasher` newtype. It
/// wraps the module-private `Hasher` state machine, instantiated with the
/// `Sip24Rounds` round schedule (two compression rounds per absorbed word,
/// four finalization rounds).
///
/// See: <https://131002.net/siphash/>
#[unstable(feature = "hashmap_internals", issue = "none")]
#[deprecated(since = "1.13.0", note = "use `std::hash::DefaultHasher` instead")]
#[derive(Debug, Clone, Default)]
struct SipHasher24 {
    // Complete hashing state; `Sip24Rounds` selects the 2-4 round counts.
    hasher: Hasher<Sip24Rounds>,
}
31
/// An implementation of SipHash 2-4.
///
/// See: <https://131002.net/siphash/>
///
/// SipHash is a general-purpose hashing function: it runs at a good
/// speed (competitive with Spooky and City) and permits strong _keyed_
/// hashing. This lets you key your hash tables from a strong RNG, such as
/// [`rand::rngs::OsRng`](https://docs.rs/rand/latest/rand/rngs/struct.OsRng.html).
///
/// Although the SipHash algorithm is considered to be generally strong,
/// it is not intended for cryptographic purposes. As such, all
/// cryptographic uses of this implementation are _strongly discouraged_.
///
/// The derived `Default` produces a hasher whose two keys are both 0, the
/// same value `SipHasher::new` returns.
#[stable(feature = "rust1", since = "1.0.0")]
#[deprecated(since = "1.13.0", note = "use `std::hash::DefaultHasher` instead")]
#[derive(Debug, Clone, Default)]
pub struct SipHasher(SipHasher24);
48
/// Streaming SipHash state shared by every hasher type in this module.
///
/// The type parameter `S` picks the round counts through the `Sip` trait. No
/// value of `S` is ever stored, which is why the struct only carries a
/// `PhantomData<S>` marker. `Clone` and `Default` are implemented by hand
/// further down in this file rather than derived.
#[derive(Debug)]
struct Hasher<S: Sip> {
    k0: u64,       // first key word, kept so the state can be re-derived by `reset`
    k1: u64,       // second key word
    length: usize, // total number of bytes passed to `write` so far
    state: State,  // the four SipHash state words
    tail: u64,     // bytes not yet absorbed, packed little-endian (first byte in the low bits)
    ntail: usize,  // how many bytes in tail are valid (always less than 8)
    _marker: PhantomData<S>,
}
59
/// The four 64-bit working words of SipHash.
///
/// The fields are deliberately declared out of numeric order and the struct
/// is `#[repr(C)]` so that the declared order is also the memory order; see
/// the comment inside for the reason.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
struct State {
    // v0, v2 and v1, v3 show up in pairs in the algorithm,
    // and simd implementations of SipHash will use vectors
    // of v02 and v13. By placing them in this order in the struct,
    // the compiler can pick up on just a few simd optimizations by itself.
    v0: u64,
    v2: u64,
    v1: u64,
    v3: u64,
}
72
/// Applies one SipHash round ("SipRound") in place.
///
/// Two call forms are accepted:
/// * `compress!(v0, v1, v2, v3)` with four assignable `u64` places, and
/// * `compress!(state)` with any value exposing `v0`..`v3` fields, which
///   simply forwards to the four-argument form.
macro_rules! compress {
    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
        // First half-round: mix the (v0, v1) and (v2, v3) pairs.
        $v0 = $v0.wrapping_add($v1);
        $v2 = $v2.wrapping_add($v3);
        $v1 = $v1.rotate_left(13) ^ $v0;
        $v3 = $v3.rotate_left(16) ^ $v2;
        $v0 = $v0.rotate_left(32);

        // Second half-round: mix the (v2, v1) and (v0, v3) pairs.
        $v2 = $v2.wrapping_add($v1);
        $v0 = $v0.wrapping_add($v3);
        $v1 = $v1.rotate_left(17) ^ $v2;
        $v3 = $v3.rotate_left(21) ^ $v0;
        $v2 = $v2.rotate_left(32);
    }};
    ($state:expr) => {{ compress!($state.v0, $state.v1, $state.v2, $state.v3) }};
}
93
/// Reads an integer of type `$int_ty` from `$buf`, starting at byte offset
/// `$i` and interpreting the bytes in little-endian order. The bytes are
/// moved with a fixed-size `copy_nonoverlapping`, so the source address may be
/// unaligned and the compiler is free to pick the cheapest load.
///
/// Safety: the expansion dereferences raw pointers and therefore has to sit
/// inside an `unsafe` context. The range `$i..$i+size_of::<$int_ty>()` must be
/// in-bounds for `$buf`; this is only verified by a `debug_assert!`.
macro_rules! load_int_le {
    ($buf:expr, $i:expr, $int_ty:ident) => {{
        debug_assert!($i + size_of::<$int_ty>() <= $buf.len());
        let mut value: $int_ty = 0;
        ptr::copy_nonoverlapping(
            $buf.as_ptr().add($i),
            (&mut value as *mut $int_ty).cast::<u8>(),
            size_of::<$int_ty>(),
        );
        // The bytes were copied verbatim; convert from LE to native order.
        $int_ty::from_le(value)
    }};
}
112
113/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
114/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
115/// sizes and avoid calling `memcpy`, which is good for speed.
116///
117/// Safety: this performs unchecked indexing of `buf` at `start..start+len`, so
118/// that must be in-bounds.
119#[inline]
120unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
121    debug_assert!(len < 8);
122    let mut i = 0; // current byte index (from LSB) in the output u64
123    let mut out = 0;
124    if i + 3 < len {
125        // SAFETY: `i` cannot be greater than `len`, and the caller must guarantee
126        // that the index start..start+len is in bounds.
127        out = unsafe { load_int_le!(buf, start + i, u32) } as u64;
128        i += 4;
129    }
130    if i + 1 < len {
131        // SAFETY: same as above.
132        out |= (unsafe { load_int_le!(buf, start + i, u16) } as u64) << (i * 8);
133        i += 2
134    }
135    if i < len {
136        // SAFETY: same as above.
137        out |= (unsafe { *buf.get_unchecked(start + i) } as u64) << (i * 8);
138        i += 1;
139    }
140    //FIXME(fee1-dead): use debug_assert_eq
141    debug_assert!(i == len);
142    out
143}
144
145impl SipHasher {
146    /// Creates a new `SipHasher` with the two initial keys set to 0.
147    #[inline]
148    #[stable(feature = "rust1", since = "1.0.0")]
149    #[deprecated(since = "1.13.0", note = "use `std::hash::DefaultHasher` instead")]
150    #[must_use]
151    pub fn new() -> SipHasher {
152        SipHasher::new_with_keys(0, 0)
153    }
154
155    /// Creates a `SipHasher` that is keyed off the provided keys.
156    #[inline]
157    #[stable(feature = "rust1", since = "1.0.0")]
158    #[deprecated(since = "1.13.0", note = "use `std::hash::DefaultHasher` instead")]
159    #[must_use]
160    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
161        SipHasher(SipHasher24 { hasher: Hasher::new_with_keys(key0, key1) })
162    }
163}
164
165impl SipHasher13 {
166    /// Creates a new `SipHasher13` with the two initial keys set to 0.
167    #[inline]
168    #[unstable(feature = "hashmap_internals", issue = "none")]
169    #[deprecated(since = "1.13.0", note = "use `std::hash::DefaultHasher` instead")]
170    pub fn new() -> SipHasher13 {
171        SipHasher13::new_with_keys(0, 0)
172    }
173
174    /// Creates a `SipHasher13` that is keyed off the provided keys.
175    #[inline]
176    #[unstable(feature = "hashmap_internals", issue = "none")]
177    #[deprecated(since = "1.13.0", note = "use `std::hash::DefaultHasher` instead")]
178    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
179        SipHasher13 { hasher: Hasher::new_with_keys(key0, key1) }
180    }
181}
182
183impl<S: Sip> Hasher<S> {
184    #[inline]
185    const fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
186        let mut state = Hasher {
187            k0: key0,
188            k1: key1,
189            length: 0,
190            state: State { v0: 0, v1: 0, v2: 0, v3: 0 },
191            tail: 0,
192            ntail: 0,
193            _marker: PhantomData,
194        };
195        state.reset();
196        state
197    }
198
199    #[inline]
200    const fn reset(&mut self) {
201        self.length = 0;
202        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
203        self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
204        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
205        self.state.v3 = self.k1 ^ 0x7465646279746573;
206        self.ntail = 0;
207    }
208}
209
210#[stable(feature = "rust1", since = "1.0.0")]
211impl super::Hasher for SipHasher {
212    #[inline]
213    fn write(&mut self, msg: &[u8]) {
214        self.0.hasher.write(msg)
215    }
216
217    #[inline]
218    fn write_str(&mut self, s: &str) {
219        self.0.hasher.write_str(s);
220    }
221
222    #[inline]
223    fn finish(&self) -> u64 {
224        self.0.hasher.finish()
225    }
226}
227
228#[unstable(feature = "hashmap_internals", issue = "none")]
229impl super::Hasher for SipHasher13 {
230    #[inline]
231    fn write(&mut self, msg: &[u8]) {
232        self.hasher.write(msg)
233    }
234
235    #[inline]
236    fn write_str(&mut self, s: &str) {
237        self.hasher.write_str(s);
238    }
239
240    #[inline]
241    fn finish(&self) -> u64 {
242        self.hasher.finish()
243    }
244}
245
impl<S: Sip> super::Hasher for Hasher<S> {
    // Note: no integer hashing methods (`write_u*`, `write_i*`) are defined
    // for this type. We could add them, copy the `short_write` implementation
    // in librustc_data_structures/sip128.rs, and add `write_u*`/`write_i*`
    // methods to `SipHasher`, `SipHasher13`, and `DefaultHasher`. This would
    // greatly speed up integer hashing by those hashers, at the cost of
    // slightly slowing down compile speeds on some benchmarks. See #69152 for
    // details.

    /// Absorbs `msg` into the hash state.
    ///
    /// Input is consumed in 64-bit little-endian words. Bytes that do not yet
    /// fill a whole word are kept in `self.tail` (with their count in
    /// `self.ntail`) and are completed by a later `write` or consumed by
    /// `finish`.
    #[inline]
    fn write(&mut self, msg: &[u8]) {
        let length = msg.len();
        self.length += length;

        // Number of leading bytes of `msg` used to complete a pending partial
        // word; stays 0 when nothing is buffered.
        let mut needed = 0;

        if self.ntail != 0 {
            needed = 8 - self.ntail;
            // SAFETY: `cmp::min(length, needed)` is guaranteed to not be over `length`
            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
            if length < needed {
                // Still short of a full word: keep buffering and wait for more input.
                self.ntail += length;
                return;
            } else {
                // The buffered word is now complete: absorb it.
                self.state.v3 ^= self.tail;
                S::c_rounds(&mut self.state);
                self.state.v0 ^= self.tail;
                self.ntail = 0;
            }
        }

        // Buffered tail is now flushed, process new input.
        let len = length - needed;
        let left = len & 0x7; // len % 8

        // `i` is an index into `msg` (so it starts at `needed`), whereas
        // `len - left` is a byte count. Because `needed < 8`, the comparison
        // below still admits exactly `(len - left) / 8` iterations.
        let mut i = needed;
        while i < len - left {
            // SAFETY: because `len - left` is the biggest multiple of 8 under
            // `len`, and because `i` starts at `needed` where `len` is `length - needed`,
            // `i + 8` is guaranteed to be less than or equal to `length`.
            let mi = unsafe { load_int_le!(msg, i, u64) };

            self.state.v3 ^= mi;
            S::c_rounds(&mut self.state);
            self.state.v0 ^= mi;

            i += 8;
        }

        // SAFETY: `i` is now `needed + len.div_euclid(8) * 8`,
        // so `i + left` = `needed + len` = `length`, which is by
        // definition equal to `msg.len()`.
        self.tail = unsafe { u8to64_le(msg, i, left) };
        self.ntail = left;
    }

    /// Absorbs the bytes of `s` followed by a single `0xFF` terminator byte.
    #[inline]
    fn write_str(&mut self, s: &str) {
        // This hasher works byte-wise, and `0xFF` cannot show up in a `str`,
        // so just hashing the one extra byte is enough to be prefix-free.
        self.write(s.as_bytes());
        self.write_u8(0xFF);
    }

    /// Computes the hash of everything written so far.
    ///
    /// All work is done on a copy of the state words, so `self` is left
    /// unchanged and further `write` calls may follow.
    #[inline]
    fn finish(&self) -> u64 {
        let mut state = self.state;

        // Final word: the low byte of the total input length in the top byte,
        // combined with the still-buffered tail bytes.
        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;

        state.v3 ^= b;
        S::c_rounds(&mut state);
        state.v0 ^= b;

        // Finalization: flip the low byte of v2, run the finalization rounds
        // and fold the four words into the 64-bit result.
        state.v2 ^= 0xff;
        S::d_rounds(&mut state);

        state.v0 ^ state.v1 ^ state.v2 ^ state.v3
    }
}
325
326impl<S: Sip> Clone for Hasher<S> {
327    #[inline]
328    fn clone(&self) -> Hasher<S> {
329        Hasher {
330            k0: self.k0,
331            k1: self.k1,
332            length: self.length,
333            state: self.state,
334            tail: self.tail,
335            ntail: self.ntail,
336            _marker: self._marker,
337        }
338    }
339}
340
341impl<S: Sip> Default for Hasher<S> {
342    /// Creates a `Hasher<S>` with the two initial keys set to 0.
343    #[inline]
344    fn default() -> Hasher<S> {
345        Hasher::new_with_keys(0, 0)
346    }
347}
348
/// Round schedule of a SipHash variant.
///
/// Implementors decide how many `compress!` rounds are applied in each phase;
/// `Hasher<S>` is generic over this trait and calls the functions statically.
#[doc(hidden)]
trait Sip {
    /// Compression rounds, run each time a 64-bit word is absorbed (by
    /// `write`, and once more by `finish` for the final length/tail word).
    fn c_rounds(_: &mut State);
    /// Finalization rounds, run once by `finish` before the state words are
    /// folded into the result.
    fn d_rounds(_: &mut State);
}
354
355#[derive(Debug, Clone, Default)]
356struct Sip13Rounds;
357
358impl Sip for Sip13Rounds {
359    #[inline]
360    fn c_rounds(state: &mut State) {
361        compress!(state);
362    }
363
364    #[inline]
365    fn d_rounds(state: &mut State) {
366        compress!(state);
367        compress!(state);
368        compress!(state);
369    }
370}
371
372#[derive(Debug, Clone, Default)]
373struct Sip24Rounds;
374
375impl Sip for Sip24Rounds {
376    #[inline]
377    fn c_rounds(state: &mut State) {
378        compress!(state);
379        compress!(state);
380    }
381
382    #[inline]
383    fn d_rounds(state: &mut State) {
384        compress!(state);
385        compress!(state);
386        compress!(state);
387        compress!(state);
388    }
389}
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy