Skip to content

Commit 367fe1f

Browse files
authored
Merge pull request #9 from bjuthjliu/master
add simhash, add rdtregression
2 parents 33543f7 + 114a962 commit 367fe1f

File tree

12 files changed

+748
-139
lines changed

12 files changed

+748
-139
lines changed
Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
package fregata.hash
2+
3+
import fregata._
4+
import scala.collection.mutable.ArrayBuffer
5+
6+
/**
7+
* Created by hjliu on 16/11/29.
8+
*/
9+
/**
 * Sign-based locality-sensitive hash (SimHash-style) for feature vectors.
 *
 * Every non-zero feature index is expanded into `hashbits / 64` 64-bit words.
 * Each bit of those words votes `+value` (bit set) or `-value` (bit clear) into
 * a per-bit accumulator, and the signature is the set of bit positions whose
 * accumulated sum is non-negative.
 *
 * Supported signature widths are 64, 128, 192, 256, 320, 384, 448 and 512 bits.
 *
 * An instance built with the implicit no-argument constructor is unconfigured:
 * `hashbits` is 0 and `hashfunc` returns `null`, so [[hash]] fails on the first
 * non-zero feature until the fields are assigned.
 */
class SimHash extends Serializable {

  /** Offset used to derive the additional per-feature seeds (see `hashWords`). */
  var featureSize: Int = 0

  /** Width of the produced signature in bits; also the dimension of the output vector. */
  var hashbits: Int = 0

  /** Maps a feature index to the 64-bit words whose bits vote in [[hash]]. */
  var hashfunc: (Int) => Array[Long] = x => null

  /**
   * Creates a configured hasher.
   *
   * @param featureSize offset added to / subtracted from the feature index to derive extra seeds
   * @param hashBits    requested signature width; one of 64, 128, ..., 512. Any other value
   *                    falls back to 256 bits.
   */
  def this(featureSize: Int, hashBits: Int) = {
    this()
    this.featureSize = featureSize
    this.hashbits = hashBits

    hashBits match {
      case 64 => hashfunc = hash64
      case 128 => hashfunc = hash128
      case 192 => hashfunc = hash192
      case 256 => hashfunc = hash256
      case 320 => hashfunc = hash320
      case 384 => hashfunc = hash384
      case 448 => hashfunc = hash448
      case 512 => hashfunc = hash512
      case _ =>
        // Unsupported width: fall back to 256 bits. `hashbits` must follow the
        // fallback, otherwise the accumulator in `hash` is sized for the requested
        // width while `hash256` addresses bits 0..255 (out-of-bounds for smaller
        // widths, permanently-set padding bits for larger ones).
        this.hashbits = 256
        hashfunc = hash256
    }
  }

  /**
   * Scrambles a 64-bit seed: xor-shift by 23, multiply by a fixed odd constant,
   * xor-shift by 47.
   *
   * NOTE(review): this resembles the mixing step of fasthash64, but `>>` here is
   * an arithmetic (sign-propagating) shift. It is kept as-is so that hash values
   * stay identical to the previous implementation.
   */
  private def mix(seed: Long): Long = {
    var h = seed
    h ^= (h >> 23)
    h *= 0x2127599bf4325c37L
    h ^= h >> 47
    h
  }

  /**
   * Produces `words` mixed 64-bit values for feature index `in`.
   *
   * Word `2k` is seeded with `in + k * featureSize` and word `2k + 1` with
   * `-in - k * featureSize`. The product `k * featureSize` is deliberately
   * computed in `Int`, exactly as the former hand-unrolled code did, so results
   * are unchanged even when that product overflows.
   */
  private def hashWords(in: Int, words: Int): Array[Long] = {
    val t = new Array[Long](words)
    var k = 0
    while (k < words) {
      val offset = (k / 2) * featureSize
      val seed = if (k % 2 == 0) in.toLong + offset else -in.toLong - offset
      t(k) = mix(seed)
      k += 1
    }
    t
  }

  /** Returns 1 hash word (64 bits) for feature index `in`. */
  def hash64(in: Int): Array[Long] = hashWords(in, 1)

  /** Returns 2 hash words (128 bits) for feature index `in`. */
  def hash128(in: Int): Array[Long] = hashWords(in, 2)

  /** Returns 3 hash words (192 bits) for feature index `in`. */
  def hash192(in: Int): Array[Long] = hashWords(in, 3)

  /** Returns 4 hash words (256 bits) for feature index `in`. */
  def hash256(in: Int): Array[Long] = hashWords(in, 4)

  /** Returns 5 hash words (320 bits) for feature index `in`. */
  def hash320(in: Int): Array[Long] = hashWords(in, 5)

  /** Returns 6 hash words (384 bits) for feature index `in`. */
  def hash384(in: Int): Array[Long] = hashWords(in, 6)

  /** Returns 7 hash words (448 bits) for feature index `in`. */
  def hash448(in: Int): Array[Long] = hashWords(in, 7)

  /** Returns 8 hash words (512 bits) for feature index `in`. */
  def hash512(in: Int): Array[Long] = hashWords(in, 8)

  /**
   * Computes the signature of `vs`.
   *
   * @param vs input vector; entries equal to zero are skipped
   * @return a sparse vector of dimension `hashbits` holding 1 at every bit position
   *         whose accumulated vote is non-negative (a position that received no
   *         votes has sum 0 and is therefore set)
   */
  def hash(vs: Vector): Vector = {
    val s = new Array[Num](hashbits)
    val one = 1L

    vs.foreachPair {
      (i, v) =>
        if (v != 0d) {
          val h = hashfunc(i)
          val hlen = h.length
          var k = 0
          while (k < hlen) {
            val word = h(k)
            val base = k * 64
            var l = 0
            while (l < 64) {
              // A set bit votes for the position, a clear bit votes against it.
              if ((word & (one << l)) != 0) {
                s(base + l) += v
              } else {
                s(base + l) -= v
              }
              l += 1
            }
            k += 1
          }
        }
    }

    val indices = ArrayBuffer[Int]()
    val values = ArrayBuffer[Num]()
    var j = 0
    while (j < s.length) {
      if (s(j) >= 0) {
        indices.append(j)
        values.append(1d)
      }
      j += 1
    }
    new SparseVector(indices.toArray, values.toArray, hashbits).asInstanceOf[Vector]
  }
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy