Blob of strokedb-ruby/lib/data_structures/simple_skiplist.rb (raw blob data)

1 require 'thread'
2 require File.expand_path(File.dirname(__FILE__) + '/../util/class_optimization')
3
4 module StrokeDB
5 # Implements a thread-safe skiplist structure.
6 # Doesn't yield new skiplists
7 class SimpleSkiplist
8 include Enumerable
9
10 DEFAULT_MAXLEVEL = 32
11 DEFAULT_PROBABILITY = 1/Math::E
12
13 attr_accessor :maxlevel, :probability
14
15 def initialize(raw_list = nil, options = {})
16 @maxlevel = options[:maxlevel] || DEFAULT_MAXLEVEL
17 @probability = options[:probability] || DEFAULT_PROBABILITY
18 @head = raw_list && unserialize_list!(raw_list) || new_head
19 @mutex = Mutex.new
20 end
21
22 # Marshal API
23 def marshal_dump
24 raw_list = serialize_list(@head)
25 {
26 :options => {
27 :maxlevel => @maxlevel,
28 :probability => @probability
29 },
30 :raw_list => raw_list
31 }
32 end
33
34 def marshal_load(dumped)
35 initialize(dumped[:raw_list], dumped[:options])
36 self
37 end
38
39 # Tests whether skiplist is empty.
40 #
41 def empty?
42 !node_next(@head, 0)
43 end
44
45 # First key of a non-empty skiplist (nil for empty one)
46 #
47 def first_key
48 first = node_next(@head, 0)
49 return first ? first[1] : nil
50 end
51
52 # Insert a key-value pair. If key already exists,
53 # value will be overwritten.
54 #
55 def insert(key, value, __level = nil)
56 @mutex.synchronize do
57 newlevel = __level || random_level
58 x = node_first
59 level = node_level(x)
60 update = Array.new(level)
61 x = find_with_update(x, level, key, update)
62
63 # rewrite existing key
64 if node_compare(x, key) == 0
65 node_set_value!(x, value)
66 # insert in a middle
67 else
68 level = newlevel
69 newx = new_node(newlevel, key, value)
70 while level > 0
71 level -= 1
72 node_insert_after!(newx, update[level], level)
73 end
74 end
75 end
76 self
77 end
78
79 def find_with_update(x, level, key, update) #:nodoc:
80 while level > 0
81 level -= 1
82 xnext = node_next(x, level)
83 while node_compare(xnext, key) < 0
84 x = xnext
85 xnext = node_next(x, level)
86 end
87 update[level] = x
88 end
89 xnext
90 end
91
92 # Find is thread-safe and requires no mutexes locking.
93 def find_nearest_node(key) #:nodoc:
94 x = node_first
95 level = node_level(x)
96 while level > 0
97 level -= 1
98 xnext = node_next(x, level)
99 while node_compare(xnext, key) <= 0
100 x = xnext
101 xnext = node_next(x, level)
102 end
103 end
104 x
105 end
106
107 declare_optimized_methods(:Java) do
108 # Temporary off due to:
109 # ./vendor/java_inline.rb:19: cannot load Java class javax.tools.ToolProvider (NameError)
110 #
111 # require 'vendor/java_inline'
112 # inline(:Java) do |builder|
113 # builder.package "org.jruby.strokedb"
114 # builder.import "java.lang.reflect.*"
115 # builder.java %{
116 # public static Object find_Java(String key)
117 # {
118 # Object o = new Object();
119 # return o;
120 # /*Class[] param_types = new Class[1];
121 # param_types[0] = String;
122 # Method method = this.getClass().getMethod("find", param_types);
123 # Object[] invokeParam = new Object[1];
124 # invokeParam[0] = key;
125 #
126 # return method.invoke(this, invokeParam);
127 # */
128 # }
129 # }
130 # end
131 end
132
133 declare_optimized_methods(:C, :find_nearest_node, :find_with_update) do
134 require 'rubygems'
135 require 'inline'
136 inline(:C) do |builder|
137 builder.prefix %{
138 static ID i_node_first, i_node_level;
139 #define SS_NODE_NEXT(x, level) (rb_ary_entry(rb_ary_entry(x, 0), level))
140 static int ss_node_compare(VALUE x, VALUE key)
141 {
142 if (x == Qnil) return 1; /* tail */
143 VALUE key1 = rb_ary_entry(x, 1);
144 if (key1 == Qnil) return -1; /* head */
145 return rb_str_cmp(key1, key);
146 }
147 }
148 builder.add_to_init %{
149 i_node_first = rb_intern("node_first");
150 i_node_level = rb_intern("node_level");
151 }
152 builder.c %{
153 VALUE find_nearest_node_C(VALUE key)
154 {
155 VALUE x = rb_funcall(self, i_node_first, 0);
156 long level = FIX2LONG(rb_funcall(self, i_node_level, 1, x));
157 VALUE xnext;
158 while (level-- > 0)
159 {
160 xnext = SS_NODE_NEXT(x, level);
161 while (ss_node_compare(xnext, key) <= 0)
162 {
163 x = xnext;
164 xnext = SS_NODE_NEXT(x, level);
165 }
166 }
167 return x;
168 }
169 }
170 builder.c %{
171 static VALUE find_with_update_C(VALUE x, VALUE rlevel, VALUE key, VALUE update)
172 {
173 long level = FIX2LONG(rlevel);
174 VALUE xnext;
175 while (level-- > 0)
176 {
177 xnext = SS_NODE_NEXT(x, level);
178 while (ss_node_compare(xnext, key) < 0)
179 {
180 x = xnext;
181 xnext = SS_NODE_NEXT(x, level);
182 }
183 rb_ary_store(update, level, x);
184 }
185 return xnext;
186 }
187 }
188 end
189 end
190
191 # Finds a value with a nearest key to given key (from the left).
192 # For a set of keys [b, d, f], query "a" will return nil and query "c"
193 # will return a value under "b" key.
194 #
195 def find_nearest(key)
196 node_value(find_nearest_node(key))
197 end
198
199 # Returns value, associated with key. nil if key is not found.
200 #
201 def find(key)
202 x = find_nearest_node(key)
203 return node_value(x) if node_compare(x, key) == 0
204 nil # nothing found
205 end
206
207 def each #:nodoc:
208 x = node_next(node_first, 0)
209 while x
210 yield(x)
211 x = node_next(x, 0)
212 end
213 self
214 end
215
216 # Constructs a skiplist from a hash values.
217 #
218 def self.from_hash(hash, options = {})
219 from_a(hash.to_a, options)
220 end
221
222 # Constructs a skiplist from an array of key-value tuples (arrays).
223 #
224 def self.from_a(ary, options = {})
225 sl = new(nil, options)
226 ary.each do |kv|
227 sl.insert(kv[0], kv[1])
228 end
229 sl
230 end
231
232 # Converts skiplist to an array of key-value pairs.
233 #
234 def to_a
235 inject([]) do |arr, node|
236 arr << node[1, 2] # key, data
237 arr
238 end
239 end
240
241 private
242
243 def serialize_list(head)
244 head = node_first.dup
245 head[0] = [ nil ] * node_level(head)
246 raw_list = [ head ]
247 prev_by_levels = [ head ] * node_level(head)
248 x = node_next(head, 0)
249 i = 1
250 while x
251 l = node_level(x)
252 nx = node_next(x, 0)
253 x = x.dup # make modification-safe copy of node
254 forwards = x[0]
255 while l > 0 # for each node level update forwards
256 l -= 1
257 prev_by_levels[l][l] = i # set raw_list's index as a forward ref
258 forwards[l] = nil # nullify forward pointer (point to tail)
259 prev_by_levels[l] = x # set in a previous stack
260 end
261 raw_list << x # store serialized node in an array
262 x = nx # step to next node
263 i += 1 # increment index in a raw_list array
264 end
265 raw_list
266 end
267
268 # Returns head of an imported skiplist.
269 # Caution: raw_list is modified (thus the bang).
270 # Pass dup-ed value if you need.
271 def unserialize_list!(raw_list)
272 x = raw_list[0]
273 while x != nil
274 forwards = x[0]
275 forwards.each_with_index do |rawindex, i|
276 forwards[i] = rawindex ? raw_list[rawindex] : nil
277 end
278 # go next node
279 x = forwards[0]
280 end
281 # return head
282 raw_list[0]
283 end
284
285 # C-style API for node operations
286 def node_first
287 @head
288 end
289
290 def node_level(x)
291 x[0].size
292 end
293
294 def node_next(x, level)
295 x[0][level]
296 end
297
298 def node_compare(x, key)
299 return 1 unless x # tail
300 return -1 unless x[1] # head
301 x[1] <=> key
302 end
303
304 def node_value(x)
305 x[2]
306 end
307
308 def node_set_value!(x, value)
309 x[2] = value
310 end
311
312 def node_insert_after!(x, prev, level)
313 x[0][level] = prev[0][level]
314 prev[0][level] = x
315 end
316
317 def new_node(level, key, value)
318 [
319 [nil]*level,
320 key,
321 value
322 ]
323 end
324
325 def new_head
326 new_node(@maxlevel, nil, nil)
327 end
328
329 def random_level
330 p = @probability
331 m = @maxlevel
332 l = 1
333 l += 1 while rand < p && l < m
334 return l
335 end
336
337 end
338 end
339
340 if __FILE__ == $0
341
342 require 'benchmark'
343 include StrokeDB
344
345 puts "Serialization techniques"
346
347 len = 2_000
348 array = (1..len).map{ [rand(len).to_s]*2 }
349 biglist = SimpleSkiplist.from_a(array)
350 dumped = biglist.marshal_dump
351
352 Benchmark.bm(17) do |x|
353 # First technique: to_a/from_a
354 GC.start
355 x.report("SimpleSkiplist#to_a ") do
356 biglist.to_a
357 biglist.to_a
358 biglist.to_a
359 biglist.to_a
360 biglist.to_a
361 end
362 GC.start
363 x.report("SimpleSkiplist.from_a ") do
364 SimpleSkiplist.from_a(array)
365 SimpleSkiplist.from_a(array)
366 SimpleSkiplist.from_a(array)
367 SimpleSkiplist.from_a(array)
368 SimpleSkiplist.from_a(array)
369 end
370
371 # Another technique: Marshal.dump
372 GC.start
373 x.report("SimpleSkiplist#marshal_dump ") do
374 biglist.marshal_dump
375 biglist.marshal_dump
376 biglist.marshal_dump
377 biglist.marshal_dump
378 biglist.marshal_dump
379 end
380 GC.start
381 x.report("SimpleSkiplist#marshal_load ") do
382 SimpleSkiplist.allocate.marshal_load(dumped.dup)
383 SimpleSkiplist.allocate.marshal_load(dumped.dup)
384 SimpleSkiplist.allocate.marshal_load(dumped.dup)
385 SimpleSkiplist.allocate.marshal_load(dumped.dup)
386 SimpleSkiplist.allocate.marshal_load(dumped.dup)
387 end
388 end
389
390 puts
391 puts "Find/insert techniques"
392 Benchmark.bm(32) do |x|
393 langs = [:C] if RUBY_PLATFORM !~ /java/
394 langs = [:Java] if RUBY_PLATFORM =~ /java/
395 SimpleSkiplist.with_optimizations(langs) do |lang|
396 GC.start
397 x.report("SimpleSkiplist#find #{lang}".ljust(32)) do
398 100.times do
399 key = rand(len).to_s
400 biglist.find(key)
401 biglist.find(key)
402 biglist.find(key)
403 biglist.find(key)
404 biglist.find(key)
405 end
406 end
407 GC.start
408 x.report("SimpleSkiplist#insert #{lang}".ljust(32)) do
409 100.times do
410 key = rand(len).to_s
411 biglist.insert(key, key)
412 key = rand(len).to_s
413 biglist.insert(key, key)
414 key = rand(len).to_s
415 biglist.insert(key, key)
416 key = rand(len).to_s
417 biglist.insert(key, key)
418 key = rand(len).to_s
419 biglist.insert(key, key)
420 end
421 end
422 end
423 end
424 end