"SfR Fresh" - the SfR Freeware/Shareware Archive 
Member "spambayes-1.0.4/spambayes/cdb.py" of archive spambayes-1.0.4.zip:
As a special service "SfR Fresh" has tried to format the requested source page into HTML format using source code syntax highlighting with prefixed line numbers.
Alternatively you can here view or download the uninterpreted source code file.
That can be also achieved for any archive member file by clicking within an archive contents listing on the first character of the file(path) respectively on the according byte size field.
1 #! /usr/bin/env python
2 """
3 Dan Bernstein's CDB implemented in Python
4
5 see http://cr.yp.to/cdb.html
6
7 """
8
9 from __future__ import generators
10
11 import os
12 import struct
13 import mmap
14
15 def uint32_unpack(buf):
16 return struct.unpack('<L', buf)[0]
17
18 def uint32_pack(n):
19 return struct.pack('<L', n)
20
21 CDB_HASHSTART = 5381
22
23 def cdb_hash(buf):
24 h = CDB_HASHSTART
25 for c in buf:
26 h = (h + (h << 5)) & 0xffffffffL
27 h ^= ord(c)
28 return h
29
30 class Cdb(object):
31
32 def __init__(self, fp):
33 self.fp = fp
34 fd = fp.fileno()
35 self.size = os.fstat(fd).st_size
36 self.map = mmap.mmap(fd, self.size, access=mmap.ACCESS_READ)
37 self.eod = uint32_unpack(self.map[:4])
38 self.findstart()
39 self.loop = 0 # number of hash slots searched under this key
40 # initialized if loop is nonzero
41 self.khash = 0
42 self.hpos = 0
43 self.hslots = 0
44 # initialized if findnext() returns 1
45 self.dpos = 0
46 self.dlen = 0
47
48 def close(self):
49 self.map.close()
50
51 def __iter__(self, fn=None):
52 len = 2048
53 while len < self.eod:
54 klen, vlen = struct.unpack("<LL", self.map[len:len+8])
55 len += 8
56 key = self.map[len:len+klen]
57 len += klen
58 val = self.map[len:len+vlen]
59 len += vlen
60 if fn:
61 yield fn(key, val)
62 else:
63 yield (key, val)
64
65 def iteritems(self):
66 return self.__iter__()
67
68 def iterkeys(self):
69 return self.__iter__(lambda k,v: k)
70
71 def itervalues(self):
72 return self.__iter__(lambda k,v: v)
73
74 def items(self):
75 ret = []
76 for i in self.iteritems():
77 ret.append(i)
78 return ret
79
80 def keys(self):
81 ret = []
82 for i in self.iterkeys():
83 ret.append(i)
84 return ret
85
86 def values(self):
87 ret = []
88 for i in self.itervalues():
89 ret.append(i)
90 return ret
91
92 def findstart(self):
93 self.loop = 0
94
95 def read(self, n, pos):
96 # XXX add code for platforms without mmap
97 return self.map[pos:pos+n]
98
99 def match(self, key, pos):
100 if key == self.read(len(key), pos):
101 return 1
102 else:
103 return 0
104
105 def findnext(self, key):
106 if not self.loop:
107 u = cdb_hash(key)
108 buf = self.read(8, u << 3 & 2047)
109 self.hslots = uint32_unpack(buf[4:])
110 if not self.hslots:
111 raise KeyError
112 self.hpos = uint32_unpack(buf[:4])
113 self.khash = u
114 u >>= 8
115 u %= self.hslots
116 u <<= 3
117 self.kpos = self.hpos + u
118
119 while self.loop < self.hslots:
120 buf = self.read(8, self.kpos)
121 pos = uint32_unpack(buf[4:])
122 if not pos:
123 raise KeyError
124 self.loop += 1
125 self.kpos += 8
126 if self.kpos == self.hpos + (self.hslots << 3):
127 self.kpos = self.hpos
128 u = uint32_unpack(buf[:4])
129 if u == self.khash:
130 buf = self.read(8, pos)
131 u = uint32_unpack(buf[:4])
132 if u == len(key):
133 if self.match(key, pos + 8):
134 dlen = uint32_unpack(buf[4:])
135 dpos = pos + 8 + len(key)
136 return self.read(dlen, dpos)
137 raise KeyError
138
139 def __getitem__(self, key):
140 self.findstart()
141 return self.findnext(key)
142
143 def get(self, key, default=None):
144 self.findstart()
145 try:
146 return self.findnext(key)
147 except KeyError:
148 return default
149
150 def cdb_dump(infile):
151 """dump a database in djb's cdbdump format"""
152 db = Cdb(infile)
153 for key,value in db.iteritems():
154 print "+%d,%d:%s->%s" % (len(key), len(value), key, value)
155 print
156
157 def cdb_make(outfile, items):
158 pos = 2048
159 tables = {} # { h & 255 : [(h, p)] }
160
161 # write keys and data
162 outfile.seek(pos)
163 for key, value in items:
164 outfile.write(uint32_pack(len(key)) + uint32_pack(len(value)))
165 h = cdb_hash(key)
166 outfile.write(key)
167 outfile.write(value)
168 tables.setdefault(h & 255, []).append((h, pos))
169 pos += 8 + len(key) + len(value)
170
171 final = ''
172 # write hash tables
173 for i in range(256):
174 entries = tables.get(i, [])
175 nslots = 2*len(entries)
176 final += uint32_pack(pos) + uint32_pack(nslots)
177 null = (0, 0)
178 table = [null] * nslots
179 for h, p in entries:
180 n = (h >> 8) % nslots
181 while table[n] is not null:
182 n = (n + 1) % nslots
183 table[n] = (h, p)
184 for h, p in table:
185 outfile.write(uint32_pack(h) + uint32_pack(p))
186 pos += 8
187
188 # write header (pointers to tables and their lengths)
189 outfile.flush()
190 outfile.seek(0)
191 outfile.write(final)
192
193
194 def test():
195 #db = Cdb(open("t"))
196 #print db['one']
197 #print db['two']
198 #print db['foo']
199 #print db['us']
200 #print db.get('ec')
201 #print db.get('notthere')
202 db = open('test.cdb', 'wb')
203 cdb_make(db,
204 [('one', 'Hello'),
205 ('two', 'Goodbye'),
206 ('foo', 'Bar'),
207 ('us', 'United States'),
208 ])
209 db.close()
210 db = Cdb(open("test.cdb", 'rb'))
211 print db['one']
212 print db['two']
213 print db['foo']
214 print db['us']
215 print db.get('ec')
216 print db.get('notthere')
217
218 if __name__ == '__main__':
219 test()