key/value store optimized for disk storage
Emile van Sebille
emile at fenx.com
Fri May 4 15:14:18 EDT 2012
On 5/4/2012 10:46 AM Tim Chase said...
I hit a few snags testing this on Windows XP with Python 2.6.1. First,
getsize couldn't find the file, because the database was created in two
parts, with .dat and .dir extensions.
Also, setting key failed because md5.update() returns None, so slicing its result raises an error.
The changes I needed to make are marked below (they are the lines without the ">" quote prefix).
Emile
> import os
> import hashlib
> import random
> from string import letters
>
> import anydbm
>
> KB = 1024
> MB = KB * KB
> GB = MB * KB
> DESIRED_SIZE = 1 * GB
> KEYS_TO_SAMPLE = 20
> FNAME = "mydata.db"
FDATNAME = r"mydata.db.dat"
>
> i = 0
> md5 = hashlib.md5()
> db = anydbm.open(FNAME, 'c')
> try:
> print("Generating junk data...")
> while os.path.getsize(FNAME)< 6*GB:
while os.path.getsize(FDATNAME) < 6*GB:
> key = md5.update(str(i))[:16]
md5.update(str(i))
key = md5.hexdigest()[:16]
> size = random.randrange(1*KB, 4*KB)
> value = ''.join(random.choice(letters)
> for _ in range(size))
> db[key] = value
> i += 1
> print("Gathering %i sample keys" % KEYS_TO_SAMPLE)
> keys_of_interest = random.sample(db.keys(), KEYS_TO_SAMPLE)
> finally:
> db.close()
>
> print("Reopening for a cold sample set in case it matters")
> db = anydbm.open(FNAME)
> try:
> print("Performing %i lookups")
> for key in keys_of_interest:
> v = db[key]
> print("Done")
> finally:
> db.close()
>
More information about the Python-list mailing list