This
entry got me thinking.
Here's my 5 cents. In all cases the construct
if key in dictionary:
value = dictionary[key]
else:
value = default
performed best.
update: I increased the amount of data to reduce the amount of white-noise in the test results, though things haven't changed. An explanation to the output. The programm compares nine data-sets against four test functions. The nine data sets are the combinations of size-distribution between search/dictionary and size-distribution between search and matching keysets ( thereof the small/even/big combinations ).
update2: Why, the version of python is 2.4 of course :)
Here's the output of the programm.
big dict, small search, big intersect
test_has_key 0.9690
test_get 1.0000
test_try 1.2190
test_in 0.5930
big dict, small search, small intersect
test_has_key 0.8750
test_get 1.0320
test_try 6.6250
test_in 0.5150
big dict, small search, half intersect
test_has_key 0.9220
test_get 1.0310
test_try 3.9530
test_in 0.5470
even, big intersect
test_has_key 1.4370
test_get 1.4840
test_try 1.7030
test_in 1.0780
even, small intersect
test_has_key 1.4530
test_get 1.6090
test_try 7.3120
test_in 1.0780
even, half intersect
test_has_key 1.4690
test_get 1.5620
test_try 4.5630
test_in 1.0780
small dict, big search, big intersect
test_has_key 0.1090
test_get 0.1250
test_try 0.7970
test_in 0.0630
small dict, big search, small intersect
test_has_key 1.0790
test_get 1.2180
test_try 7.0160
test_in 0.7340
small dict, big search, half intersect
test_has_key 0.5790
test_get 0.6560
test_try 3.8440
test_in 0.3910
and that's the programm
import sys, time
def random_key( keylen = 10 ):
import random
return ''.join(chr(random.randint(ord('a'),ord('z'))) for _ in range( keylen ))
def new_key( existing_keys ):
key = random_key()
while key in existing_keys:
key = random_key()
return key
def unique_keys( amount=1000, existing_keys=set() ):
keys = set( existing_keys )
for _ in xrange( amount ):
key = new_key( keys )
keys.add( key )
return keys-existing_keys
def data( dictionary_size=1000, search_size=3000, intersection=0.5 ):
dict_keys = unique_keys( dictionary_size )
intersection_amount = int(search_size*intersection)
unique_search_keys = unique_keys( search_size-intersection_amount, dict_keys )
intersection_search_keys = set(list(dict_keys)[0:intersection_amount])
return dict((key,None) for key in dict_keys), unique_search_keys | intersection_search_keys
def core_test( out_prefix, dictionary, searched, test_functions ):
print out_prefix
for function in test_functions:
start = time.time()
for _ in range( 20*(len(dictionary)/len(searched) or 1) ):
function( dictionary, searched, None )
end = time.time()
print '\t%-20s %2.4f'%(function.__name__, end-start)
def inner_test( out_prefix, dictionary_size, search_size, test_functions ):
#big intersection
dictionary, search = data( dictionary_size, search_size, 0.9 )
core_test( '%s, big intersect'%out_prefix, dictionary, search, test_functions )
#small intersection
dictionary, search = data( dictionary_size, search_size, 0.1 )
core_test( '%s, small intersect'%out_prefix, dictionary, search, test_functions )
#half intersection
dictionary, search = data( dictionary_size, search_size, 0.5 )
core_test( '%s, half intersect'%out_prefix, dictionary, search, test_functions )
#test functions
def test_has_key( dictionary, searched, default ):
for key in searched:
if dictionary.has_key( key ):
value = dictionary[key]
else:
value = default
def test_get( dictionary, searched, default ):
for key in searched:
value = dictionary.get(key, default)
def test_try( dictionary, searched, default ):
for key in searched:
try:
value = dictionary[key]
except KeyError:
value = default
def test_in( dictionary, searched, default ):
for key in searched:
if key in dictionary:
value = dictionary[key]
else:
value = default
def test():
factor = 100
test_functions = [test_has_key, test_get, test_try, test_in]
#big dict, small search, small
inner_test( 'big dict, small search', 1000*factor, 1*factor, test_functions )
#even dict/search
inner_test( 'even', 1000*factor, 1000*factor, test_functions )
#small dict, big search
inner_test( 'small dict, big search', 1*factor, 1000*factor, test_functions )
if __name__ == '__main__':
test()