This

entry got me thinking.

Here's my 5 cents. In all cases the construct

if key in dictionary:

value = dictionary[key]

else:

value = default

performed best.

update: I increased the amount of data to reduce the amount of white-noise in the test results, though things haven't changed. An explanation to the output. The programm compares nine data-sets against four test functions. The nine data sets are the combinations of size-distribution between search/dictionary and size-distribution between search and matching keysets ( thereof the small/even/big combinations ).

update2: Why, the version of python is 2.4 of course :)

Here's the output of the programm.

big dict, small search, big intersect

test_has_key 0.9690

test_get 1.0000

test_try 1.2190

test_in 0.5930

big dict, small search, small intersect

test_has_key 0.8750

test_get 1.0320

test_try 6.6250

test_in 0.5150

big dict, small search, half intersect

test_has_key 0.9220

test_get 1.0310

test_try 3.9530

test_in 0.5470

even, big intersect

test_has_key 1.4370

test_get 1.4840

test_try 1.7030

test_in 1.0780

even, small intersect

test_has_key 1.4530

test_get 1.6090

test_try 7.3120

test_in 1.0780

even, half intersect

test_has_key 1.4690

test_get 1.5620

test_try 4.5630

test_in 1.0780

small dict, big search, big intersect

test_has_key 0.1090

test_get 0.1250

test_try 0.7970

test_in 0.0630

small dict, big search, small intersect

test_has_key 1.0790

test_get 1.2180

test_try 7.0160

test_in 0.7340

small dict, big search, half intersect

test_has_key 0.5790

test_get 0.6560

test_try 3.8440

test_in 0.3910

and that's the programm

import sys, time

def random_key( keylen = 10 ):

import random

return ''.join(chr(random.randint(ord('a'),ord('z'))) for _ in range( keylen ))

def new_key( existing_keys ):

key = random_key()

while key in existing_keys:

key = random_key()

return key

def unique_keys( amount=1000, existing_keys=set() ):

keys = set( existing_keys )

for _ in xrange( amount ):

key = new_key( keys )

keys.add( key )

return keys-existing_keys

def data( dictionary_size=1000, search_size=3000, intersection=0.5 ):

dict_keys = unique_keys( dictionary_size )

intersection_amount = int(search_size*intersection)

unique_search_keys = unique_keys( search_size-intersection_amount, dict_keys )

intersection_search_keys = set(list(dict_keys)[0:intersection_amount])

return dict((key,None) for key in dict_keys), unique_search_keys | intersection_search_keys

def core_test( out_prefix, dictionary, searched, test_functions ):

print out_prefix

for function in test_functions:

start = time.time()

for _ in range( 20*(len(dictionary)/len(searched) or 1) ):

function( dictionary, searched, None )

end = time.time()

print '\t%-20s %2.4f'%(function.__name__, end-start)

def inner_test( out_prefix, dictionary_size, search_size, test_functions ):

#big intersection

dictionary, search = data( dictionary_size, search_size, 0.9 )

core_test( '%s, big intersect'%out_prefix, dictionary, search, test_functions )

#small intersection

dictionary, search = data( dictionary_size, search_size, 0.1 )

core_test( '%s, small intersect'%out_prefix, dictionary, search, test_functions )

#half intersection

dictionary, search = data( dictionary_size, search_size, 0.5 )

core_test( '%s, half intersect'%out_prefix, dictionary, search, test_functions )

#test functions

def test_has_key( dictionary, searched, default ):

for key in searched:

if dictionary.has_key( key ):

value = dictionary[key]

else:

value = default

def test_get( dictionary, searched, default ):

for key in searched:

value = dictionary.get(key, default)

def test_try( dictionary, searched, default ):

for key in searched:

try:

value = dictionary[key]

except KeyError:

value = default

def test_in( dictionary, searched, default ):

for key in searched:

if key in dictionary:

value = dictionary[key]

else:

value = default

def test():

factor = 100

test_functions = [test_has_key, test_get, test_try, test_in]

#big dict, small search, small

inner_test( 'big dict, small search', 1000*factor, 1*factor, test_functions )

#even dict/search

inner_test( 'even', 1000*factor, 1000*factor, test_functions )

#small dict, big search

inner_test( 'small dict, big search', 1*factor, 1000*factor, test_functions )

if __name__ == '__main__':

test()