Why is this slower?
nn
pruebauno at latinmail.com
Mon Oct 5 13:22:52 EDT 2009
On Oct 5, 12:46 pm, Joseph Reagle <rea... at mit.edu> wrote:
> I would think the commented code would be faster (fewer loops), but it is
> not (because of function calls).
>
> #Average user_time = 5.9975 over 4 iterations
> inSRC = set([bio.name for bio in bios.values()])
> inEB = set([bio.name for bio in bios.values() if bio.eb_title])
> inWP = set([bio.name for bio in bios.values() if bio.wp_title])
> inBoth = inEB & inWP
> missingEB = inSRC - inEB
> missingWP = inSRC - inWP
> missingBoth = missingEB & missingWP
> avg_size_ratio = find_average(
> [bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
> mdn_size_ratio = find_median(
> [bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
> SRCfem = set([bio.name for bio in bios.values() if bio.gender
> == 'female'])
> EBfem = set([bio.name for bio in bios.values() if bio.eb_gender
> == 'female'])
> WPfem = set([bio.name for bio in bios.values() if bio.wp_gender
> == 'female'])
> SRCmale = set([bio.name for bio in bios.values() if bio.gender
> == 'male'])
> EBmale = set([bio.name for bio in bios.values() if bio.eb_gender
> == 'male'])
> WPmale = set([bio.name for bio in bios.values() if bio.wp_gender
> == 'male'])
> SRCun = set([bio.name for bio in bios.values() if bio.gender
> == 'unknown'])
> EBun = set([bio.name for bio in bios.values() if bio.eb_gender
> == 'unknown'])
> WPun = set([bio.name for bio in bios.values() if bio.wp_gender
> == 'unknown'])
>
> ##Average user_time = 6.0025 over 4 iterations
> #def set_amend(obj, bio):
> #if obj == None:
> #obj = set([])
> #obj.add(bio.name)
> #return obj
>
> #inSRC = set([])
> #inSRC = set([])
> #inEB = set([])
> #inWP = set([])
> #SRCfem = set([])
> #EBfem = set([])
> #WPfem = set([])
> #SRCmale = set([])
> #EBmale = set([])
> #WPmale = set([])
> #SRCun = set([])
> #EBun = set([])
> #WPun = set([])
>
> #for bio in bios.values():
> ### use a function that takes set name (creates one) and conditional
> #inSRC = set_amend(inSRC, bio)
> #if bio.eb_title: inEB = set_amend(inEB, bio)
> #if bio.wp_title: inWP = set_amend(inWP, bio)
> #if bio.gender == 'female': SRCfem = set_amend(SRCfem, bio)
> #if bio.eb_gender == 'female': EBfem = set_amend(EBfem, bio)
> #if bio.wp_gender == 'female': WPfem = set_amend(WPfem,bio)
> #if bio.gender == 'male': SRCmale = set_amend(SRCmale, bio)
> #if bio.eb_gender == 'male': EBmale = set_amend(EBmale, bio)
> #if bio.wp_gender == 'male': WPmale = set_amend(WPmale, bio)
> #if bio.gender == 'unknown': SRCun = set_amend(SRCun, bio)
> #if bio.eb_gender == 'unknown': EBun = set_amend(EBun, bio)
> #if bio.wp_gender == 'unknown': WPun = set_amend(WPun, bio)
> #inBoth = inEB & inWP
> #missingEB = inSRC - inEB
> #missingWP = inSRC - inWP
> #missingBoth = missingEB & missingWP
> #avg_size_ratio = find_average(
> #[bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
> #mdn_size_ratio = find_median(
> #[bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
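Not only are you making many function calls, but every call also rebinds
a name to the same set object (up to 12 times per iteration), which is
unnecessary because set.add() mutates the set in place. Why not do this?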
# Single pass over the bios, filling every category set in place.
# Assumes the twelve sets (inSRC, inEB, inWP, SRCfem, ..., WPun) have
# already been created empty, as in the quoted code above.
# set.add() mutates the set, so no helper function and no reassignment
# of the set names is needed.
for bio in bios.values():
    # The sets collect names (as the list-comprehension version does),
    # not the bio objects themselves; look the name up once per bio.
    name = bio.name
    inSRC.add(name)
    if bio.eb_title: inEB.add(name)
    if bio.wp_title: inWP.add(name)
    if bio.gender == 'female': SRCfem.add(name)
    if bio.eb_gender == 'female': EBfem.add(name)
    if bio.wp_gender == 'female': WPfem.add(name)
    if bio.gender == 'male': SRCmale.add(name)
    if bio.eb_gender == 'male': EBmale.add(name)
    if bio.wp_gender == 'male': WPmale.add(name)
    if bio.gender == 'unknown': SRCun.add(name)
    if bio.eb_gender == 'unknown': EBun.add(name)
    if bio.wp_gender == 'unknown': WPun.add(name)
More information about the Python-list
mailing list