[Numpy-discussion] numpy.append & numpy.where vs list.append and brute iterative for loop

Dewald Pieterse dewald.pieterse at gmail.com
Thu Jan 27 16:03:22 EST 2011


I am processing two csv files against another. My first implementation used
a Python list of lists and list.append to generate a new list while looping
over all the data, including the non-relevant data (I can't determine the
location of a specific data element in a list of lists). So I re-implemented
the exact same code using numpy arrays (2d arrays), with numpy.where to avoid
looping over an entire dataset needlessly, but the numpy.array based code is
about 7.6 times slower.

relevant list of list code:

> starttime = time.clock()
> #NI_data_list room_eqp_list
> NI_data_list_new = []
> for NI_row in NI_data_list:
>     treelevel = NI_row[0]
>     elevation = NI_row[1]
>     locater = NI_row[2]
>     area = NI_row[3]
>     NIroom = NI_row[4]
>     #Write appropriate equipment models and drawing into new list
>     if NIroom != '':
>         #Write appropriate equipment models and drawing into new list
>         for row in room_eqp_list:
>             eqp_room = row[0]
>             if len(eqp_room) == 5:
>                 eqp_drawing = row[1]
>                 if NIroom == eqp_room:
>                     newrow =
> [int(treelevel)+1,elevation,locater,area,NIroom,eqp_drawing]
>                     NI_data_list_new.append(newrow)
>         #Write appropriate piping info into the new list
>         for prow in unique_piping_list:
>             pipe_room = prow[0]
>             if len(pipe_room) == 5:
>                 pipe_drawing = prow[1]
>                 if pipe_room == NIroom:
>                     piperow =
> [int(treelevel)+1,elevation,locater,area,NIroom,pipe_drawing]
>                     NI_data_list_new.append(piperow)
>     #Write appropriate equipment models and drawing into new list
>     if (locater != '' and NIroom == ''):
>         #Write appropriate equipment models and drawing into new list
>         for row in room_eqp_list:
>             eqp_locater = row[0]
>             if len(eqp_locater) == 4:
>                 eqp_drawing = row[1]
>                 if locater == eqp_locater:
>                     newrow =
> [int(treelevel)+1,elevation,eqp_locater,area,'',eqp_drawing]
>                     NI_data_list_new.append(newrow)
>         #Write appropriate piping info into the new list
>         for prow in unique_piping_list:
>             pipe_locater = prow[0]
>             if len(pipe_locater) == 4:
>                 pipe_drawing = prow[1]
>                 if pipe_locater == locater:
>                     piperow =
> [int(treelevel)+1,elevation,pipe_locater,area,'',pipe_drawing]
>                     NI_data_list_new.append(piperow)
>     #Rewrite NI_data to new list
>     if NIroom == '':
>         NI_data_list_new.append(NI_row)
>
> print (time.clock()-starttime)
>

relevant numpy.array code:

> NI_data_write_url = reports_dir + 'NI_data_room2.csv'
> NI_data_list_file = open(NI_data_write_url, 'wb')
> NI_data_list_writer = csv.writer(NI_data_list_file, delimiter=',',
> quotechar='"')
> starttime = time.clock()
> #NI_data_list room_eqp_list
> NI_data_list_new = numpy.array([['TreeDepth', 'Elevation',
> 'BuildingLocater', 'Area', 'Room', 'Item']])
> for NI_row in NI_data_list:
>     treelevel = NI_row[0]
>     elevation = NI_row[1]
>     locater = NI_row[2]
>     area = NI_row[3]
>     NIroom = NI_row[4]
>     #Write appropriate equipment models and drawing into new array
>     if NIroom != '':
>         #Write appropriate equipment models and drawing into new array
>         (rowtest, columntest) = numpy.where(room_eqp_list==NIroom)
>         for row_iter in rowtest:
>             eqp_room = room_eqp_list[row_iter,0]
>             if len(eqp_room) == 5:
>                 eqp_drawing = room_eqp_list[row_iter,1]
>                 if NIroom == eqp_room:
>                     newrow =
> numpy.array([[int(treelevel)+1,elevation,locater,area,NIroom,eqp_drawing]])
>                     NI_data_list_new = numpy.append(NI_data_list_new,
> newrow, 0)
>
>         #Write appropriate piping info into the new array
>         (rowtest, columntest) =
> numpy.where(unique_room_piping_list==NIroom)
>         for row_iter in rowtest: #unique_room_piping_list
>             pipe_room = unique_room_piping_list[row_iter,0]
>             if len(pipe_room) == 5:
>                 pipe_drawing = unique_room_piping_list[row_iter,1]
>                 if pipe_room == NIroom:
>                     piperow =
> numpy.array([[int(treelevel)+1,elevation,locater,area,NIroom,pipe_drawing]])
>                     NI_data_list_new = numpy.append(NI_data_list_new,
> piperow, 0)
>     #Write appropriate equipment models and drawing into new array
>     if (locater != '' and NIroom == ''):
>         #Write appropriate equipment models and drawing into new array
>         (rowtest, columntest) = numpy.where(room_eqp_list==locater)
>         for row_iter in rowtest:
>             eqp_locater = room_eqp_list[row_iter,0]
>             if len(eqp_locater) == 4:
>                 eqp_drawing = room_eqp_list[row_iter,1]
>                 if locater == eqp_locater:
>                     newrow =
> numpy.array([[int(treelevel)+1,elevation,eqp_locater,area,'',eqp_drawing]])
>                     NI_data_list_new = numpy.append(NI_data_list_new,
> newrow, 0)
>         #Write appropriate piping info into the new array
>         (rowtest, columntest) = numpy.where(unique_room_eqp_list==locater)
>         for row_iter in rowtest:
>             pipe_locater = unique_room_piping_list[row_iter,0]
>             if len(pipe_locater) == 4:
>                 pipe_drawing = unique_room_piping_list[row_iter,1]
>                 if pipe_locater == locater:
>                     piperow =
> numpy.array([[int(treelevel)+1,elevation,pipe_locater,area,'',pipe_drawing]])
>                     NI_data_list_new = numpy.append(NI_data_list_new,
> piperow, 0)
>     #Rewrite NI_data to new list
>     if NIroom == '':
>         NI_data_list_new = numpy.append(NI_data_list_new,[NI_row],0)
>
> print (time.clock()-starttime)
>

some relevant output

> >>> print NI_data_list_new
> [['TreeDepth' 'Elevation' 'BuildingLocater' 'Area' 'Room' 'Item']
>  ['0' '' '1000' '' '' '']
>  ['1' '' '1000' '' '' 'docname Rev 0']
>  ...,
>  ['5' '6' '1164' '4' '' 'eqp11 RB, R. surname, 24-NOV-08']
>  ['4' '6' '1164' '4' '' 'anotherdoc Rev A']
>  ['0' '' '' '' '' '']]
>

Is numpy.append really that slow, or is numpy.where the culprit?

Dewald Pieterse

"A democracy is nothing more than mob rule, where fifty-one percent of the
people take away the rights of the other forty-nine." ~ Thomas Jefferson
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/numpy-discussion/attachments/20110127/df4f05d2/attachment.html>


More information about the NumPy-Discussion mailing list