I am processing two CSV files against another. My first implementation used a Python list of lists and list.append to generate a new list while looping over all the data, including the non-relevant data (I can't determine the location of a specific data element in a list of lists). So I re-implemented the exact same code using numpy arrays (2D arrays), with numpy.where to avoid needlessly looping over the entire dataset, but the numpy.array-based code is about 7.6 times slower?<br>
<br>relevant list of list code:<br><blockquote style="margin: 0pt 0pt 0pt 0.8ex; border-left: 1px solid rgb(204, 204, 204); padding-left: 1ex;" class="gmail_quote">starttime = time.clock()<br>#NI_data_list room_eqp_list<br>
NI_data_list_new = []<br>for NI_row in NI_data_list:<br>    treelevel = NI_row[0]<br>    elevation = NI_row[1]<br>    locater = NI_row[2]<br>    area = NI_row[3]<br>    NIroom = NI_row[4]<br>    #Write appropriate equipment models and drawing into new list<br>
    if NIroom != '':<br>        #Write appropriate equipment models and drawing into new list<br>        for row in room_eqp_list:<br>            eqp_room = row[0]<br>            if len(eqp_room) == 5:<br>                eqp_drawing = row[1]<br>
                if NIroom == eqp_room:<br>                    newrow = [int(treelevel)+1,elevation,locater,area,NIroom,eqp_drawing]<br>                    NI_data_list_new.append(newrow)<br>        #Write appropriate piping info into the new list<br>
        for prow in unique_piping_list:<br>            pipe_room = prow[0]<br>            if len(pipe_room) == 5:<br>                pipe_drawing = prow[1]<br>                if pipe_room == NIroom:<br>                    piperow = [int(treelevel)+1,elevation,locater,area,NIroom,pipe_drawing]<br>
                    NI_data_list_new.append(piperow)<br>    #Write appropriate equipment models and drawing into new list<br>    if (locater != '' and NIroom == ''):<br>        #Write appropriate equipment models and drawing into new list<br>
        for row in room_eqp_list:<br>            eqp_locater = row[0]<br>            if len(eqp_locater) == 4:<br>                eqp_drawing = row[1]<br>                if locater == eqp_locater:<br>                    newrow = [int(treelevel)+1,elevation,eqp_locater,area,'',eqp_drawing]<br>
                    NI_data_list_new.append(newrow)<br>        #Write appropriate piping info into the new list<br>        for prow in unique_piping_list:<br>            pipe_locater = prow[0]<br>            if len(pipe_locater) == 4:<br>
                pipe_drawing = prow[1]<br>                if pipe_locater == locater:<br>                    piperow = [int(treelevel)+1,elevation,pipe_locater,area,'',pipe_drawing]<br>                    NI_data_list_new.append(piperow)                <br>
    #Rewrite NI_data to new list<br>    if NIroom == '':<br>        NI_data_list_new.append(NI_row)<br><br>print (time.clock()-starttime)<br></blockquote><br>relevant numpy.array code:<br><blockquote style="margin: 0pt 0pt 0pt 0.8ex; border-left: 1px solid rgb(204, 204, 204); padding-left: 1ex;" class="gmail_quote">
NI_data_write_url = reports_dir + 'NI_data_room2.csv'<br>NI_data_list_file = open(NI_data_write_url, 'wb')<br>NI_data_list_writer = csv.writer(NI_data_list_file, delimiter=',', quotechar='"')<br>
starttime = time.clock()<br>#NI_data_list room_eqp_list<br>NI_data_list_new = numpy.array([['TreeDepth', 'Elevation', 'BuildingLocater', 'Area', 'Room', 'Item']])<br>for NI_row in NI_data_list:<br>
    treelevel = NI_row[0]<br>    elevation = NI_row[1]<br>    locater = NI_row[2]<br>    area = NI_row[3]<br>    NIroom = NI_row[4]<br>    #Write appropriate equipment models and drawing into new array<br>    if NIroom != '':<br>
        #Write appropriate equipment models and drawing into new array<br>        (rowtest, columntest) = numpy.where(room_eqp_list==NIroom)<br>        for row_iter in rowtest:<br>            eqp_room = room_eqp_list[row_iter,0]<br>
            if len(eqp_room) == 5:<br>                eqp_drawing = room_eqp_list[row_iter,1]<br>                if NIroom == eqp_room:<br>                    newrow = numpy.array([[int(treelevel)+1,elevation,locater,area,NIroom,eqp_drawing]])<br>
                    NI_data_list_new = numpy.append(NI_data_list_new, newrow, 0)<br>                <br>        #Write appropriate piping info into the new array<br>        (rowtest, columntest) = numpy.where(unique_room_piping_list==NIroom)<br>
        for row_iter in rowtest: #unique_room_piping_list<br>            pipe_room = unique_room_piping_list[row_iter,0]<br>            if len(pipe_room) == 5:<br>                pipe_drawing = unique_room_piping_list[row_iter,1]<br>
                if pipe_room == NIroom:<br>                    piperow = numpy.array([[int(treelevel)+1,elevation,locater,area,NIroom,pipe_drawing]])<br>                    NI_data_list_new = numpy.append(NI_data_list_new, piperow, 0)<br>
    #Write appropriate equipment models and drawing into new array<br>    if (locater != '' and NIroom == ''):<br>        #Write appropriate equipment models and drawing into new array<br>        (rowtest, columntest) = numpy.where(room_eqp_list==locater)<br>
        for row_iter in rowtest:<br>            eqp_locater = room_eqp_list[row_iter,0]<br>            if len(eqp_locater) == 4:<br>                eqp_drawing = room_eqp_list[row_iter,1]<br>                if locater == eqp_locater:<br>
                    newrow = numpy.array([[int(treelevel)+1,elevation,eqp_locater,area,'',eqp_drawing]])<br>                    NI_data_list_new = numpy.append(NI_data_list_new, newrow, 0)<br>        #Write appropriate piping info into the new array<br>
        (rowtest, columntest) = numpy.where(unique_room_eqp_list==locater)<br>        for row_iter in rowtest:<br>            pipe_locater = unique_room_piping_list[row_iter,0]<br>            if len(pipe_locater) == 4:<br>
                pipe_drawing = unique_room_piping_list[row_iter,1]<br>                if pipe_locater == locater:<br>                    piperow = numpy.array([[int(treelevel)+1,elevation,pipe_locater,area,'',pipe_drawing]])<br>
                    NI_data_list_new = numpy.append(NI_data_list_new, piperow, 0) <br>    #Rewrite NI_data to new list<br>    if NIroom == '':<br>        NI_data_list_new = numpy.append(NI_data_list_new,[NI_row],0)<br>
        <br>print (time.clock()-starttime)<br clear="all"></blockquote><br>some relevant output<br><blockquote style="margin: 0pt 0pt 0pt 0.8ex; border-left: 1px solid rgb(204, 204, 204); padding-left: 1ex;" class="gmail_quote">
>>> print NI_data_list_new<br>[['TreeDepth' 'Elevation' 'BuildingLocater' 'Area' 'Room' 'Item']<br> ['0' '' '1000' '' '' '']<br>
 ['1' '' '1000' '' '' 'docname Rev 0']<br> ..., <br> ['5' '6' '1164' '4' '' 'eqp11 RB, R. surname, 24-NOV-08']<br> ['4' '6' '1164' '4' '' 'anotherdoc Rev A']<br>
 ['0' '' '' '' '' '']]<br></blockquote><br>Is numpy.append really that slow, or is the culprit numpy.where?<br><br>Dewald Pieterse<br><br>"A democracy is nothing more than mob rule, where fifty-one percent of the people take away the rights of the other forty-nine." ~ Thomas Jefferson<br>