[Tutor] Huge list comprehension
Abdur-Rahmaan Janhangeer
arj.python at gmail.com
Sat Jun 10 03:35:03 EDT 2017
take a look at numpy
and don't necessarily give us the whole code. it becomes too long without
purpose
Abdur-Rahmaan Janhangeer,
Mauritius
abdurrahmaanjanhangeer.wordpress.com
On 6 Jun 2017 03:26, "syed zaidi" <syedzaidi85 at hotmail.co.uk> wrote:
hi,
I would appreciate if you can help me suggesting a quick and efficient
strategy for comparing multiple lists with one principal list
I have about 125 lists containing about 100,000 numerical entries in each
my principal list contains about 6 million entries.
I want to compare each small list with main list and append yes/no or 0/1
in each new list corresponding to each of 125 lists
The program is working but it takes ages to process huge files,
Can someone pleases tell me how can I make this process fast. Right now it
takes arounf 2 weeks to complete this task
the code I have written and is working is as under:
sample_name = []
main_op_list,principal_list = [],[]
dictionary = {}
with open("C:/Users/INVINCIBLE/Desktop/T2D_ALL_blastout_batch.txt", 'r') as
f:
reader = csv.reader(f, dialect = 'excel', delimiter='\t')
list2 = filter(None, reader)
for i in range(len(list2)):
col1 = list2[i][0]
operon = list2[i][1]
main_op_list.append(operon)
col1 = col1.strip().split("_")
sample_name = col1[0]
if dictionary.get(sample_name):
dictionary[sample_name].append(operon)
else:
dictionary[sample_name] = []
dictionary[sample_name].append(operon)
locals().update(dictionary) ## converts dictionary keys to variables
##print DLF004
dict_values = dictionary.values()
dict_keys = dictionary.keys()
print dict_keys
print len(dict_keys)
main_op_list_np = np.array(main_op_list)
DLF002_1,DLF004_1,DLF005_1,DLF006_1,DLF007_1,DLF008_1,
DLF009_1,DLF010_1,DLF012_1,DLF013_1,DLF014_1,DLM001_1,
DLM002_1,DLM003_1,DLM004_1,DLM005_1,DLM006_1,DLM009_1,
DLM011_1,DLM012_1,DLM018_1,DOF002_1,DOF003_1 =[],[],[],[],[],[],[],[],[],[]
,[],[],[],[],[],[],[],[],[],[],[],[],[]
DOF004_1,DOF006_1,DOF007_1,DOF008_1,DOF009_1,DOF010_1,
DOF011_1,DOF012_1,DOF013_1,DOF014_1,DOM001_1,DOM003_1,
DOM005_1,DOM008_1,DOM010_1,DOM012_1,DOM013_1,DOM014_1,
DOM015_1,DOM016_1,DOM017_1,DOM018_1,DOM019_1 =[],[],[],[],[],[],[],[],[],[]
,[],[],[],[],[],[],[],[],[],[],[],[],[]
DOM020_1,DOM021_1,DOM022_1,DOM023_1,DOM024_1,DOM025_1,DOM026_1 =
[],[],[],[],[],[],[]
NLF001_1,NLF002_1,NLF005_1,NLF006_1,NLF007_1,NLF008_1,
NLF009_1,NLF010_1,NLF011_1,NLF012_1,NLF013_1,NLF014_1,
NLF015_1,NLM001_1,NLM002_1,NLM003_1,NLM004_1,NLM005_1,
NLM006_1,NLM007_1,NLM008_1,NLM009_1,NLM010_1 =[],[],[],[],[],[],[],[],[],[]
,[],[],[],[],[],[],[],[],[],[],[],[],[]
NLM015_1,NLM016_1,NLM017_1,NLM021_1,NLM022_1,NLM023_1,
NLM024_1,NLM025_1,NLM026_1,NLM027_1,NLM028_1,NLM029_1,
NLM031_1,NLM032_1,NOF001_1,NOF002_1,NOF004_1,NOF005_1,
NOF006_1,NOF007_1,NOF008_1,NOF009_1,NOF010_1 =[],[],[],[],[],[],[],[],[],[]
,[],[],[],[],[],[],[],[],[],[],[],[],[]
NOF011_1,NOF012_1,NOF013_1,NOF014_1,NOM001_1,NOM002_1,
NOM004_1,NOM005_1,NOM007_1,NOM008_1,NOM009_1,NOM010_1,
NOM012_1,NOM013_1,NOM015_1,NOM016_1,NOM017_1,NOM018_1,
NOM019_1,NOM020_1,NOM022_1,NOM023_1,NOM025_1 =[],[],[],[],[],[],[],[],[],[]
,[],[],[],[],[],[],[],[],[],[],[],[],[]
NOM026_1,NOM027_1,NOM028_1,NOM029_1 = [],[],[],[]
for i in main_op_list_np:
if i in DLF002: DLF002_1.append('1')
else:DLF002_1.append('0')
if i in DLF004: DLF004_1.append('1')
else:DLF004_1.append('0')
if i in DLF005: DLF005_1.append('1')
else:DLF005_1.append('0')
if i in DLF006: DLF006_1.append('1')
else:DLF006_1.append('0')
if i in DLF007: DLF007_1.append('1')
else:DLF007_1.append('0')
if i in DLF008: DLF008_1.append('1')
else:DLF008_1.append('0')
## if main_op_list[i] in DLF009: DLF009_1.append('1')
## else:DLF009_1.append('0')
if i in DLF010: DLF010_1.append('1')
else:DLF010_1.append('0')
if i in DLF012: DLF012_1.append('1')
else:DLF012_1.append('0')
if i in DLF013: DLF013_1.append('1')
else:DLF013_1.append('0')
if i in DLF014: DLF014_1.append('1')
else:DLF014_1.append('0')
if i in DLM001: DLM001_1.append('1')
else:DLM001_1.append('0')
if i in DLM002: DLM002_1.append('1')
else:DLM002_1.append('0')
if i in DLM003: DLM003_1.append('1')
else:DLM003_1.append('0')
if i in DLM004: DLM004_1.append('1')
else:DLM004_1.append('0')
if i in DLM005: DLM005_1.append('1')
else:DLM005_1.append('0')
if i in DLM006: DLM006_1.append('1')
else:DLM006_1.append('0')
if i in DLM009: DLM009_1.append('1')
else:DLM009_1.append('0')
if i in DLM011: DLM011_1.append('1')
else:DLM011_1.append('0')
if i in DLM012: DLM012_1.append('1')
else:DLM012_1.append('0')
if i in DLM018: DLM018_1.append('1')
else:DLM018_1.append('0')
if i in DOF002: DOF002_1.append('1')
else:DOF002_1.append('0')
if i in DOF003: DOF003_1.append('1')
else:DOF003_1.append('0')
if i in DOF004: DOF004_1.append('1')
else:DOF004_1.append('0')
if i in DOF006: DOF006_1.append('1')
else:DOF006_1.append('0')
if i in DOF007: DOF007_1.append('1')
else:DOF007_1.append('0')
if i in DOF008: DOF008_1.append('1')
else:DOF008_1.append('0')
if i in DOF009: DOF009_1.append('1')
else:DOF009_1.append('0')
if i in DOF010: DOF010_1.append('1')
else:DOF010_1.append('0')
if i in DOF011: DOF011_1.append('1')
else:DOF011_1.append('0')
if i in DOF012: DOF012_1.append('1')
else:DOF012_1.append('0')
if i in DOF013: DOF013_1.append('1')
else:DOF013_1.append('0')
if i in DOF014: DOF014_1.append('1')
else:DOF014_1.append('0')
if i in DOM001: DOM001_1.append('1')
else:DOM001_1.append('0')
if i in DOM003: DOM003_1.append('1')
else:DOM003_1.append('0')
if i in DOM005: DOM005_1.append('1')
else:DOM005_1.append('0')
if i in DOM008: DOM008_1.append('1')
else:DOM008_1.append('0')
if i in DOM010: DOM010_1.append('1')
else:DOM010_1.append('0')
if i in DOM012: DOM012_1.append('1')
else:DOM012_1.append('0')
if i in DOM013: DOM013_1.append('1')
else:DOM013_1.append('0')
if i in DOM014: DOM014_1.append('1')
else:DOM014_1.append('0')
if i in DOM015: DOM015_1.append('1')
else:DOM015_1.append('0')
if i in DOM016: DOM016_1.append('1')
else:DOM016_1.append('0')
if i in DOM017: DOM017_1.append('1')
else:DOM017_1.append('0')
if i in DOM018: DOM018_1.append('1')
else:DOM018_1.append('0')
if i in DOM019: DOM019_1.append('1')
else:DOM019_1.append('0')
if i in DOM020: DOM020_1.append('1')
else:DOM020_1.append('0')
if i in DOM021: DOM021_1.append('1')
else:DOM021_1.append('0')
if i in DOM022: DOM022_1.append('1')
else:DOM022_1.append('0')
if i in DOM023: DOM023_1.append('1')
else:DOM023_1.append('0')
if i in DOM024: DOM024_1.append('1')
else:DOM024_1.append('0')
if i in DOM025: DOM025_1.append('1')
else:DOM025_1.append('0')
if i in DOM026: DOM026_1.append('1')
else:DOM026_1.append('0')
if i in NLF001: NLF001_1.append(' | 1')
else:NLF001_1.append(' | 0')
if i in NLF002: NLF002_1.append('1')
else:NLF002_1.append('0')
if i in NLF005: NLF005_1.append('1')
else:NLF005_1.append('0')
if i in NLF006: NLF006_1.append('1')
else:NLF006_1.append('0')
if i in NLF007: NLF007_1.append('1')
else:NLF007_1.append('0')
if i in NLF008: NLF008_1.append('1')
else:NLF008_1.append('0')
if i in NLF009: NLF009_1.append('1')
else:NLF009_1.append('0')
if i in NLF010: NLF010_1.append('1')
else:NLF010_1.append('0')
if i in NLF011: NLF011_1.append('1')
else:NLF011_1.append('0')
if i in NLF012: NLF012_1.append('1')
else:NLF012_1.append('0')
if i in NLF013: NLF013_1.append('1')
else:NLF013_1.append('0')
if i in NLF014: NLF014_1.append('1')
else:NLF014_1.append('0')
if i in NLF015: NLF015_1.append('1')
else:NLF015_1.append('0')
if i in NLM001: NLM001_1.append('1')
else:NLM001_1.append('0')
if i in NLM002: NLM002_1.append('1')
else:NLM002_1.append('0')
if i in NLM003: NLM003_1.append('1')
else:NLM003_1.append('0')
if i in NLM004: NLM004_1.append('1')
else:NLM004_1.append('0')
if i in NLM005: NLM005_1.append('1')
else:NLM005_1.append('0')
if i in NLM006: NLM006_1.append('1')
else:NLM006_1.append('0')
if i in NLM007: NLM007_1.append('1')
else:NLM007_1.append('0')
if i in NLM008: NLM008_1.append('1')
else:NLM008_1.append('0')
if i in NLM009: NLM009_1.append('1')
else:NLM009_1.append('0')
if i in NLM010: NLM010_1.append('1')
else:NLM010_1.append('0')
if i in NLM015: NLM015_1.append('1')
else:NLM015_1.append('0')
if i in NLM016: NLM016_1.append('1')
else:NLM016_1.append('0')
if i in NLM017: NLM017_1.append('1')
else:NLM017_1.append('0')
if i in NLM021: NLM021_1.append('1')
else:NLM021_1.append('0')
if i in NLM022: NLM022_1.append('1')
else:NLM022_1.append('0')
if i in NLM023: NLM023_1.append('1')
else:NLM023_1.append('0')
if i in NLM024: NLM024_1.append('1')
else:NLM024_1.append('0')
if i in NLM025: NLM025_1.append('1')
else:NLM025_1.append('0')
if i in NLM026: NLM026_1.append('1')
else:NLM026_1.append('0')
if i in NLM027: NLM027_1.append('1')
else:NLM027_1.append('0')
if i in NLM028: NLM028_1.append('1')
else:NLM028_1.append('0')
if i in NLM029: NLM029_1.append('1')
else:NLM029_1.append('0')
if i in NLM031: NLM031_1.append('1')
else:NLM031_1.append('0')
if i in NLM032: NLM032_1.append('1')
else:NLM032_1.append('0')
if i in NOF001: NOF001_1.append('1')
else:NOF001_1.append('0')
if i in NOF002: NOF002_1.append('1')
else:NOF002_1.append('0')
if i in NOF004: NOF004_1.append('1')
else:NOF004_1.append('0')
if i in NOF005: NOF005_1.append('1')
else:NOF005_1.append('0')
if i in NOF006: NOF006_1.append('1')
else:NOF006_1.append('0')
if i in NOF007: NOF007_1.append('1')
else:NOF007_1.append('0')
if i in NOF008: NOF008_1.append('1')
else:NOF008_1.append('0')
if i in NOF009: NOF009_1.append('1')
else:NOF009_1.append('0')
if i in NOF010: NOF010_1.append('1')
else:NOF010_1.append('0')
if i in NOF011: NOF011_1.append('1')
else:NOF011_1.append('0')
if i in NOF012: NOF012_1.append('1')
else:NOF012_1.append('0')
if i in NOF013: NOF013_1.append('1')
else:NOF013_1.append('0')
if i in NOF014: NOF014_1.append('1')
else:NOF014_1.append('0')
if i in NOM001: NOM001_1.append('1')
else:NOM001_1.append('0')
if i in NOM002: NOM002_1.append('1')
else:NOM002_1.append('0')
if i in NOM004: NOM004_1.append('1')
else:NOM004_1.append('0')
if i in NOM005: NOM005_1.append('1')
else:NOM005_1.append('0')
if i in NOM007: NOM007_1.append('1')
else:NOM007_1.append('0')
if i in NOM008: NOM008_1.append('1')
else:NOM008_1.append('0')
if i in NOM009: NOM009_1.append('1')
else:NOM009_1.append('0')
if i in NOM010: NOM010_1.append('1')
else:NOM010_1.append('0')
if i in NOM012: NOM012_1.append('1')
else:NOM012_1.append('0')
if i in NOM013: NOM013_1.append('1')
else:NOM013_1.append('0')
if i in NOM015: NOM015_1.append('1')
else:NOM015_1.append('0')
if i in NOM016: NOM016_1.append('1')
else:NOM016_1.append('0')
if i in NOM017: NOM017_1.append('1')
else:NOM017_1.append('0')
if i in NOM018: NOM018_1.append('1')
else:NOM018_1.append('0')
if i in NOM019: NOM019_1.append('1')
else:NOM019_1.append('0')
if i in NOM020: NOM020_1.append('1')
else:NOM020_1.append('0')
if i in NOM022: NOM022_1.append('1')
else:NOM022_1.append('0')
if i in NOM023: NOM023_1.append('1')
else:NOM023_1.append('0')
if i in NOM025: NOM025_1.append('1')
else:NOM025_1.append('0')
if i in NOM026: NOM026_1.append('1')
else:NOM026_1.append('0')
if i in NOM027: NOM027_1.append('1')
else:NOM027_1.append('0')
if i in NOM028: NOM028_1.append('1')
else:NOM028_1.append('0')
if i in NOM029: NOM029_1.append('1')
else:NOM029_1.append('0')
##
print 'saving'
zoo = zip(main_op_list, DLF002_1,DLF004_1,DLF005_1,
DLF006_1,DLF007_1,DLF008_1,DLF009_1,DLF010_1,DLF012_1,
DLF013_1,DLF014_1,DLM001_1,DLM002_1,DLM003_1,DLM004_1,
DLM005_1,DLM006_1,DLM009_1,DLM011_1,DLM012_1,DLM018_1,
DOF002_1,DOF003_1,DOF004_1,DOF006_1,DOF007_1,DOF008_1,
DOF009_1,DOF010_1,DOF011_1,DOF012_1,DOF013_1,DOF014_1,
DOM001_1,DOM003_1,DOM005_1,DOM008_1,DOM010_1,DOM012_1,
DOM013_1,DOM014_1,DOM015_1,DOM016_1,DOM017_1,DOM018_1,
DOM019_1,DOM020_1,DOM021_1,DOM022_1,DOM023_1,DOM024_1,
DOM025_1,DOM026_1,NLF001_1,NLF002_1,NLF005_1,NLF006_1,
NLF007_1,NLF008_1,NLF009_1,NLF010_1,NLF011_1,NLF012_1,
NLF013_1,NLF014_1,NLF015_1,NLM001_1,NLM002_1,NLM003_1,
NLM004_1,NLM005_1,NLM006_1,NLM007_1,NLM008_1,NLM009_1,
NLM010_1,NLM015_1,NLM016_1,NLM017_1,NLM021_1,NLM022_1,
NLM023_1,NLM024_1,NLM025_1,NLM026_1,NLM027_1,NLM028_1,
NLM029_1,NLM031_1,NLM032_1,NOF001_1,NOF002_1,NOF004_1,
NOF005_1,NOF006_1,NOF007_1,NOF008_1,NOF009_1,NOF010_1,
NOF011_1,NOF012_1,NOF013_1,NOF014_1,NOM001_1,NOM002_1,
NOM004_1,NOM005_1,NOM007_1,NO
M008_1,NOM009_1,NOM010_1,NOM012_1,NOM013_1,NOM015_1,
NOM016_1,NOM017_1,NOM018_1,NOM019_1,NOM020_1,NOM022_1,
NOM023_1,NOM025_1,NOM026_1,NOM027_1,NOM028_1,NOM029_1)
with open("test.tab", 'w+') as outfile:
writer =csv.writer(outfile, delimiter = '\t', lineterminator = '\n')
writer.writerow([' ','DLF2','DLF4','DLF5','DLF6',
'DLF7','DLF8','DLF9','DLF10','DLF12','DLF13','DLF14','DLM1',
'DLM2','DLM3','DLM4','DLM5','DLM6','DLM9','DLM11','DLM12','
DLM18','DOF2','DOF3','DOF4','DOF6','DOF7','DOF8','DOF9','
DOF10','DOF11','DOF12','DOF13','DOF04','DOM1','DOM3','DOM5',
'DOM8','DOM10','DOM12','DOM13','DOM14','DOM15','DOM16','
DOM17','DOM18','DOM19','DOM20','DOM21','DOM22','DOM23','
DOM24','DOM25','DOM26','NLF1','NLF2','NLF5','NLF6','NLF7','
NLF8','NLF9','NLF10','NLF11','NLF12','NLF13','NLF14','NLF15'
,'NLM1','NLM2','NLM3','NLM4','NLM5','NLM6','NLM7','NLM8','
NLM9','NLM10','NLM15','NLM16','NLM17','NLM21','NLM22','
NLM23','NLM24','NLM25','NLM26','NLM27','NLM28','NLM29','
NLM31','NLM32','NOF1','NOF2','NOF4','NOF5','NOF6','NOF7','
NOF8','NOF9','NOF10','NOF11','NOF12','NOF13','NOF14','NOM1',
'NOM2','NOM4','NOM5','NOM7','NOM8','NOM9','NOM10','NOM12','
NOM13','NOM15','NOM16','NOM17','NOM18','NOM19','NOM20','
NOM22','NOM23','NOM25','NOM26','NOM27','NOM28','NOM29'])
writer.writerows(zoo)
outfile.close()
print 'done'
end_time = time.time()
elapsed = end_time-start_time
print "Time elapsed.", elapsed
Thanks
Best Regards
Syed Shujaat Ali Zaidi
PhD Scholar (Bioinformatics)
MOE Key Laboratory of Bioinformatics
Bioinformatics Division, TNLIST & Department of Automation
FIT 1-107, Tsinghua University, Beijing 100084, China
Lecturer (Bioinformatics)
Department of Bio Sciences
COMSATS Institute of Information Technology
Islamabad, Pakistan
_______________________________________________
Tutor maillist - Tutor at python.org
To unsubscribe or change subscription options:
https://mail.python.org/mailman/listinfo/tutor
More information about the Tutor
mailing list