Re: Re: joining files

mannu jha mannu_0523 at rediffmail.com
Mon May 17 04:25:37 EDT 2010


On Sun, 16 May 2010 23:51:10 +0530  wrote
>On 05/16/2010 05:04 PM, Dave Angel wrote:
> (You forgot to include the python-list in your response. So it only
> went to me. Normally, you just do reply-all to the message)
> mannu jha wrote:
>> On Sun, 16 May 2010 13:52:31 +0530 wrote
>>> mannu jha wrote:
>>> Hi,
>>> I have few files like this:
>>> file1:
>>> 22 110.1 33 331.5 22.7 5 271.9 17.2 33.4
>>> 4 55.1
>>> file1 has total 4 column but some of them are missing in few row.
>>> file2:
>>> 5 H
>>> 22 0
>>> file3:
>>> 4 T
>>> 5 B
>>> 22 C
>>> 121 S
>>> in all these files first column is the main source of matching their entries. So What I want in the output is only those entries which is coming in all three files. output required:
>>> 5 271.9 17.2 33.4 5 H 5 T
>>> 22 110.1 22 0 22 C
>> I am trying with this :
>> from collections import defaultdict
>> def merge(sources):
>> blanks = [blank for items, blank, keyfunc in sources]
>> d = defaultdict(lambda: blanks[:])
>> for index, (items, blank, keyfunc) in enumerate(sources):
>> for item in items:
>> d[keyfunc(item)][index] = item
>> for key in sorted(d):
>> yield d[key]
>> if __name__ == "__main__":
>> a = open("input1.txt")
>> c = open("input2.txt")
>> def key(line):
>> return line[:2]
>> def source(stream, blank="", key=key):
>> return (line.strip() for line in stream), blank, key
>> for m in merge([source(x) for x in [a,c]]):
>> print "|".join(c.ljust(10) for c in m)
>> but with input1.txt:
>> 187 7.79 122.27 54.37 4.26 179.75
>> 194 8.00 121.23 54.79 4.12 180.06
>> 15 8.45 119.04 55.02 4.08 178.89
>> 176 7.78 118.68 54.57 4.20 181.06
>> 180 7.50 119.21 53.93 179.80
>> 190 7.58 120.44 54.62 4.25 180.02
>> 152 8.39 120.63 55.10 4.15 179.10
>> 154 7.79 119.62 54.47 4.22 180.46
>> 175 8.42 120.50 55.31 4.04 180.33
>> and input2.txt:
>> 15 H 37 H 95 T
>> 124 H 130 H 152 H 154 H 158 H 164 H
>> 175 H 176 H 180 H
>> 187 H 190 T
>> 194 C
>> 196 H 207 H 210 H 232 H it is giving output as:
>> |
>> |124 H
>> |130 H
>> 154 7.79 119.62 54.47 4.22 180.46|158 H
>> |164 H
>> 175 8.42 120.50 55.31 4.04 180.33|176 H
>> 180 7.50 119.21 53.93 179.80|187 H
>> 190 7.58 120.44 54.62 4.25 180.02|196 H
>> |207 H
>> |210 H
>> |232 H
>> |37 H
>> |95 T
>> so it not matching it properly, can anyone please suggest where I am doing mistake.

import os

def merge_sources(sources):
    """Join whitespace-delimited text tables on their first column.

    sources is a list of tuples (source_name, source_data), where
    source_data is the complete text of one table, one row per line.

    Returns a dict that maps every key (first column) present in ALL
    sources to a dict {source_name: row_tokens}; row_tokens is the full
    row split on whitespace, key included. If a key occurs more than once
    in one source, the last row wins. An empty list of sources gives {}.
    """
    data = []
    for nme, sce in sources:
        lines = {}
        # splitlines() recognises "\n", "\r\n" and "\r" alike; splitting on
        # os.linesep mis-parses text whose line endings differ from the
        # platform the script happens to run on.
        for line in sce.splitlines():
            lst = line.split()
            if not lst:
                # Blank line (e.g. produced by a trailing newline): no key.
                continue
            lines[lst[0]] = (nme, lst)
        data.append(lines)
    if not data:
        # Nothing to intersect.
        return {}
    common_keys = set(data[0])
    for lines in data[1:]:
        common_keys.intersection_update(lines)
    result = {}
    for key in common_keys:
        # Every source contains the key by construction of common_keys.
        result[key] = dict(d[key] for d in data)
    return result
if __name__ == "__main__":
    # Sample tables from the thread, defined here so the script runs
    # stand-alone; without them the calls below fail with
    # "NameError: name 'file1' is not defined".
    # Rows are joined with os.linesep so that the text uses the host
    # platform's own line separator.
    file1 = os.linesep.join([
        "22 110.1 33 331.5 22.7 5 271.9 17.2 33.4",
        "4 55.1",
    ])
    file2 = os.linesep.join([
        "5 H",
        "22 0",
    ])
    file3 = os.linesep.join([
        "4 T",
        "5 B",
        "22 C",
        "121 S",
    ])
    input1 = os.linesep.join([
        "187 7.79 122.27 54.37 4.26 179.75",
        "194 8.00 121.23 54.79 4.12 180.06",
        "15 8.45 119.04 55.02 4.08 178.89",
        "176 7.78 118.68 54.57 4.20 181.06",
        "180 7.50 119.21 53.93 179.80",
        "190 7.58 120.44 54.62 4.25 180.02",
        "152 8.39 120.63 55.10 4.15 179.10",
        "154 7.79 119.62 54.47 4.22 180.46",
        "175 8.42 120.50 55.31 4.04 180.33",
    ])
    input2 = os.linesep.join([
        "15 H 37 H 95 T",
        "124 H 130 H 152 H 154 H 158 H 164 H",
        "175 H 176 H 180 H",
        "187 H 190 T",
        "194 C",
        "196 H 207 H 210 H 232 H",
    ])
    # print(x) with a single argument behaves the same on Python 2 and 3.
    print(merge_sources([("file1", file1), ("file2", file2), ("file3", file3)]))
    print(merge_sources([("input1", input1), ("input2", input2)]))
# Reference copy of the output printed by the two demo calls above
# (Python 2 dict repr, re-wrapped for readability). Not used by the code.
Test_results = '''
{'22': {'file3': ['22', 'C'],
     'file2': ['22', '0'],
     'file1': ['22', '110.1', '33', '331.5', '22.7', '5', '271.9',
          '17.2', '33.4']}}
{'194': {'input2': ['194', 'C'],
     'input1': ['194', '8.00', '121.23', '54.79', '4.12',
           '180.06']},
 '175': {'input2': ['175', 'H', '176', 'H', '180', 'H'],
     'input1': ['175', '8.42', '120.50', '55.31', '4.04',
           '180.33']},
  '15': {'input2': ['15', 'H', '37', 'H', '95', 'T'],
     'input1': ['15', '8.45', '119.04', '55.02', '4.08',
           '178.89']},
 '187': {'input2': ['187', 'H', '190', 'T'],
     'input1': ['187', '7.79', '122.27', '54.37', '4.26',
           '179.75']}}
'''

Dear Sir,

I tried the above program, but it shows this error:
nmruser at caf:~> python join1.py
Traceback (most recent call last):
  File "join1.py", line 24, in 
    print merge_sources([("file1", file1), ("file2", file2), ("file3",
NameError: name 'file1' is not defined                                
nmruser at caf:~> 



-- 

http://mail.python.org/mailman/listinfo/python-list

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20100517/be628583/attachment.html>


More information about the Python-list mailing list