Trying to run a program within a python script on multiple output files creating by the same script

ubatool at ufl.edu ubatool at ufl.edu
Mon Apr 24 13:41:33 EDT 2017


I'm writing a script that takes two command line options, a file containing barcodes and a file containing sequences. I've managed to create output files for each barcode with the matching and corresponding sequences in it.

For the next part of my script, I'm trying to create more output files by using the output files creating earlier as input files. So my first output files contains sequences in a simple format. Now each needs to be converted to fasta, then used mafft and quicktree commands on them.

However, this part only converts one of the original output files into fasta, running mafft and quicktree on that one. 

#!/usr/bin/python

import sys
import os


fname           = sys.argv[2]
barcodefname  = sys.argv[1]

barcodefile = open(barcodefname, "r")
for barcode in barcodefile:
        barcode = barcode.strip()
        print "barcode: %s" %  barcode
        infname = "%s.%s" % (fname,barcode)
        inf = open(infname, "w")
        handle1 = open(fname, "r")
        for lines in handle1:
                seqid = lines[0:3]
                i = 4
                potential_barcode = lines[i:(i+len(barcode))]
                if potential_barcode == barcode:
                        outseq = lines[i+len(barcode):]
                        sys.stdout.write(outseq)
                        inf.write("%s %s%s" % (seqid,barcode,outseq))
        handle1.close()
        inf.close()
barcodefile.close()

infname    = "%s.%s" %(fname,barcode)
fastafname = infname + ".fasta"
mafftfname = fastafname + ".mafft"
stfname    = infname + ".stock"


# convert simple to fasta #
for file in infname
        handle = open(infname, "r")
        outf   = open(fastafname, "w")
        for line in handle:
                linearr = line.split()
                seqids = linearr[0]
                seq   = linearr[1]
                outf.write(">%s\n%s\n" % (seqids,seq))
        handle.close()
        outf.close()


# align using mafft #


cmd = "mafft %s > %s" % (fastafname,mafftfname)
sys.stderr.write("command: %s\n" % cmd)
os.system(cmd)
sys.stderr.write("command done\n")


#convert fasta alignment to stockholm
# fasta_to_stockholm seq.data.txt.fasta.mafft > TEST.stockholm
cmd = "fasta_to_stockholm %s > %s" % (mafftfname, stfname)
sys.stderr.write("command: %s\n" % cmd)
os.system(cmd)
sys.stderr.write("command done\n")


#run quicktree to get distance matrix
# quicktree -out m TEST.stockholm
cmd = "quicktree -out m %s" % stfname
sys.stderr.write("Command: %s\n" % cmd)
os.system(cmd)
sys.stderr.write("command done\n")



More information about the Python-list mailing list