ATTN : Georges ( gry at ll.mit.edu)

Tue Apr 12 15:10:36 EDT 2005


> My code so far:
> # -*- coding: iso-8859-1 -*-
> import sys
> import os
> from progadn import *
>
> ab1seq = raw_input("Entrez le répertoire où sont les fichiers à  
> analyser: ") or None

	Ce serait mieux d'utiliser sys.argv pour spécifier le répertoire dans la  
ligne de commande du programme :
import sys
# Interactive-interpreter hint: shows the built-in help for the sys.argv
# object, i.e. the list that holds the program's command-line arguments.
help(sys.argv)

> if ab1seq == None :
>     print "Erreur: Pas de répertoire! \n" \
> "\nAu revoir \n"
>     sys.exit()

je propose :

import os, os.path, sys

def usage():
    """Print the usage text and terminate the program.

    The text is the placeholder "documentation..." and is written to
    standard output. The function never returns: it always raises
    SystemExit through sys.exit(-1).
    """
    # Single-argument print(...) behaves identically as a Python 2 statement
    # and as a Python 3 function call, so the script runs on both.
    print("documentation...")
    sys.exit(-1)


# Every command-line argument is a candidate file or directory.
args = sys.argv[1:]

if not args:
    usage()

# Expand the arguments into a flat list of paths: a file is taken as is,
# a directory contributes each of its entries (joined with the directory).
files = []
for path in args:
    if os.path.isfile(path):
        files.append(path)
    elif os.path.isdir(path):
        files.extend([os.path.join(path, fname) for fname in os.listdir(path)])
    else:
        print("%s n'est ni un fichier ni un répertoire..." % path)
        usage()

# Only paths ending in ".Seq" are processed; everything else is dropped.
files = [fname for fname in files if fname.endswith(".Seq")]

if not files:
    print("Aucun fichier a traiter.")
    usage()

print("Fichier à traiter :")
print(", ".join(files))

for path in files:
    print(path)
    # Close the handle explicitly instead of relying on garbage collection.
    handle = open(path)
    try:
        contents = handle.read()
    finally:
        handle.close()
    # NOTE(review): the value returned by checkDNA is discarded here, as in
    # the original snippet; only its printed diagnostics are visible.
    checkDNA(contents)

> def checkDNA(seq):
>     """Retourne une liste des caractères non conformes à l'IUPAC."""
>
>     junk=[]
>     for c in range (len(seq)):
>         if seq[c] not in iupac:
>             junk.append([seq[c],c])
>             #print junk
>             print "ATTN: Il y a le caractère %s en position %s " %  
> (seq[c],c)
>         if junk == []:
>              indinv=range(len(seq))
>              indinv.reverse()
>              resultat=""
>              for i in indinv:
>                  resultat +=comp[seq[i]]
>              return resultat

	Je réécris un peu votre fonction d'une manière plus "python", à placer  
dans le programme avant son appel bien sûr !

def checkDNA( seq ):
    """Return the reverse complement of the sequence text ``seq``.

    Leading and trailing whitespace is stripped first. Every remaining
    character is looked up in the module-level ``comp`` mapping, which is
    not defined in this block and must be provided by the surrounding
    program. A character missing from ``comp`` is reported on standard
    output together with its 0-based position in the stripped text, and is
    left out of the result.

    Returns the looked-up values in reverse order, joined into one string.
    Returns None, after printing "Fichier vide.", when nothing is left once
    the whitespace has been stripped.
    """
    seq = seq.strip()
    if not seq:
        print("Fichier vide.")
        return None
    complemented = []
    for position, base in enumerate(seq):
        try:
            complemented.append(comp[base])
        except KeyError:
            # Invalid characters are reported and skipped, not fatal.
            print("Caractère <%s> en position <%d> invalide" % (base, position))
    complemented.reverse()
    return ''.join(complemented)
			

>
> seq=checkDNA(seq)
>
> -------------------------------------------------------------------------------------------------------------------------
>
> Path:  
> news3!feeder.news-service.com!news.glorb.com!postnews.google.com!o13g2000cwo.googlegroups.com!not-for-mail
> From: gry at ll.mit.edu
> Newsgroups: comp.lang.python
> Subject: Re: problem with the logic of read files
> Date: 12 Apr 2005 10:47:17 -0700
> Organization: http://groups.google.com
> Lines: 104
> Message-ID: <1113328037.070319.136110 at o13g2000cwo.googlegroups.com>
> References: <68da8$425bf954$43461874$29054 at nf1.news-service.com>
> NNTP-Posting-Host: 129.55.200.20
> Mime-Version: 1.0
> Content-Type: text/plain; charset="iso-8859-1"
> Content-Transfer-Encoding: quoted-printable
> X-Trace: posting.google.com 1113328069 32347 127.0.0.1 (12 Apr 2005  
> 17:47:49 GMT)
> X-Complaints-To: groups-abuse at google.com
> NNTP-Posting-Date: Tue, 12 Apr 2005 17:47:49 +0000 (UTC)
> In-Reply-To: <68da8$425bf954$43461874$29054 at nf1.news-service.com>
> User-Agent: G2/0.2
> Complaints-To: groups-abuse at google.com
> Injection-Info: o13g2000cwo.googlegroups.com; posting-host=129.55.200.20;
>    posting-account=tzIXbQwAAACT3z3X4eITVLtksgiDRxhx
> Xref: news-x2.support.nl comp.lang.python:438583
>
>
> <m_t... at yahoo.com> wrote:
>> I am new to python and I am not in computer science. In fact I am a
> biologist and I am trying to learn python. So if someone can help me, I
> will appreciate it.
>> Thanks
>>
>>
>> #!/cbi/prg/python/current/bin/python
>> # -*- coding: iso-8859-1 -*-
>> import sys
>> import os
>> from progadn import *
>>
>> ab1seq =3D raw_input("Entrez le r=E9pertoire o=F9 sont les fichiers =E0
> analyser: ") or None
>> if ab1seq =3D=3D None :
>>     print "Erreur: Pas de r=E9pertoire! \n"
>> "\nAu revoir \n"
>>     sys.exit()
>>
>> listrep =3D os.listdir(ab1seq)
>> #print listrep
>>
>> extseq=3D[]
>>
>> for f in listrep:
> ###### Minor -- this is better said as:  if f.endswith(".Seq"):
>>      if f[-4:]=3D=3D".Seq":
>>          extseq.append(f)
>> #         print extseq
>>
>> for x in extseq:
>>      f =3D open(x, "r")
> ###### seq=3D... discards previous data and refers only to that just
> read.
> ###### It would be simplest to process each file as it is read:
> @@@@@@ seq=3Df.read()
> @@@@@@ checkDNA(seq)
>>      seq=3Df.read()
>>      f.close()
>>      s=3Dseq
>>
>> def checkDNA(seq):
>>     """Retourne une liste des caract=E8res non conformes =E0
> l'IUPAC."""
>>
>>     junk=3D[]
>>     for c in range (len(seq)):
>>         if seq[c] not in iupac:
>>             junk.append([seq[c],c])
>>             #print junk
>>             print "ATTN: Il y a le caract=E8re %s en position %s " %
> (seq[c],c)
>>         if junk =3D=3D []:
>>              indinv=3Drange(len(seq))
>>              indinv.reverse()
>>              resultat=3D""
>>              for i in indinv:
>>                  resultat +=3Dcomp[seq[i]]
>>              return resultat
>>
>> seq=3DcheckDNA(seq)
>> print seq
>
> ##### The program segment you posted did not define "comp" or "iupac",
> ##### so it's a little hard to guess how it's supposed to work.  It
> would
> ##### be helpful if you gave a concise description of what you want the
>
> ##### program to do, as well as brief sample of input data.
> ##### I hope this helps!  -- George
>>
>> #I got the following ( as you see only one file is proceed by the
> function even if more files is in extseq
>>
>> ['B1-11_win3F_B04_04.ab1.Seq']
>> ['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq']
>> ['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq',
> 'B1-18_win3F_D04_08.ab1.Seq']
>> ['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq',
> 'B1-18_win3F_D04_08.ab1.Seq', 'B1-18_win3R_E04_10.ab1.Seq']
>> ['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq',
> 'B1-18_win3F_D04_08.ab1.Seq', 'B1-18_win3R_E04_10.ab1.Seq',
> 'B1-19_win3F_F04_12.ab1.Seq']
>> ..
>> ['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq',
> 'B1-18_win3F_D04_08.ab1.Seq', 'B1-18_win3R_E04_10.ab1.Seq',
> 'B1-19_win3F_F04_12.ab1.Seq', 'B1-19_win3R_G04_14.ab1.Seq',
> 'B90_win3F_H04_16.ab1.Seq', 'B90_win3R_A05_01.ab1.Seq',
> 'DL2-11_win3F_H03_15.ab1.Seq', 'DL2-11_win3R_A04_02.ab1.Seq',
> 'DL2-12_win3F_F03_11.ab1.Seq', 'DL2-12_win3R_G03_13.ab1.Seq',
> 'M7757_win3F_B05_03.ab1.Seq', 'M7757_win3R_C05_05.ab1.Seq',
> 'M7759_win3F_D05_07.ab1.Seq', 'M7759_win3R_E05_09.ab1.Seq',
> 'TCR700-114_win3F_H05_15.ab1.Seq', 'TCR700-114_win3R_A06_02.ab1.Seq',
> 'TRC666-100_win3F_F05_11.ab1.Seq', 'TRC666-100_win3R_G05_13.ab1.Seq']
>>
>> after this listing my programs proceed only the last element of this
> listing (TRC666-100_win3R_G05_13.ab1.Seq)
>>
>>
> NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTCCCGAAGTGTCCCAGAGCAAATAAATGGACCAAAACGTTTTTAG=
> NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTCCCGAAGTGTCCCAGAGCAAATAAATGGACCAAAACGTTTTTAG=
> AATACTTGAACGTGTAATCTCATTTTAA
>
>
>
> **********End Of Post*************
>
>
>
>