searching substrings with interpositions
Claudio Grondi
claudio.grondi at freenet.de
Tue May 24 14:19:29 EDT 2005
<borges2003xx at yahoo.it> schrieb im Newsbeitrag
news:1116939594.958040.14110 at z14g2000cwz.googlegroups.com...
> thanx everyone, is what i need.
> As Claudio argues, it's a standard problem of dna sequences
> comparation.
> the next step of my job is to make limits of lenght of interposed
> sequences (if someone can help me in this way i'll apreciate a lot)
> thanx everyone.
> giorgio
>
Note: the code below is only intended to help clarify things,
so that a bunch of examples can be tested.
If you need bug-free, production-quality code, maybe
someone else can provide it.
I have introduced two additional parameters to the function.
If intMaxLenOfGap == 0, the gap size doesn't matter.
lstStartEndOfRangeOfBwithOccurenceOfA returns, in its
elements 0 and 1, the begin and end of the range in which
strA was found in strB.
I hope this does what you mean by
"make limits of lenght of interposed sequences" -
does it?
Claudio
P.S. Here the code:
def blnFindCharSequenceAevenIfSpreadOverEntireStringB(
        strA, strB, intMaxLenOfGap=0,
        lstStartEndOfRangeOfBwithOccurenceOfA=None):
    """Report whether the characters of strA occur, in order, within strB.

    The characters of strA need not be adjacent in strB; other characters
    may be interposed between them.  A one-line result message is printed
    in every case.

    Matching is greedy: the first character of strA is anchored at a
    position in strB and every following character is matched at its
    earliest occurrence after the previous match.  If such a match would
    skip more than intMaxLenOfGap characters, the attempt is dropped and
    the search restarts with the anchor moved to the next occurrence of
    strA[0].  Because only the earliest occurrence of each character is
    tried, an alignment that needs a later occurrence is not found.

    Parameters:
        strA -- the character sequence to look for.
        strB -- the string to search in.
        intMaxLenOfGap -- maximum number of characters allowed between two
            consecutive matched characters.  Values <= 0 (the default is
            0) mean that the gap size does not matter.
        lstStartEndOfRangeOfBwithOccurenceOfA -- optional list used as an
            output parameter.  It is emptied on entry; on success it
            receives two items, start and end, such that strB[start:end]
            spans the match.

    Returns:
        True if strA was found, otherwise False.  An empty strA is
        reported as not found.
    """
    lstRange = lstStartEndOfRangeOfBwithOccurenceOfA
    if lstRange is not None:
        # Empty the caller's list in place so that elements 0 and 1 always
        # belong to this call, even when the list is reused across calls.
        del lstRange[:]

    blnStrAinB = False
    blnGapLimitExceeded = False
    intStart = 0
    intEnd = 0
    intAnchorFrom = 0

    while strA and not blnStrAinB:
        intAnchor = strB.find(strA[0], intAnchorFrom)
        if intAnchor < 0:
            break

        intPrev = intAnchor
        blnComplete = True
        blnRetry = False
        for chrA in strA[1:]:
            intNext = strB.find(chrA, intPrev + 1)
            if intNext < 0:
                # Later anchors only push the greedy matches further to
                # the right, so a character that is missing here stays
                # missing: give up for good.
                blnComplete = False
                break
            if 0 < intMaxLenOfGap < intNext - intPrev - 1:
                # An occurrence exists but lies too far away; a later
                # anchor may still produce a tighter alignment.
                blnComplete = False
                blnRetry = True
                blnGapLimitExceeded = True
                break
            intPrev = intNext

        if blnComplete:
            blnStrAinB = True
            intStart = intAnchor
            intEnd = intPrev + 1
        elif blnRetry:
            intAnchorFrom = intAnchor + 1
        else:
            break

    # Single-argument print(...) behaves the same on Python 2 and 3.
    if blnStrAinB:
        if len(strA) == 1:
            print("sequence '%s' found in '%s'" % (strA, strB))
        else:
            print("sequence '%s' found in '%s' at range(%i, %i)" % (
                strA, strB, intStart, intEnd))
        if lstRange is not None:
            lstRange.extend([intStart, intEnd])
    elif blnGapLimitExceeded:
        print("sequence '%s' not in '%s' (maybe allowed gap of %i chars was "
              "too small?)" % (strA, strB, intMaxLenOfGap))
    else:
        print("sequence '%s' not in '%s'" % (strA, strB))

    return blnStrAinB
# Demonstration calls; each call prints its own result message.
# Single-argument print(...) behaves the same on Python 2 and 3.
print("")
lstStartEndOfRangeOfB = []

# No gap limit (intMaxLenOfGap is left at its default).
strA = "0101"
strB = "000011110100"
blnFindCharSequenceAevenIfSpreadOverEntireStringB(strA, strB)

# Same strings with a gap limit of 2.  The result list is emptied first so
# that elements 0 and 1 read at the end come from the calls below.
lstStartEndOfRangeOfB = []
strA = "0101"
strB = "000011110100"
blnFindCharSequenceAevenIfSpreadOverEntireStringB(strA, strB, 2)

# Longer pattern, gap limit of 6, with the result list passed in.
strA = "010101"
strB = "000011110100"
blnFindCharSequenceAevenIfSpreadOverEntireStringB(
    strA, strB, 6, lstStartEndOfRangeOfB)

# Longer strB searched twice: gap limit 4, then gap limit 5.
strA = "010101"
strB = "00001111010000001"
blnFindCharSequenceAevenIfSpreadOverEntireStringB(
    strA, strB, 4, lstStartEndOfRangeOfB)
strA = "010101"
strB = "00001111010000001"
blnFindCharSequenceAevenIfSpreadOverEntireStringB(
    strA, strB, 5, lstStartEndOfRangeOfB)

print("")
print("usage of lstStartEndOfRangeOfB parameter passed to function for use "
      "as return value:")
print("sequence '%s' was found in '%s' at range(%i, %i)" % (
    strA, strB, lstStartEndOfRangeOfB[0], lstStartEndOfRangeOfB[1]))
More information about the Python-list
mailing list