[Tutor] longest common substring

lina lina.lastname at gmail.com
Fri Nov 11 05:14:33 CET 2011


<snip>

#!/usr/bin/python3

import os.path

xrange = range


c=['71', '82', '80', '70', '84', '56', '58', '34', '77', '76', '61',
'76', '34', '76', '58', '34', '56', '61', '65', '82', '65', '80',
'65', '82', '80', '82', '65', '82', '61', '80', '82', '65', '61',
'63', '65', '70', '80', '71', '34', '71', '64', '34', '58', '61',
'80', '34', '40', '72', '38', '4', '70', '72', '40', '72', '4', '72',
'42', '69', '40', '70', '40', '61', '40', '34', '61', '33', '34',
'61', '34', '35', '61', '35', '61', '70', '61', '34', '61', '34',
'54', '34', '32', '35', '59', '55', '59', '34', '43', '32', '34',
'32', '24', '34', '32', '35', '32', '43', '34', '32', '34', '45',
'35', '32', '83', '61', '58', '32', '58', '83', '32', '34', '61',
'52', '34', '32', '34', '84', '32', '52', '34', '57', '34', '52',
'20', '58', '34', '32', '34', '58', '34', '58', '61', '34', '30',
'35', '28', '52', '22', '21', '22', '30', '61', '79', '70', '80',
'70', '65', '61', '80', '59', '52', '61', '20', '30', '20', '58',
'20', '29', '74', '58', '20', '31', '20', '31', '57', '31', '34',
'20', '58', '34', '52', '34', '20', '58', '83', '58', '34', '61',
'34', '32', '76', '34', '35', '52', '77', '76', '74', '76', '58',
'20', '57', '58', '33', '76', '58', '52', '74', '20', '36', '61',
'36', '74', '61', '36', '83', '61', '83', '31', '61', '59', '33',
'36', '61', '20', '34', '84', '70', '61', '36', '61', '36', '77',
'20', '38', '36', '61', '59', '38', '10', '38', '36', '38', '77',
'36', '39', '38', '36', '23', '26', '8', '36', '8', '19', '8', '19',
'8', '19', '20', '8', '36', '34', '8', '21', '8', '28', '22', '18',
'10', '20', '76', '36', '57', '20', '26', '10', '20', '28', '33',
'35', '36', '34', '36', '20', '34', '10', '36', '76', '57', '76',
'57', '16', '10', '59', '20', '19', '59', '20', '28', '20', '37',
'23', '38', '21', '23', '79', '32', '29', '36', '29', '31', '29',
'36', '20', '34', '79', '23', '20', '28', '20', '79', '74', '34',
'20', '59', '32', '20', '23', '28', '20', '10', '56', '22', '56',
'52', '57', '28', '76', '74', '20', '34', '77', '20', '36', '22',
'61', '59', '22', '20', '22', '21', '23', '20', '61', '59', '77',
'22', '34', '58', '20', '34', '28', '29', '22', '8', '22', '23', '20',
'59', '22', '20', '57', '20', '57', '22', '77', '20', '76', '36',
'20', '77', '23', '35', '77', '20', '8', '74', '10', '76', '20', '34',
'10', '31', '20', '33', '59', '61', '42', '41']

d=['45', '64', '13', '5', '64', '45', '13', '15', '13', '16', '10',
'7', '16', '10', '8', '16', '8', '10', '13', '64', '10', '45', '64',
'43', '64', '47', '64', '43', '64', '45', '47', '45', '15', '43',
'17', '64', '47', '64', '62', '75', '16', '60', '45', '64', '13',
'64', '75', '45', '47', '64', '75', '64', '60', '64', '60', '64',
'58', '60', '64', '45', '16', '64', '58', '16', '58', '60', '64', '7',
'60', '64', '7', '64', '47', '10', '64', '58', '64', '60', '58', '64',
'58', '75', '60', '64', '45', '64', '45', '58', '45', '60', '64',
'58', '64', '45', '60', '58', '75', '58', '75', '45', '60', '58',
'60', '58', '7', '13', '58', '49', '57', '64', '49', '63', '50', '63',
'49', '50', '81', '61', '49', '69', '70', '49', '39', '48', '83',
'29', '52', '39', '29', '52', '37', '52', '29', '52', '27', '83',
'52', '83', '52', '39', '27', '39', '27', '39', '41', '27', '29',
'39', '27', '83', '29', '39', '27', '29', '41', '39', '61', '28',
'41', '81', '28', '41', '28', '41', '81', '36', '51', '61', '59',
'53', '48', '53', '83', '59', '48', '59', '53', '57', '41', '83',
'61', '42', '81', '61', '40', '79', '41', '28', '59', '27', '33',
'28', '41', '83', '79', '81', '41', '61', '29', '39', '28', '61',
'39', '28', '42', '41', '31', '41', '84', '82', '84', '61', '31',
'41', '61', '41', '82', '28', '41', '57', '48', '59', '83', '48',
'83', '48', '57', '61', '57', '83', '42', '48', '61', '46', '48',
'51', '59', '51', '81', '51', '57', '51', '81', '51', '57', '48',
'59', '48', '83', '61', '83', '48', '81', '60', '48', '51', '48',
'57', '48', '51', '74', '53', '51', '53', '51', '81', '52', '51',
'61', '51', '41', '61', '83', '81', '83', '61', '81', '39', '28',
'41', '84', '42', '61', '36', '61', '63', '84', '83', '41', '72',
'41', '37', '39', '41', '82', '41', '61', '28', '39', '28', '41',
'39', '28', '41', '83', '41', '83', '61', '84', '83', '84', '83',
'51', '61', '83', '40', '83', '63', '61', '59', '28', '84', '42',
'28', '84', '61', '40', '41', '40', '41', '63', '84', '63', '59',
'83', '61', '59', '61', '39', '84', '72', '61', '40', '84', '61',
'83', '42', '59', '36', '40', '61', '63', '61', '59', '61', '40',
'29', '61', '29', '61', '39', '61', '31', '61', '70', '61']

def LongestCommonSubstring(S1, S2):
    M = [[0]*(1+len(S2)) for i in xrange(1+len(S1))] ## creat 4*5 matrix
    longest, x_longest = 0, 0
    for x in xrange(1,1+len(S1)):                 ## read each row
        for y in xrange(1,1+len(S2)):             ## read each coloumn
            if S1[x-1] == S2[y-1]:
                M[x][y] = M[x-1][y-1]+1
                if M[x][y] > longest:
                    longest = M[x][y]
                    x_longest = x
                else:
                    M[x][y] = 0
    return S1[x_longest-longest:x_longest]




if __name__=="__main__":

    a=open("atom-pair_4.txt","r").readline().strip()

    b=open("atom-pair_8.txt","r").readline().strip()

    print(LongestCommonSubstring(LongestCommonSubstring(a,a),LongestCommonSubstring(b,b)))
    print(LongestCommonSubstring(c,d))


 $ python3 LongestCommonSubstring.py
2189
['
['82']

The results are wrong.
c, d are the string from file atom-pair_4,txt, exactly the same as a,
d is the same as b.

and even for (c,d) results are not correct, visually we can see some
similar groups, not mention the longest groups.

Thanks,

P.S

the 4 and 8 file are attached:

zip one

https://docs.google.com/open?id=0B93SVRfpVVg3ZWVmYWM5N2UtNzZjNy00M2NmLWI4ODYtYTVjMTQxZmQ3YjBh

tar.gz one

https://docs.google.com/open?id=0B93SVRfpVVg3MDY3NmI2NDQtOGJiMS00ODk4LWExMWEtZWIyODYyZGMxNTY3

Thanks again,


More information about the Tutor mailing list