How to get the size of a file?

Bengt Richter bokr at
Sun Oct 17 11:45:17 CEST 2004

On Sun, 17 Oct 2004 06:29:36 GMT, bokr at (Bengt Richter) wrote:
>This should list duplicate files in the specified directory:
>You can hack to suit. Not very tested. Just what you see ;-)
[... version which only worked for current working directory...]
Phooey. Hopefully better:

import os, md5
def _md5_hexdigest(path, chunk_size=65536):
    """Return the hex MD5 digest of the file at *path*.

    The file is read in binary mode in fixed-size chunks so that large
    files are not loaded into memory in one piece.
    """
    # Imported locally: the legacy ``md5`` module is gone in Python 3,
    # while ``hashlib`` is available on Python 2.5+ and Python 3.
    import hashlib
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def get_dupes(thedir):
    """Find groups of files with identical content directly inside *thedir*.

    Only regular files that are immediate children of *thedir* are
    considered; subdirectories are not descended into.

    Returns a list of ``(size, digest, names)`` tuples, one per group of
    two or more files sharing both byte size and MD5 hex digest. *names*
    holds bare file names (relative to *thedir*). The list is empty when
    no duplicates exist. The order of groups and names is not specified.

    Errors from the underlying ``os`` / file calls (e.g. a missing or
    unreadable directory or file) are propagated to the caller.
    """
    # Pass 1: bucket by size. Files of different sizes cannot be
    # identical, so only same-sized files need hashing.
    finfo = {}
    for f in os.listdir(thedir):
        p = os.path.join(thedir, f)
        if os.path.isfile(p):
            finfo.setdefault(os.path.getsize(p), []).append(f)

    # Pass 2: within each size bucket holding more than one file, bucket
    # again by content digest.
    result = []
    for size, flist in finfo.items():
        if len(flist) > 1:
            dupes = {}
            for name in flist:
                # Join with thedir so this works for any directory,
                # not only the current working directory.
                digest = _md5_hexdigest(os.path.join(thedir, name))
                dupes.setdefault(digest, []).append(name)
            for digest, names in dupes.items():
                if len(names) > 1:
                    result.append((size, digest, names))
    return result

if __name__ == '__main__':
    # Command-line entry point: report duplicate files in the directory
    # named by the first argument.
    import sys

    # Check the argument up front rather than catching IndexError around
    # the whole run, so an unrelated IndexError is never mistaken for a
    # usage error.
    if len(sys.argv) < 2:
        # SystemExit with a string prints it to stderr and exits non-zero.
        raise SystemExit('Usage: python %s directory' % sys.argv[0])

    dupes = get_dupes(sys.argv[1])
    if dupes:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('%8s %32s %s' % ('size','md5 digest','files with the given size, digest'))
        print('%8s %32s %s' % ('----','-'*32      ,'---------------------------------'))
        for duped in dupes:
            # duped is a (size, digest, names) tuple from get_dupes.
            print('%8s %32s %s' % duped)
    else:
        print('No duplicate files in %r' % sys.argv[1])
Bengt Richter

More information about the Python-list mailing list