Script Optimization
lev
levlozhkin at gmail.com
Sun May 4 16:01:15 EDT 2008
> * Remove newlines introduced by email
> * Move imports to start of file
used imports of the edited script you sent.
> * Change indentation from 8 spaces to 4
I like using tabs because of the text editor I use, the script at
the end is with 4 though.
> * Move main() to bottom of script
> * Remove useless "pass" and "return" lines
I replaced the return nothing lines with passes, but I like
keeping them in case the indentation is ever lost - makes it easy to
go back to original indentation
> * Temporarily change broken "chdir" line
removed as many instances of chdir as possible (a few useless ones
to accommodate the functions - changed functions to not chdir as much),
that line seems to work... I made it in case the script is launched
with say: 'python somedir\someotherdir\script.py' rather than 'python
script.py', because I need it to work in its own and parent
directory.
> * Split lines so they fit into 80 chars
> * Add spaces after commas
> * Use path.join instead of string interpolation
in all cases when possible - done
> * rename rename() to rename_md5() because rename() shadows a function
> imported from os.
renamed all functions to more understandable names (without
collisions)
> * Rename vars shadowing imported names
renamed almost all vars to more understandable names
> * Improve logic for checking when to print help
the example you gave me does pretty much the exact same thing as
before... (the options are either false or true depending on if the
argument was used; if false for both then no logic was done and help
is shown, which would be exactly the same if the did_something var
remained false).
> * Create empty md5 listing file if one doesn't exist
I intended it to be a script to help ripping a specific mp3cd to
disk, not necessarily create checksum files, because I intend to
include the checksums file.
> * Add a comment for a dodgy-looking section
The 4 folders to be renamed are intentional (this is for a
specific mp3cd with 4 album folders)
I added comments to explain what I was doing with the dictionary[x][1]
[1][0], and also what the indexes for the strings are used for ([3:]
to remove the 001 in 001Track.mp3, etc.)
Thanks for the advice so far,
lev
#!/usr/bin/env python
import md5
from glob import glob
from optparse import OptionParser
from os import chdir, path, rename, remove
from sys import argv, exit
def verify_checksum_set(checksums):
    """Verify every file listed in an md5 listing and print a report.

    Each line of the listing is expected to look like
    ``<CHECKSUM> *<file name>``.  Lines without the `` *`` separator
    (for example blank lines) are skipped.

    Args:
        checksums: Path of the md5 listing file to read.

    Prints a ``.`` for each file whose checksum matches, then a summary
    of changed files, of files that could not be read, or a
    "Checksums Verified" message when neither list has entries.
    """
    # Local import: the file only does `from sys import argv, exit`.
    import sys

    changed_files = {}
    missing_files = []

    with open(checksums, 'r') as checksum_file:
        for fline in checksum_file:
            # maxsplit=1 keeps file names that themselves contain ' *' intact.
            parts = fline.split(' *', 1)
            if len(parts) != 2:
                continue
            original_sum = parts[0].strip().upper()
            # Strip once so the name used to open the file is also the
            # name that gets reported (no stray trailing newline).
            file_name = parts[1].strip()
            try:
                new_sum = calculate_checksum(file_name)
            except IOError:
                missing_files.append(file_name)
                continue
            if new_sum == original_sum:
                # Progress marker; written directly so the statement is
                # valid on both Python 2 and Python 3.
                sys.stdout.write('. ')
            else:
                changed_files[file_name] = (original_sum, new_sum)

    print('\n')

    if changed_files:
        print('File(s) changed:')
        for name in sorted(changed_files):
            old_sum, new_sum = changed_files[name]
            print('%s changed from:\n\t%s to\n\t%s' % (name, old_sum, new_sum))
        print('\n\t%d file(s) changed.\n' % len(changed_files))

    if missing_files:
        print('File(s) not found:')
        for name in sorted(missing_files):
            print('\t%s' % name)
        print('\n\t%d file(s) not found.\n' % len(missing_files))

    if not changed_files and not missing_files:
        print("\n\tChecksums Verified\n")
def calculate_checksum(file_name, chunk_size=8192):
    """Return the upper-case hexadecimal MD5 digest of a file.

    The file is read in binary mode in pieces of ``chunk_size`` bytes so
    the whole file never has to be held in memory.

    Args:
        file_name: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration.

    Returns:
        The MD5 digest as an upper-case hex string.

    Raises:
        IOError: If the file cannot be opened or read.
    """
    # Local import: hashlib replaces the deprecated `md5` module that the
    # file imports at the top (that module no longer exists in Python 3).
    import hashlib

    checksum = hashlib.md5()
    # `with` guarantees the handle is closed even if a read fails.
    with open(file_name, 'rb') as file_to_check:
        while True:
            chunkdata = file_to_check.read(chunk_size)
            if not chunkdata:
                break
            checksum.update(chunkdata)
    return checksum.hexdigest().upper()
def rename_file_set(new_dir_names, checksums):
    """Rename the album directories and their tracks, then update metadata.

    Directories in the current working directory matching
    ``00[1-4]Volume [1-4]`` are renamed, in sorted order, to the entries
    of ``new_dir_names``.  Inside each renamed directory the leading
    three-character track number is removed from ``*.mp3`` file names.
    The md5 listing is rewritten to match and the directory names inside
    'The American Century.htm' are replaced.

    Args:
        new_dir_names: Sequence of new directory names, one per album
            directory, in the sorted order of the old names.
        checksums: Path of the md5 listing file to read and rewrite.

    Raises:
        IndexError: If fewer album directories are found than there are
            new names.  Raised before anything is renamed so the disk is
            never left half-converted.
    """
    file_info = md5format(checksums)
    dirlist = sorted(glob('00[1-4]Volume [1-4]'))

    if len(dirlist) < len(new_dir_names):
        raise IndexError(
            'expected %d directories matching "00[1-4]Volume [1-4]", '
            'found %d' % (len(new_dir_names), len(dirlist)))

    for old_dir_name, new_dir_name in zip(dirlist, new_dir_names):
        rename(old_dir_name, new_dir_name)
        print('\t%s renamed to: %s' % (old_dir_name, new_dir_name))

        chdir(new_dir_name)
        try:
            for old_file_name in glob('*.mp3'):
                # Only numbered tracks ('001Track ...') are renamed; this is
                # the same test md5file_name_edit applies to the listing, so
                # the files on disk and the checksum entries stay in sync.
                if not old_file_name.startswith('0'):
                    continue
                # old_file_name[3:] drops the numbering: '001Track ...'
                new_file_name = old_file_name[3:]
                rename(old_file_name, new_file_name)
                print('\t\t%s renamed to: %s'
                      % (old_file_name, new_file_name))
        finally:
            # Always step back out, even if a rename failed, so later
            # relative paths are not resolved inside the album directory.
            chdir('..')

        file_info = md5file_name_edit(file_info, old_dir_name, new_dir_name)

    md5write(file_info, checksums)
    replace_strings('The American Century.htm', dirlist, new_dir_names)
    print('\n\tDirectories and Files renamed.')
def md5format(checksums):
    """Parse an md5 listing file into a dictionary.

    Each useful line looks like ``<CHECKSUM> *<dir>\\<file>`` or
    ``<CHECKSUM> *<file>``.  Lines without the `` *`` separator (for
    example blank lines) are skipped.

    Args:
        checksums: Path of the md5 listing file to read.

    Returns:
        A dict mapping the original full file name, exactly as read
        (including any trailing newline), to a tuple
        ``(checksum, path_parts)`` where ``path_parts`` is the full file
        name split on backslashes, e.g. ``[directory name, file name]``.
    """
    file_info = {}
    with open(checksums, 'r') as checksum_file:
        for line in checksum_file:
            # maxsplit=1 so a file name containing ' *' is not cut short.
            splitline = line.split(' *', 1)
            if len(splitline) != 2:
                continue
            checksum, full_name = splitline
            # original full filename = (checksum, [directory name, file name])
            file_info[full_name] = (checksum, full_name.split('\\'))
    return file_info
def md5file_name_edit(file_info, old_dir_name, new_dir_name):
    """Point the entries of one directory at its new name.

    For every entry whose first path component equals ``old_dir_name``
    the directory is replaced by ``new_dir_name``; if the file name
    starts with '0' its first three characters (the track number) are
    removed as well.

    Args:
        file_info: Dict mapping a full file name to
            ``(checksum, [directory name, file name])``; entries for
            files in the root directory hold ``[file name]`` only.
        old_dir_name: Directory name to look for.
        new_dir_name: Directory name to store instead.

    Returns:
        The same ``file_info`` dict, updated in place.
    """
    for full_name, (checksum, path_parts) in list(file_info.items()):
        # Root-level entries have a single component and no directory to
        # rename; skipping them also avoids indexing past the end.
        if len(path_parts) < 2 or path_parts[0] != old_dir_name:
            continue
        # md5 format: 'C8109BF6B0EF724770A66CF4ED6251A7 *001Album 1\001Track.mp3'
        file_name = path_parts[1]
        # mp3cd numbering: '001Track.mp3, 002Track.mp3...'
        if file_name.startswith('0'):
            file_name = file_name[3:]
        file_info[full_name] = (checksum, [new_dir_name, file_name])
    return file_info
def md5write(file_info, checksums):
    """Write the checksum entries back to an md5 listing file.

    Entries are written sorted by their dictionary key, one per line, as
    ``<CHECKSUM> *<name>``.

    Args:
        file_info: Dict mapping a key to ``(checksum, path_parts)``,
            where ``path_parts`` is the list of backslash-separated
            components of the file name.
        checksums: Path of the md5 listing file to (over)write.
    """
    with open(checksums, 'w') as checksum_file:
        for key in sorted(file_info):
            checksum, path_parts = file_info[key]
            # Joining with a backslash is the exact inverse of the
            # split('\\') done in md5format, so both
            #   'C8109BF6B0EF724770A66CF4ED6251A7 *001Album 1\001Track.mp3'
            # (one directory deep) and
            #   '007CC9C12342017709A2F19AF75247BD *010Track.mp3'
            # (file in the root dir) are reproduced without special cases.
            name = '\\'.join(path_parts)
            # Names read from a listing may or may not still carry their
            # line ending; normalise so every entry gets exactly one.
            name = name.rstrip('\r\n')
            checksum_file.write('%s *%s\n' % (checksum, name))
def replace_strings(file_name, oldlist, newlist):
    """Replace the first occurrence of each old string in a text file.

    ``oldlist[i]`` is replaced by ``newlist[i]`` (first occurrence only)
    and the file is rewritten in place.  If the file cannot be opened for
    reading, a "not found" message is printed and nothing is changed.

    Args:
        file_name: Path of the file to edit.
        oldlist: Sequence of strings to search for.
        newlist: Sequence of replacement strings, paired by position with
            ``oldlist``; unpaired extras in either sequence are ignored.

    Raises:
        IOError: If the file cannot be written back.
    """
    try:
        with open(file_name, 'r') as source:
            contents = source.read()
    except IOError:
        print('%s not found' % file_name)
        return

    for old, new in zip(oldlist, newlist):
        contents = contents.replace(old, new, 1)

    # Opening with 'w' truncates the existing file, so it does not have to
    # be deleted first (deleting it would lose the data if the write failed
    # to start).
    with open(file_name, 'w') as target:
        target.write(contents)
def main():
    """Parse the command line and run the requested actions.

    The working directory is set to the parent of the directory that
    contains the script, and the md5 listing is looked up as
    'checksums.md5' next to the script.  '-v/--verify' verifies the
    checksums, '-r/--rename' renames directories and files; both may be
    given together.  With neither option the help text is printed.
    """
    script_dir = path.abspath(path.dirname(argv[0]))
    checksums = path.join(script_dir, 'checksums.md5')

    # Work from the parent of the script's directory, regardless of where
    # the script was launched from.
    chdir(script_dir)
    chdir('..')

    new_dir_names = (
        'Volume 1 - 1889-1929',
        'Volume 2 - 1929-1945',
        'Volume 3 - 1945-1965',
        'Volume 4 - 1963-1989',
    )

    parser = OptionParser()
    parser.add_option(
        '-v', '--verify',
        action='store_true',
        dest='verify',
        help='verify checksums',
    )
    parser.add_option(
        '-r', '--rename',
        action='store_true',
        dest='rename',
        help='rename files to a more usable form (write rights needed)',
    )
    options, _positional = parser.parse_args()

    if not (options.verify or options.rename):
        parser.print_help()
        return

    if options.verify:
        verify_checksum_set(checksums)
    if options.rename:
        rename_file_set(new_dir_names, checksums)
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
More information about the Python-list
mailing list