Windows file paths, again
Dan Guido
dguido at gmail.com
Wed Oct 21 14:13:19 EDT 2009
I'm trying to write a few methods that normalize Windows file paths.
I've gotten it to work in 99% of the cases, but it seems like my code
still chokes on '\x'. I've pasted my code below, can someone help me
figure out a better way to write this? This seems overly complicated
for such a simple problem...
# returns normalized filepath with arguments removed
def remove_arguments(filepath):
#print "removing args from: " + filepath
(head, tail) = os.path.split(filepath)
pathext = os.environ['PATHEXT'].split(";")
while(tail != ''):
#print "trying: " + os.path.join(head,tail)
# does it just work?
if os.path.isfile(os.path.join(head, tail)):
#print "it just worked"
return os.path.join(head, tail)
# try every extension
for ext in pathext:
if os.path.isfile(os.path.join(head, tail) + ext):
return os.path.join(head, tail) + ext
# remove the last word, try again
tail = tail.split()[:-1]
tail = " ".join(tail)
return None
escape_dict={'\a':r'\a',
'\b':r'\b',
'\c':r'\c',
'\f':r'\f',
'\n':r'\n',
'\r':r'\r',
'\t':r'\t',
'\v':r'\v',
'\'':r'\'',
#'\"':r'\"',
'\0':r'\0',
'\1':r'\1',
'\2':r'\2',
'\3':r'\3',
'\4':r'\4',
'\5':r'\5',
'\6':r'\6',
'\7':r'\a', #i have no idea
'\8':r'\8',
'\9':r'\9'}
def raw(text):
"""Returns a raw string representation of text"""
new_string=''
for char in text:
try:
new_string+=escape_dict[char]
#print "escaped"
except KeyError:
new_string+=char
#print "keyerror"
#print new_string
return new_string
# returns the normalized path to a file if it exists
# returns None if it doesn't exist
def normalize_path(path):
#print "not normal: " + path
# make sure it's not blank
if(path == ""):
return None
# get rid of mistakenly escaped bytes
path = raw(path)
#print "step1: " + path
# remove quotes
path = path.replace('"', '')
#print "step2: " + path
#convert to lowercase
lower = path.lower()
#print "step3: " + lower
# expand all the normally formed environ variables
expanded = os.path.expandvars(lower)
#print "step4: " + expanded
# chop off \??\
if expanded[:4] == "\\??\\":
expanded = expanded[4:]
#print "step5: " + expanded
# strip a leading '/'
if expanded[:1] == "\\":
expanded = expanded[1:]
#print "step7: " + expanded
systemroot = os.environ['SYSTEMROOT']
# sometimes systemroot won't have %
r = re.compile('systemroot', re.IGNORECASE)
expanded = r.sub(systemroot, expanded)
#print "step8: " + expanded
# prepend the %systemroot% if its missing
if expanded[:8] == "system32" or "syswow64":
expanded = os.path.join(systemroot, expanded)
#print "step9: " + expanded
stripped = remove_arguments(expanded.lower())
# just in case you're running as LUA
# this is a race condition but you can suck it
if(stripped):
if os.access(stripped, os.R_OK):
return stripped
return None
def test_normalize():
test1 = "\??\C:\WINDOWS\system32\Drivers\CVPNDRVA.sys"
test2 = "C:\WINDOWS\system32\msdtc.exe"
test3 = "%SystemRoot%\system32\svchost.exe -k netsvcs"
test4 = "\SystemRoot\System32\drivers\vga.sys"
test5 = "system32\DRIVERS\compbatt.sys"
test6 = "C:\Program Files\ABC\DEC Windows Services\Client Services.exe"
test7 = "c:\Program Files\Common Files\Symantec Shared\SNDSrvc.exe"
test8 = "C:\WINDOWS\system32\svchost -k dcomlaunch"
test9 = ""
test10 = "SysWow64\drivers\AsIO.sys"
test11 = "\SystemRoot\system32\DRIVERS\amdsbs.sys"
test12 = "C:\windows\system32\xeuwhatever.sys" #this breaks everything
print normalize_path(test1)
print normalize_path(test2)
print normalize_path(test3)
print normalize_path(test4)
print normalize_path(test5)
print normalize_path(test6)
print normalize_path(test7)
print normalize_path(test8)
print normalize_path(test9)
print normalize_path(test10)
print normalize_path(test11)
print normalize_path(test12)
--
Dan Guido
More information about the Python-list
mailing list