Windows file paths, again
Diez B. Roggisch
deets at nospam.web.de
Wed Oct 21 14:34:53 EDT 2009
Dan Guido wrote:
> I'm trying to write a few methods that normalize Windows file paths.
> I've gotten it to work in 99% of the cases, but it seems like my code
> still chokes on '\x'. I've pasted my code below, can someone help me
> figure out a better way to write this? This seems overly complicated
> for such a simple problem...
>
>
> # returns normalized filepath with arguments removed
> def remove_arguments(filepath):
> #print "removing args from: " + filepath
> (head, tail) = os.path.split(filepath)
> pathext = os.environ['PATHEXT'].split(";")
>
> while(tail != ''):
> #print "trying: " + os.path.join(head,tail)
>
> # does it just work?
> if os.path.isfile(os.path.join(head, tail)):
> #print "it just worked"
> return os.path.join(head, tail)
>
> # try every extension
> for ext in pathext:
> if os.path.isfile(os.path.join(head, tail) + ext):
> return os.path.join(head, tail) + ext
>
> # remove the last word, try again
> tail = tail.split()[:-1]
> tail = " ".join(tail)
>
> return None
>
> escape_dict={'\a':r'\a',
> '\b':r'\b',
> '\c':r'\c',
> '\f':r'\f',
> '\n':r'\n',
> '\r':r'\r',
> '\t':r'\t',
> '\v':r'\v',
> '\'':r'\'',
> #'\"':r'\"',
> '\0':r'\0',
> '\1':r'\1',
> '\2':r'\2',
> '\3':r'\3',
> '\4':r'\4',
> '\5':r'\5',
> '\6':r'\6',
> '\7':r'\a', #i have no idea
> '\8':r'\8',
> '\9':r'\9'}
>
> def raw(text):
> """Returns a raw string representation of text"""
> new_string=''
> for char in text:
> try:
> new_string+=escape_dict[char]
> #print "escaped"
> except KeyError:
> new_string+=char
> #print "keyerror"
> #print new_string
> return new_string
>
> # returns the normalized path to a file if it exists
> # returns None if it doesn't exist
> def normalize_path(path):
> #print "not normal: " + path
>
> # make sure it's not blank
> if(path == ""):
> return None
>
> # get rid of mistakenly escaped bytes
> path = raw(path)
> #print "step1: " + path
>
> # remove quotes
> path = path.replace('"', '')
> #print "step2: " + path
>
> #convert to lowercase
> lower = path.lower()
> #print "step3: " + lower
>
> # expand all the normally formed environ variables
> expanded = os.path.expandvars(lower)
> #print "step4: " + expanded
>
> # chop off \??\
> if expanded[:4] == "\\??\\":
> expanded = expanded[4:]
> #print "step5: " + expanded
>
> # strip a leading '/'
> if expanded[:1] == "\\":
> expanded = expanded[1:]
> #print "step7: " + expanded
>
> systemroot = os.environ['SYSTEMROOT']
>
> # sometimes systemroot won't have %
> r = re.compile('systemroot', re.IGNORECASE)
> expanded = r.sub(systemroot, expanded)
> #print "step8: " + expanded
>
> # prepend the %systemroot% if its missing
> if expanded[:8] == "system32" or "syswow64":
> expanded = os.path.join(systemroot, expanded)
> #print "step9: " + expanded
>
> stripped = remove_arguments(expanded.lower())
>
> # just in case you're running as LUA
> # this is a race condition but you can suck it
> if(stripped):
> if os.access(stripped, os.R_OK):
> return stripped
>
> return None
>
> def test_normalize():
> test1 = "\??\C:\WINDOWS\system32\Drivers\CVPNDRVA.sys"
> test2 = "C:\WINDOWS\system32\msdtc.exe"
> test3 = "%SystemRoot%\system32\svchost.exe -k netsvcs"
> test4 = "\SystemRoot\System32\drivers\vga.sys"
> test5 = "system32\DRIVERS\compbatt.sys"
> test6 = "C:\Program Files\ABC\DEC Windows Services\Client Services.exe"
> test7 = "c:\Program Files\Common Files\Symantec Shared\SNDSrvc.exe"
> test8 = "C:\WINDOWS\system32\svchost -k dcomlaunch"
> test9 = ""
> test10 = "SysWow64\drivers\AsIO.sys"
> test11 = "\SystemRoot\system32\DRIVERS\amdsbs.sys"
> test12 = "C:\windows\system32\xeuwhatever.sys" #this breaks everything
If I'm getting this right, what you are trying to do is convert characters that
came from string-literal escape codes back to their literal representation. Why?
A simple
test12 = r"C:\windows\system32\xeuwhatever.sys"
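is all you need - note the leading r, which makes this a raw string literal in
which backslash sequences such as \x are not interpreted as escape codes. Then
test12[2] == "\\" # escape needed on the right: a raw string literal cannot end in a single backslash
holds.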
Diez
More information about the Python-list
mailing list