Windows file paths, again

Diez B. Roggisch deets at nospam.web.de
Wed Oct 21 14:34:53 EDT 2009


Dan Guido wrote:

> I'm trying to write a few methods that normalize Windows file paths.
> I've gotten it to work in 99% of the cases, but it seems like my code
> still chokes on '\x'. I've pasted my code below, can someone help me
> figure out a better way to write this? This seems overly complicated
> for such a simple problem...
> 
> 
> # returns normalized filepath with arguments removed
> def remove_arguments(filepath):
> #print "removing args from: " + filepath
> (head, tail) = os.path.split(filepath)
> pathext = os.environ['PATHEXT'].split(";")
> 
> while(tail != ''):
> #print "trying: " + os.path.join(head,tail)
> 
> # does it just work?
> if os.path.isfile(os.path.join(head, tail)):
> #print "it just worked"
> return os.path.join(head, tail)
> 
> # try every extension
> for ext in pathext:
> if os.path.isfile(os.path.join(head, tail) + ext):
> return os.path.join(head, tail) + ext
> 
> # remove the last word, try again
> tail = tail.split()[:-1]
> tail = " ".join(tail)
> 
> return None
> 
> escape_dict={'\a':r'\a',
>            '\b':r'\b',
>            '\c':r'\c',
>            '\f':r'\f',
>            '\n':r'\n',
>            '\r':r'\r',
>            '\t':r'\t',
>            '\v':r'\v',
>            '\'':r'\'',
>            #'\"':r'\"',
>            '\0':r'\0',
>            '\1':r'\1',
>            '\2':r'\2',
>            '\3':r'\3',
>            '\4':r'\4',
>            '\5':r'\5',
>            '\6':r'\6',
>            '\7':r'\a', #i have no idea
>            '\8':r'\8',
>            '\9':r'\9'}
> 
> def raw(text):
> """Returns a raw string representation of text"""
> new_string=''
> for char in text:
> try:
> new_string+=escape_dict[char]
> #print "escaped"
> except KeyError:
> new_string+=char
> #print "keyerror"
> #print new_string
> return new_string
> 
> # returns the normalized path to a file if it exists
> # returns None if it doesn't exist
> def normalize_path(path):
> #print "not normal: " + path
> 
> # make sure it's not blank
> if(path == ""):
> return None
> 
> # get rid of mistakenly escaped bytes
> path = raw(path)
> #print "step1: " + path
> 
> # remove quotes
> path = path.replace('"', '')
> #print "step2: " + path
> 
> #convert to lowercase
> lower = path.lower()
> #print "step3: " + lower
> 
> # expand all the normally formed environ variables
> expanded = os.path.expandvars(lower)
> #print "step4: " + expanded
> 
> # chop off \??\
> if expanded[:4] == "\\??\\":
> expanded = expanded[4:]
> #print "step5: " + expanded
> 
> # strip a leading '/'
> if expanded[:1] == "\\":
> expanded = expanded[1:]
> #print "step7: " + expanded
> 
> systemroot = os.environ['SYSTEMROOT']
> 
> # sometimes systemroot won't have %
> r = re.compile('systemroot', re.IGNORECASE)
> expanded = r.sub(systemroot, expanded)
> #print "step8: " + expanded
> 
> # prepend the %systemroot% if its missing
> if expanded[:8] == "system32" or "syswow64":
> expanded = os.path.join(systemroot, expanded)
> #print "step9: " + expanded
> 
> stripped = remove_arguments(expanded.lower())
> 
> # just in case you're running as LUA
> # this is a race condition but you can suck it
> if(stripped):
> if os.access(stripped, os.R_OK):
> return stripped
> 
> return None
> 
> def test_normalize():
> test1 = "\??\C:\WINDOWS\system32\Drivers\CVPNDRVA.sys"
> test2 = "C:\WINDOWS\system32\msdtc.exe"
> test3 = "%SystemRoot%\system32\svchost.exe -k netsvcs"
> test4 = "\SystemRoot\System32\drivers\vga.sys"
> test5 = "system32\DRIVERS\compbatt.sys"
> test6 = "C:\Program Files\ABC\DEC Windows Services\Client Services.exe"
> test7 = "c:\Program Files\Common Files\Symantec Shared\SNDSrvc.exe"
> test8 = "C:\WINDOWS\system32\svchost -k dcomlaunch"
> test9 = ""
> test10 = "SysWow64\drivers\AsIO.sys"
> test11 = "\SystemRoot\system32\DRIVERS\amdsbs.sys"
> test12 = "C:\windows\system32\xeuwhatever.sys" #this breaks everything

If I'm getting this right, what you are trying to do is convert characters that
came from string-literal escape codes back to their literal representation. Why?

A simple

  test12 = r"C:\windows\system32\xeuwhatever.sys"

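is all you need - note the leading r, which makes this a raw string literal,
so backslash sequences such as \x are not interpreted as escape codes. Then

  # The right-hand side needs the escape because a raw string literal
  # cannot end in a single backslash.
  test12[2] == "\\"

holds.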

Diez



More information about the Python-list mailing list