Windows file paths, again

Dan Guido dguido at gmail.com
Wed Oct 21 15:20:05 EDT 2009


Hi Diez,

The source of the string literals is ConfigParser, so I can't just
mark them with an 'r'.

config = ConfigParser.RawConfigParser()
config.read(filename)
crazyfilepath = config.get(name, "ImagePath")
normalfilepath = normalize_path(crazyfilepath)

The ultimate origin of the strings is the _winreg module. Here I
also can't mark them with an 'r'.

regkey = OpenKey(HKEY_LOCAL_MACHINE,
"SYSTEM\\CurrentControlSet\\Services\\" + name)
crazyimagepath = QueryValueEx(regkey, "ImagePath")[0]
CloseKey(key)

--
Dan Guido



On Wed, Oct 21, 2009 at 2:34 PM, Diez B. Roggisch <deets at nospam.web.de> wrote:
> Dan Guido wrote:
>
>> I'm trying to write a few methods that normalize Windows file paths.
>> I've gotten it to work in 99% of the cases, but it seems like my code
>> still chokes on '\x'. I've pasted my code below, can someone help me
>> figure out a better way to write this? This seems overly complicated
>> for such a simple problem...
>>
>>
>> # returns normalized filepath with arguments removed
>> def remove_arguments(filepath):
>> #print "removing args from: " + filepath
>> (head, tail) = os.path.split(filepath)
>> pathext = os.environ['PATHEXT'].split(";")
>>
>> while(tail != ''):
>> #print "trying: " + os.path.join(head,tail)
>>
>> # does it just work?
>> if os.path.isfile(os.path.join(head, tail)):
>> #print "it just worked"
>> return os.path.join(head, tail)
>>
>> # try every extension
>> for ext in pathext:
>> if os.path.isfile(os.path.join(head, tail) + ext):
>> return os.path.join(head, tail) + ext
>>
>> # remove the last word, try again
>> tail = tail.split()[:-1]
>> tail = " ".join(tail)
>>
>> return None
>>
>> escape_dict={'\a':r'\a',
>>            '\b':r'\b',
>>            '\c':r'\c',
>>            '\f':r'\f',
>>            '\n':r'\n',
>>            '\r':r'\r',
>>            '\t':r'\t',
>>            '\v':r'\v',
>>            '\'':r'\'',
>>            #'\"':r'\"',
>>            '\0':r'\0',
>>            '\1':r'\1',
>>            '\2':r'\2',
>>            '\3':r'\3',
>>            '\4':r'\4',
>>            '\5':r'\5',
>>            '\6':r'\6',
>>            '\7':r'\a', #i have no idea
>>            '\8':r'\8',
>>            '\9':r'\9'}
>>
>> def raw(text):
>> """Returns a raw string representation of text"""
>> new_string=''
>> for char in text:
>> try:
>> new_string+=escape_dict[char]
>> #print "escaped"
>> except KeyError:
>> new_string+=char
>> #print "keyerror"
>> #print new_string
>> return new_string
>>
>> # returns the normalized path to a file if it exists
>> # returns None if it doesn't exist
>> def normalize_path(path):
>> #print "not normal: " + path
>>
>> # make sure it's not blank
>> if(path == ""):
>> return None
>>
>> # get rid of mistakenly escaped bytes
>> path = raw(path)
>> #print "step1: " + path
>>
>> # remove quotes
>> path = path.replace('"', '')
>> #print "step2: " + path
>>
>> #convert to lowercase
>> lower = path.lower()
>> #print "step3: " + lower
>>
>> # expand all the normally formed environ variables
>> expanded = os.path.expandvars(lower)
>> #print "step4: " + expanded
>>
>> # chop off \??\
>> if expanded[:4] == "\\??\\":
>> expanded = expanded[4:]
>> #print "step5: " + expanded
>>
>> # strip a leading '\'
>> if expanded[:1] == "\\":
>> expanded = expanded[1:]
>> #print "step7: " + expanded
>>
>> systemroot = os.environ['SYSTEMROOT']
>>
>> # sometimes systemroot won't have %
>> r = re.compile('systemroot', re.IGNORECASE)
>> expanded = r.sub(systemroot, expanded)
>> #print "step8: " + expanded
>>
>> # prepend the %systemroot% if its missing
>> if expanded[:8] == "system32" or "syswow64":
>> expanded = os.path.join(systemroot, expanded)
>> #print "step9: " + expanded
>>
>> stripped = remove_arguments(expanded.lower())
>>
>> # just in case you're running as LUA
>> # this is a race condition but you can suck it
>> if(stripped):
>> if os.access(stripped, os.R_OK):
>> return stripped
>>
>> return None
>>
>> def test_normalize():
>> test1 = "\??\C:\WINDOWS\system32\Drivers\CVPNDRVA.sys"
>> test2 = "C:\WINDOWS\system32\msdtc.exe"
>> test3 = "%SystemRoot%\system32\svchost.exe -k netsvcs"
>> test4 = "\SystemRoot\System32\drivers\vga.sys"
>> test5 = "system32\DRIVERS\compbatt.sys"
>> test6 = "C:\Program Files\ABC\DEC Windows Services\Client Services.exe"
>> test7 = "c:\Program Files\Common Files\Symantec Shared\SNDSrvc.exe"
>> test8 = "C:\WINDOWS\system32\svchost -k dcomlaunch"
>> test9 = ""
>> test10 = "SysWow64\drivers\AsIO.sys"
>> test11 = "\SystemRoot\system32\DRIVERS\amdsbs.sys"
>> test12 = "C:\windows\system32\xeuwhatever.sys" #this breaks everything
>
> If I'm getting this right, what you are trying to do is convert characters that
> come from string-literal escape codes to their literal representation. Why?
>
> A simple
>
>  test12 = r"C:\windows\system32\xeuwhatever.sys"
>
> is all you need - note the leading r. Then
>
>  test12[2] == "\\" # escaped on the right because a raw string literal
>                    # cannot end in a single backslash
>
> holds.
>
> Diez
> --
> http://mail.python.org/mailman/listinfo/python-list
>



More information about the Python-list mailing list