Changing filenames from Greeklish => Greek (subprocess complain)
Νικόλαος Κούρας
nikos.gr33k at gmail.com
Thu Jun 6 16:05:29 EDT 2013
Τη Πέμπτη, 6 Ιουνίου 2013 10:42:25 μ.μ. UTC+3, ο χρήστης MRAB έγραψε:
> On 06/06/2013 19:13, Νικόλαος Κούρας
> wrote:
>
>
>
> Τη Πέμπτη, 6 Ιουνίου 2013 3:50:52 μ.μ. UTC+3, ο χρήστης MRAB έγραψε:
>
> > If you're happy for that change to happen, then go ahead.
>
> I have made some modifications to the code you provided me, but I think something that doesn't occur to me needs fixing.
>
>
> for example i switched:
>
> # Give the path as a bytestring so that we'll get the filenames as bytestrings
> path = b"/home/nikos/public_html/data/apps/"
>
> # Walk through the files.
> for root, dirs, files in os.walk( path ):
> for filename in files:
>
> to:
>
> # Give the path as a bytestring so that we'll get the filenames as bytestrings
> path = os.listdir( b'/home/nikos/public_html/data/apps/' )
>
>
> os.listdir returns a list of the names of the objects in the given
> directory.
>
>
>
>
> # iterate over all filenames in the apps directory
>
>
> Exactly, all the names.
>
>
>
>
> for fullpath in path
> # Grabbing just the filename from path
>
>
> The name is a bytestring. Note, name, NOT full path.
>
>
>
> The following line will fail because the name is a bytestring,
> and you can't mix bytestrings with Unicode strings:
>
>
> filename = fullpath.replace( '/home/nikos/public_html/data/apps/', '' )
>
>            ^ bytestring
>                              ^ Unicode string
>                                                                    ^ Unicode string
>
>
> I dont know if it has the same effect:
> Here is the whole snippet:
>
>
> =============================================
> # Give the path as a bytestring so that we'll get the filenames as bytestrings
> path = os.listdir( b'/home/nikos/public_html/data/apps/' )
>
> # iterate over all filenames in the apps directory
> for fullpath in path
> # Grabbing just the filename from path
> filename = fullpath.replace( '/home/nikos/public_html/data/apps/', '' )
> try:
> # Is this name encoded in utf-8?
> filename.decode('utf-8')
> except UnicodeDecodeError:
> # Decoding from UTF-8 failed, which means that the name is not valid utf-8
>
> # It appears that this filename is encoded in greek-iso, so decode from that and re-encode to utf-8
> new_filename = filename.decode('iso-8859-7').encode('utf-8')
>
> # rename filename form greek bytestream-> utf-8 bytestream
> old_path = os.path.join(root, filename)
> new_path = os.path.join(root, new_filename)
> os.rename( old_path, new_path )
>
>
> #============================================================
> # Compute a set of current fullpaths
> path = os.listdir( '/home/nikos/public_html/data/apps/' )
>
> # Load'em
> for fullpath in path:
> try:
> # Check the presence of a file against the database and insert if it doesn't exist
> cur.execute('''SELECT url FROM files WHERE url = %s''', (fullpath,) )
> data = cur.fetchone() #URL is unique, so should only be one
>
> if not data:
> # First time for file; primary key is automatic, hit is defaulted
> cur.execute('''INSERT INTO files (url, host, lastvisit) VALUES (%s, %s, %s)''', (fullpath, host, lastvisit) )
> except pymysql.ProgrammingError as e:
> print( repr(e) )
> ==================================================================
>
> The error is:
> [Thu Jun 06 21:10:23 2013] [error] [client 79.103.41.173] File "files.py", line 64
> [Thu Jun 06 21:10:23 2013] [error] [client 79.103.41.173] for fullpath in path
> [Thu Jun 06 21:10:23 2013] [error] [client 79.103.41.173] ^
> [Thu Jun 06 21:10:23 2013] [error] [client 79.103.41.173] SyntaxError: invalid syntax
>
>
> Doesn't os.listdir( ...) return a list with all filenames?
>
> But then again, when the replacing takes place to shorten the fullpath to just the filename, I think it doesn't work because os.listdir was opened as a bytestring and not as a string....
>
> What am i doing wrong?
>
>
> You're changing things without checking what they do!
Ah yes, it returns filenames, not path/to/filenames
#========================================================
# Give the path as a bytestring so that we'll get the filenames as bytestrings
path = os.listdir( b'/home/nikos/public_html/data/apps/' )
# iterate over all filenames in the apps directory
for filename in path:
# Grabbing just the filename from path
try:
# Is this name encoded in utf-8?
filename.decode('utf-8')
except UnicodeDecodeError:
# Decoding from UTF-8 failed, which means that the name is not valid utf-8
# It appears that this filename is encoded in greek-iso, so decode from that and re-encode to utf-8
new_filename = filename.decode('iso-8859-7').encode('utf-8')
# rename filename form greek bytestream-> utf-8 bytestream
old_path = os.path.join(root, filename)
new_path = os.path.join(root, new_filename)
os.rename( old_path, new_path )
#========================================================
# Compute a set of current fullpaths
path = os.listdir( '/home/nikos/public_html/data/apps/' )
# Load'em
for filename in path:
try:
# Check the presence of a file against the database and insert if it doesn't exist
cur.execute('''SELECT url FROM files WHERE url = %s''', (filename,) )
data = cur.fetchone() #URL is unique, so should only be one
if not data:
# First time for file; primary key is automatic, hit is defaulted
cur.execute('''INSERT INTO files (url, host, lastvisit) VALUES (%s, %s, %s)''', (filename, host, lastvisit) )
except pymysql.ProgrammingError as e:
print( repr(e) )
# Delete spurious
cur.execute('''SELECT url FROM files''')
data = cur.fetchall()
for fullpath in data:
if fullpath not in "What should be written here in place of ditched set"
cur.execute('''DELETE FROM files WHERE url = %s''', (fullpath,) )
=============================
a) Is it correct that the first time I open os.listdir() as binary to grab the filenames as bytestrings, and the 2nd time normally to grab the filenames as Unicode strings?
b) My spurious procedure is messed up now that I have ditched the set fullpaths()
More information about the Python-list
mailing list