Alphabetical sorts
Leo Kislov
Leo.Kislov at gmail.com
Mon Oct 16 19:16:07 EDT 2006
On Oct 16, 2:39 pm, Tuomas <tuomas.vesteri... at pp.inet.fi> wrote:
> My application needs to handle different language sorts. Do you know a
> way to apply strxfrm dynamically i.e. without setting the locale?
Collation is almost always locale-dependent, so you have to set the locale.
One day I needed collation that worked on both Windows and Linux. The code
below is not very polished or well tested, but it worked for me:
import locale, os, codecs
# Encoding used to turn unicode strings into byte strings before they are
# handed to locale.strxfrm(); replaced whenever setup_locale() switches locale.
current_encoding = 'ascii'
# Name of the collation locale most recently installed by setup_locale().
# None means "nothing installed yet". An empty string must not be used for
# that: '' is itself a valid locale name (the user's default locale), and
# setup_locale('') would then be skipped as "already active".
current_locale = None
def get_collate_encoding(s):
    '''Grab character encoding from locale name.

    s is a locale name such as the one locale.setlocale() returns, e.g.
    'en_US.UTF-8' or 'de_DE.ISO-8859-15@euro'.

    Returns the encoding part of the name when Python has a codec for it.
    Returns 'ascii' when the name does not consist of exactly two
    dot-separated parts (e.g. 'C') or when the codec is unknown.'''
    # Drop a trailing '@modifier' (as in 'de_DE.ISO-8859-15@euro'); left in
    # place it becomes part of the codec name and the lookup below fails.
    name = s.split('@', 1)[0]
    split_name = name.split('.')
    if len(split_name) != 2:
        return 'ascii'
    encoding = split_name[1]
    if os.name == "nt":
        # Presumably Windows locale names end in a bare code page number
        # (e.g. '.1252') while the Python codec is named 'cp1252' -- confirm.
        encoding = 'cp' + encoding
    try:
        codecs.lookup(encoding)
    except LookupError:
        return 'ascii'
    return encoding
def setup_locale(locale_name):
    '''Make locale_name the active collation locale.

    Nothing happens when locale_name equals the name already recorded in
    current_locale. Otherwise LC_COLLATE is switched and current_encoding
    is refreshed from the locale name that locale.setlocale() reports back.'''
    global current_locale, current_encoding
    if current_locale != locale_name:
        effective_name = locale.setlocale(locale.LC_COLLATE, locale_name)
        current_encoding = get_collate_encoding(effective_name)
        # Recorded last, so an exception raised above leaves the cached
        # locale name untouched.
        current_locale = locale_name
def collate_key(s):
    '''Return the collation weight of a string.

    The string is first encoded with current_encoding; characters that
    encoding cannot represent are dropped ('ignore'). The encoded result
    is then transformed by locale.strxfrm().'''
    encoded = s.encode(current_encoding, 'ignore')
    return locale.strxfrm(encoded)
def collate(lst, locale_name):
    '''Return a new list with the unicode strings of lst sorted according
    to the rules of the given locale.

    Locale is specified as a 2 letter code. The collation locale is
    switched through setup_locale() before sorting; lst itself is not
    modified.'''
    setup_locale(locale_name)
    ordered = list(lst)
    ordered.sort(key=collate_key)
    return ordered
# Demo: sort the same three words under two different collation locales.
words = [u'c', u'ch', u'f']
print(' '.join(collate(words, 'en')))
print(' '.join(collate(words, 'cz')))
Prints:
c ch f
c f ch
More information about the Python-list
mailing list