[Python-checkins] python/dist/src/Lib/test cjkencodings_test.py, NONE, 1.1 test_codecencodings_cn.py, NONE, 1.1 test_codecencodings_jp.py, NONE, 1.1 test_codecencodings_kr.py, NONE, 1.1 test_codecencodings_tw.py, NONE, 1.1 test_codecmaps_cn.py, NONE, 1.1 test_codecmaps_jp.py, NONE, 1.1 test_codecmaps_kr.py, NONE, 1.1 test_codecmaps_tw.py, NONE, 1.1 test_multibytecodec.py, NONE, 1.1 test_multibytecodec_support.py, NONE, 1.1 regrtest.py, 1.150, 1.151

perky at users.sourceforge.net perky at users.sourceforge.net
Sat Jan 17 09:29:30 EST 2004


Update of /cvsroot/python/python/dist/src/Lib/test
In directory sc8-pr-cvs1:/tmp/cvs-serv14239/Lib/test

Modified Files:
	regrtest.py 
Added Files:
	cjkencodings_test.py test_codecencodings_cn.py 
	test_codecencodings_jp.py test_codecencodings_kr.py 
	test_codecencodings_tw.py test_codecmaps_cn.py 
	test_codecmaps_jp.py test_codecmaps_kr.py test_codecmaps_tw.py 
	test_multibytecodec.py test_multibytecodec_support.py 
Log Message:
Add CJK codecs support as discussed on python-dev. (SF #873597)

Several style fixes are suggested by Martin v. Loewis and
Marc-Andre Lemburg. Thanks!


--- NEW FILE: cjkencodings_test.py ---
teststring = {
'big5': (
"\xa6\x70\xa6\xf3\xa6\x62\x20\x50\x79\x74\x68\x6f\x6e\x20\xa4\xa4"
"\xa8\xcf\xa5\xce\xac\x4a\xa6\xb3\xaa\xba\x20\x43\x20\x6c\x69\x62"
"\x72\x61\x72\x79\x3f\x0a\xa1\x40\xa6\x62\xb8\xea\xb0\x54\xac\xec"
"\xa7\xde\xa7\xd6\xb3\x74\xb5\x6f\xae\x69\xaa\xba\xa4\xb5\xa4\xd1"
"\x2c\x20\xb6\x7d\xb5\x6f\xa4\xce\xb4\xfa\xb8\xd5\xb3\x6e\xc5\xe9"
"\xaa\xba\xb3\x74\xab\xd7\xac\x4f\xa4\xa3\xae\x65\xa9\xbf\xb5\xf8"
"\xaa\xba\x0a\xbd\xd2\xc3\x44\x2e\x20\xac\xb0\xa5\x5b\xa7\xd6\xb6"
"\x7d\xb5\x6f\xa4\xce\xb4\xfa\xb8\xd5\xaa\xba\xb3\x74\xab\xd7\x2c"
"\x20\xa7\xda\xad\xcc\xab\x4b\xb1\x60\xa7\xc6\xb1\xe6\xaf\xe0\xa7"
"\x51\xa5\xce\xa4\x40\xa8\xc7\xa4\x77\xb6\x7d\xb5\x6f\xa6\x6e\xaa"
"\xba\x0a\x6c\x69\x62\x72\x61\x72\x79\x2c\x20\xa8\xc3\xa6\xb3\xa4"
"\x40\xad\xd3\x20\x66\x61\x73\x74\x20\x70\x72\x6f\x74\x6f\x74\x79"
"\x70\x69\x6e\x67\x20\xaa\xba\x20\x70\x72\x6f\x67\x72\x61\x6d\x6d"
"\x69\x6e\x67\x20\x6c\x61\x6e\x67\x75\x61\x67\x65\x20\xa5\x69\x0a"
"\xa8\xd1\xa8\xcf\xa5\xce\x2e\x20\xa5\xd8\xab\x65\xa6\xb3\xb3\x5c"
"\xb3\x5c\xa6\x68\xa6\x68\xaa\xba\x20\x6c\x69\x62\x72\x61\x72\x79"
"\x20\xac\x4f\xa5\x48\x20\x43\x20\xbc\x67\xa6\xa8\x2c\x20\xa6\xd3"
[...962 lines suppressed...]
"\x20\xe3\x81\xa7\xe3\x81\xaf\xe3\x81\x9d\xe3\x81\x86\xe3\x81\x84"
"\xe3\x81\xa3\xe3\x81\x9f\xe5\xb0\x8f\xe7\xb4\xb0\xe5\xb7\xa5\xe3"
"\x81\x8c\xe8\xbf\xbd\xe5\x8a\xa0\xe3\x81\x95\xe3\x82\x8c\xe3\x82"
"\x8b\xe3\x81\x93\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\x82\xe3\x81\xbe"
"\xe3\x82\x8a\xe3\x81\x82\xe3\x82\x8a\xe3\x81\xbe\xe3\x81\x9b\xe3"
"\x82\x93\xe3\x80\x82\x0a\xe8\xa8\x80\xe8\xaa\x9e\xe8\x87\xaa\xe4"
"\xbd\x93\xe3\x81\xae\xe6\xa9\x9f\xe8\x83\xbd\xe3\x81\xaf\xe6\x9c"
"\x80\xe5\xb0\x8f\xe9\x99\x90\xe3\x81\xab\xe6\x8a\xbc\xe3\x81\x95"
"\xe3\x81\x88\xe3\x80\x81\xe5\xbf\x85\xe8\xa6\x81\xe3\x81\xaa\xe6"
"\xa9\x9f\xe8\x83\xbd\xe3\x81\xaf\xe6\x8b\xa1\xe5\xbc\xb5\xe3\x83"
"\xa2\xe3\x82\xb8\xe3\x83\xa5\xe3\x83\xbc\xe3\x83\xab\xe3\x81\xa8"
"\xe3\x81\x97\xe3\x81\xa6\xe8\xbf\xbd\xe5\x8a\xa0\xe3\x81\x99\xe3"
"\x82\x8b\xe3\x80\x81\xe3\x81\xa8\xe3\x81\x84\xe3\x81\x86\xe3\x81"
"\xae\xe3\x81\x8c\x20\x50\x79\x74\x68\x6f\x6e\x20\xe3\x81\xae\xe3"
"\x83\x9d\xe3\x83\xaa\xe3\x82\xb7\xe3\x83\xbc\xe3\x81\xa7\xe3\x81"
"\x99\xe3\x80\x82\x0a\x0a\xe3\x83\x8e\xe3\x81\x8b\xe3\x82\x9a\x20"
"\xe3\x83\x88\xe3\x82\x9a\x20\xe3\x83\x88\xe3\x82\xad\xef\xa8\xb6"
"\xef\xa8\xb9\x20\xf0\xa1\x9a\xb4\xf0\xaa\x8e\x8c\x20\xe9\xba\x80"
"\xe9\xbd\x81\xf0\xa9\x9b\xb0\x0a"),
}

--- NEW FILE: test_codecencodings_cn.py ---
#!/usr/bin/env python
#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#
# $CJKCodecs: test_codecencodings_cn.py,v 1.1 2003/12/19 03:00:05 perky Exp $

from test import test_support
from test import test_multibytecodec_support
import unittest

class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gb2312'
    tstring = test_multibytecodec_support.load_teststring('gb2312')
    codectests = (
        # invalid bytes
        ("abc\x81\x81\xc1\xc4", "strict",  None),
        ("abc\xc8", "strict",  None),
        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x81\x81\xc1\xc4", "ignore",  u"abc\u804a"),
        ("\xc1\x64", "strict", None),
    )

class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gbk'
    tstring = test_multibytecodec_support.load_teststring('gbk')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict",  None),  
        ("abc\xc8", "strict",  None),  
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u804a"),
        ("\x83\x34\x83\x31", "strict", None),
    )

class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gb18030'
    tstring = test_multibytecodec_support.load_teststring('gb18030')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict",  None),
        ("abc\xc8", "strict",  None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u804a"),
        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
    )
    has_iso10646 = True

def test_main():
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(Test_GB2312))
    suite.addTest(unittest.makeSuite(Test_GBK))
    suite.addTest(unittest.makeSuite(Test_GB18030))
    test_support.run_suite(suite)

if __name__ == "__main__":
    test_main()


--- NEW FILE: test_codecencodings_jp.py ---
#!/usr/bin/env python
#
# test_codecencodings_jp.py
#   Codec encoding tests for Japanese encodings.
#
# $CJKCodecs: test_codecencodings_jp.py,v 1.2 2004/01/06 09:25:37 perky Exp $

from test import test_support
from test import test_multibytecodec_support
import unittest

class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'cp932'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = (
        # invalid bytes
        ("abc\x81\x00\x81\x00\x82\x84", "strict",  None),
        ("abc\xf8", "strict",  None),
        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x81\x00\x82\x84", "ignore",  u"abc\uff44"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
    )

class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                        unittest.TestCase):
    encoding = 'euc_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict",  None),
        ("abc\xc8", "strict",  None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u7956"),
        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
        ("\xc1\x64", "strict", None),
        ("\xa1\xc0", "strict", u"\uff3c"),
    )
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩"
    )

eucjp_commontests = (
    ("abc\x80\x80\xc1\xc4", "strict",  None),
    ("abc\xc8", "strict",  None),
    ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
    ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
    ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u7956"),
    ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
    ("\xc1\x64", "strict", None),
)

class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    encoding = 'euc_jp'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
        (u"\xa5", "strict", "\x5c"),
        (u"\u203e", "strict", "\x7e"),
    )

class Test_EUC_JP_STRICT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    encoding = 'euc_jp_strict'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\\\\"),
        (u"\xa5", "strict", None),
        (u"\u203e", "strict", None),
    )

shiftjis_commonenctests = (
    ("abc\x80\x80\x82\x84", "strict",  None),
    ("abc\xf8", "strict",  None),
    ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
    ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
    ("abc\x80\x80\x82\x84def", "ignore",  u"abc\uff44def"),
)

class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'shift_jis'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        ("\\\x7e", "strict", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
    )

class Test_SJIS_STRICT(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'shift_jis_strict'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )

class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'shift_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\x82\x84", "strict",  None),
        ("abc\xf8", "strict",  None),
        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x80\x80\x82\x84def", "ignore",  u"abc\uff44def"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\x85Gℜ\x85Q = ⟨ሴ⟩"
    )

def test_main():
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(Test_CP932))
    suite.addTest(unittest.makeSuite(Test_EUC_JISX0213))
    suite.addTest(unittest.makeSuite(Test_EUC_JP_COMPAT))
    suite.addTest(unittest.makeSuite(Test_SJIS_COMPAT))
    if test_multibytecodec_support.__cjkcodecs__:
        suite.addTest(unittest.makeSuite(Test_EUC_JP_STRICT))
        suite.addTest(unittest.makeSuite(Test_SJIS_STRICT))
    suite.addTest(unittest.makeSuite(Test_SJISX0213))
    test_support.run_suite(suite)

if __name__ == "__main__":
    test_main()


--- NEW FILE: test_codecencodings_kr.py ---
#!/usr/bin/env python
#
# test_codecencodings_kr.py
#   Codec encoding tests for ROK encodings.
#
# $CJKCodecs: test_codecencodings_kr.py,v 1.1 2003/12/19 03:00:06 perky Exp $

from test import test_support
from test import test_multibytecodec_support
import unittest

class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'cp949'
    tstring = test_multibytecodec_support.load_teststring('cp949')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict",  None),
        ("abc\xc8", "strict",  None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\uc894"),
    )

class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'euc_kr'
    tstring = test_multibytecodec_support.load_teststring('euc_kr')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict",  None),
        ("abc\xc8", "strict",  None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\uc894"),
    )

class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'johab'
    tstring = test_multibytecodec_support.load_teststring('johab')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict",  None),
        ("abc\xc8", "strict",  None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ucd27"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\ucd27\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\ucd27"),
    )

def test_main():
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(Test_CP949))
    suite.addTest(unittest.makeSuite(Test_EUCKR))
    suite.addTest(unittest.makeSuite(Test_JOHAB))
    test_support.run_suite(suite)

if __name__ == "__main__":
    test_main()


--- NEW FILE: test_codecencodings_tw.py ---
#!/usr/bin/env python
#
# test_codecencodings_tw.py
#   Codec encoding tests for ROC encodings.
#
# $CJKCodecs: test_codecencodings_tw.py,v 1.1 2003/12/19 03:00:06 perky Exp $

from test import test_support
from test import test_multibytecodec_support
import unittest

class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'big5'
    tstring = test_multibytecodec_support.load_teststring('big5')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict",  None),
        ("abc\xc8", "strict",  None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u8b10"),
    )

def test_main():
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(Test_Big5))
    test_support.run_suite(suite)

if __name__ == "__main__":
    test_main()


--- NEW FILE: test_codecmaps_cn.py ---
#!/usr/bin/env python
#
# test_codecmaps_cn.py
#   Codec mapping tests for PRC encodings
#
# $CJKCodecs: test_codecmaps_cn.py,v 1.2 2004/01/17 12:47:19 perky Exp $

from test import test_support
from test import test_multibytecodec_support
import unittest

class TestGB2312Map(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'gb2312'
    mapfilename = 'EUC-CN.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-CN.TXT'

class TestGBKMap(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'gbk'
    mapfilename = 'CP936.TXT'
    mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/' \
                 'MICSFT/WINDOWS/CP936.TXT'

def test_main():
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(TestGB2312Map))
    suite.addTest(unittest.makeSuite(TestGBKMap))
    test_support.run_suite(suite)

test_multibytecodec_support.register_skip_expected(TestGB2312Map, TestGBKMap)
if __name__ == "__main__":
    test_main()


--- NEW FILE: test_codecmaps_jp.py ---
#!/usr/bin/env python
#
# test_codecmaps_jp.py
#   Codec mapping tests for Japanese encodings
#
# $CJKCodecs: test_codecmaps_jp.py,v 1.2 2004/01/17 12:47:19 perky Exp $

from test import test_support
from test import test_multibytecodec_support
import unittest

class TestCP932Map(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'cp932'
    mapfilename = 'CP932.TXT'
    mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/' \
                 'WINDOWS/CP932.TXT'
    supmaps = [
        ('\x80', u'\u0080'),
        ('\xa0', u'\uf8f0'),
        ('\xfd', u'\uf8f1'),
        ('\xfe', u'\uf8f2'),
        ('\xff', u'\uf8f3'),
    ]
    for i in range(0xa1, 0xe0):
        supmaps.append((chr(i), unichr(i+0xfec0)))


class TestEUCJPCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
                         unittest.TestCase):
    encoding = 'euc_jp'
    mapfilename = 'EUC-JP.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-JP.TXT'


class TestSJISCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
                        unittest.TestCase):
    encoding = 'shift_jis'
    mapfilename = 'SHIFTJIS.TXT'
    mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE' \
                 '/EASTASIA/JIS/SHIFTJIS.TXT'
    pass_enctest = [
        ('\x81_', u'\\'),
    ]
    pass_dectest = [
        ('\\', u'\xa5'),
        ('~', u'\u203e'),
        ('\x81_', u'\\'),
    ]


class TestSJISSTRICTMap(test_multibytecodec_support.TestBase_Mapping,
                        unittest.TestCase):
    encoding = 'shift_jis_strict'
    mapfilename = 'SHIFTJIS.TXT'
    mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE' \
                 '/EASTASIA/JIS/SHIFTJIS.TXT'


class TestEUCJISX0213Map(test_multibytecodec_support.TestBase_Mapping,
                         unittest.TestCase):
    encoding = 'euc_jisx0213'
    mapfilename = 'EUC-JISX0213.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-JISX0213.TXT'


class TestSJISX0213Map(test_multibytecodec_support.TestBase_Mapping,
                       unittest.TestCase):
    encoding = 'shift_jisx0213'
    mapfilename = 'SHIFT_JISX0213.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/SHIFT_JISX0213.TXT'


def test_main():
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(TestCP932Map))
    suite.addTest(unittest.makeSuite(TestEUCJPCOMPATMap))
    suite.addTest(unittest.makeSuite(TestSJISCOMPATMap))
    if test_multibytecodec_support.__cjkcodecs__:
        suite.addTest(unittest.makeSuite(TestSJISSTRICTMap))
    suite.addTest(unittest.makeSuite(TestEUCJISX0213Map))
    suite.addTest(unittest.makeSuite(TestSJISX0213Map))
    test_support.run_suite(suite)

test_multibytecodec_support.register_skip_expected(TestCP932Map,
    TestEUCJPCOMPATMap, TestSJISCOMPATMap, TestEUCJISX0213Map,
    TestSJISX0213Map)
if __name__ == "__main__":
    test_main()


--- NEW FILE: test_codecmaps_kr.py ---
#!/usr/bin/env python
#
# test_codecmaps_kr.py
#   Codec mapping tests for ROK encodings
#
# $CJKCodecs: test_codecmaps_kr.py,v 1.2 2004/01/17 12:47:19 perky Exp $

from test import test_support
from test import test_multibytecodec_support
import unittest

class TestCP949Map(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'cp949'
    mapfilename = 'CP949.TXT'
    mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT' \
                 '/WINDOWS/CP949.TXT'


class TestEUCKRMap(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'euc_kr'
    mapfilename = 'EUC-KR.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-KR.TXT'


class TestJOHABMap(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'johab'
    mapfilename = 'JOHAB.TXT'
    mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/' \
                 'KSC/JOHAB.TXT'
    # KS X 1001 standard assigned 0x5c as WON SIGN.
    # but, in early 90s that is the only era used johab widely,
    # the most softwares implements it as REVERSE SOLIDUS.
    # So, we ignore the standard here.
    pass_enctest = [('\\', u'\u20a9')]
    pass_dectest = [('\\', u'\u20a9')]

def test_main():
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(TestCP949Map))
    suite.addTest(unittest.makeSuite(TestEUCKRMap))
    suite.addTest(unittest.makeSuite(TestJOHABMap))
    test_support.run_suite(suite)

test_multibytecodec_support.register_skip_expected(TestCP949Map,
    TestEUCKRMap, TestJOHABMap)
if __name__ == "__main__":
    test_main()


--- NEW FILE: test_codecmaps_tw.py ---
#!/usr/bin/env python
#
# test_codecmaps_tw.py
#   Codec mapping tests for ROC encodings
#
# $CJKCodecs: test_codecmaps_tw.py,v 1.2 2004/01/17 12:47:19 perky Exp $

from test import test_support
from test import test_multibytecodec_support
import unittest

class TestBIG5Map(test_multibytecodec_support.TestBase_Mapping,
                  unittest.TestCase):
    encoding = 'big5'
    mapfilename = 'BIG5.TXT'
    mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/' \
                 'EASTASIA/OTHER/BIG5.TXT'

class TestCP950Map(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'cp950'
    mapfilename = 'CP950.TXT'
    mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/' \
                 'WINDOWS/CP950.TXT'
    pass_enctest = [
        ('\xa2\xcc', u'\u5341'),
        ('\xa2\xce', u'\u5345'),
    ]

def test_main():
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(TestBIG5Map))
    suite.addTest(unittest.makeSuite(TestCP950Map))
    test_support.run_suite(suite)

test_multibytecodec_support.register_skip_expected(TestBIG5Map, TestCP950Map)
if __name__ == "__main__":
    test_main()


--- NEW FILE: test_multibytecodec.py ---
#!/usr/bin/env python
#
# test_multibytecodec.py
#   Unit test for multibytecodec itself
#
# $CJKCodecs: test_multibytecodec.py,v 1.5 2004/01/06 02:26:28 perky Exp $

from test import test_support
from test import test_multibytecodec_support
import unittest, StringIO, codecs

class Test_StreamWriter(unittest.TestCase):
    if len(u'\U00012345') == 2: # UCS2
        def test_gb18030(self):
            s= StringIO.StringIO()
            c = codecs.lookup('gb18030')[3](s)
            c.write(u'123')
            self.assertEqual(s.getvalue(), '123')
            c.write(u'\U00012345')
            self.assertEqual(s.getvalue(), '123\x907\x959')
            c.write(u'\U00012345'[0])
            self.assertEqual(s.getvalue(), '123\x907\x959')
            c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
            self.assertEqual(s.getvalue(),
                    '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
            c.write(u'\U00012345'[0])
            self.assertEqual(s.getvalue(),
                    '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
            self.assertRaises(UnicodeError, c.reset)
            self.assertEqual(s.getvalue(),
                    '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')

        # standard utf-8 codecs has broken StreamReader
        if test_multibytecodec_support.__cjkcodecs__:
            def test_utf_8(self):
                s= StringIO.StringIO()
                c = codecs.lookup('utf-8')[3](s)
                c.write(u'123')
                self.assertEqual(s.getvalue(), '123')
                c.write(u'\U00012345')
                self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
                c.write(u'\U00012345'[0])
                self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
                c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
                self.assertEqual(s.getvalue(),
                    '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                    '\xea\xb0\x80\xc2\xac')
                c.write(u'\U00012345'[0])
                self.assertEqual(s.getvalue(),
                    '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                    '\xea\xb0\x80\xc2\xac')
                c.reset()
                self.assertEqual(s.getvalue(),
                    '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                    '\xea\xb0\x80\xc2\xac\xed\xa0\x88')
                c.write(u'\U00012345'[1])
                self.assertEqual(s.getvalue(),
                    '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                    '\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')

    else: # UCS4
        pass

    def test_nullcoding(self):
        self.assertEqual(''.decode('utf-8'), u'')
        self.assertEqual(unicode('', 'utf-8'), u'')
        self.assertEqual(u''.encode('utf-8'), '')

    def test_str_decode(self):
        self.assertEqual('abcd'.encode('utf-8'), 'abcd')

def test_main():
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(Test_StreamWriter))
    test_support.run_suite(suite)

if __name__ == "__main__":
    test_main()


--- NEW FILE: test_multibytecodec_support.py ---
#!/usr/bin/env python
#
# test_multibytecodec_support.py
#   Common Unittest Routines for CJK codecs
#
# $CJKCodecs: test_multibytecodec_support.py,v 1.5 2004/01/17 12:47:19 perky Exp $

import sys, codecs, os.path
import unittest
from test import test_support
from StringIO import StringIO

__cjkcodecs__ = 0 # define this as 0 for python

class TestBase:
    encoding        = ''   # codec name
    codec           = None # codec tuple (with 4 elements)
    tstring         = ''   # string to test StreamReader

    codectests      = None # must set. codec test tuple
    roundtriptest   = 1    # set if roundtrip is possible with unicode
    has_iso10646    = 0    # set if this encoding contains whole iso10646 map
    xmlcharnametest = None # string to test xmlcharrefreplace

    def setUp(self):
        if self.codec is None:
            self.codec = codecs.lookup(self.encoding)
        self.encode, self.decode, self.reader, self.writer = self.codec

    def test_chunkcoding(self):
        for native, utf8 in zip(*[StringIO(f).readlines()
                                  for f in self.tstring]):
            u = self.decode(native)[0]
            self.assertEqual(u, utf8.decode('utf-8'))
            if self.roundtriptest:
                self.assertEqual(native, self.encode(u)[0])

    def test_errorhandle(self):
        for source, scheme, expected in self.codectests:
            if type(source) == type(''):
                func = self.decode
            else:
                func = self.encode
            if expected:
                result = func(source, scheme)[0]
                self.assertEqual(result, expected)
            else:
                self.assertRaises(UnicodeError, func, source, scheme)

    if sys.hexversion >= 0x02030000:
        def test_xmlcharrefreplace(self):
            if self.has_iso10646:
                return

            s = u"\u0b13\u0b23\u0b60 nd eggs"
            self.assertEqual(
                self.encode(s, "xmlcharrefreplace")[0],
                "ଓଣୠ nd eggs"
            )

        def test_customreplace(self):
            if self.has_iso10646:
                return

            import htmlentitydefs

            names = {}
            for (key, value) in htmlentitydefs.entitydefs.items():
                if len(value)==1:
                    names[value.decode('latin-1')] = self.decode(key)[0]
                else:
                    names[unichr(int(value[2:-1]))] = self.decode(key)[0]

            def xmlcharnamereplace(exc):
                if not isinstance(exc, UnicodeEncodeError):
                    raise TypeError("don't know how to handle %r" % exc)
                l = []
                for c in exc.object[exc.start:exc.end]:
                    try:
                        l.append(u"&%s;" % names[c])
                    except KeyError:
                        l.append(u"&#%d;" % ord(c))
                return (u"".join(l), exc.end)

            codecs.register_error(
                "test.xmlcharnamereplace", xmlcharnamereplace)

            if self.xmlcharnametest:
                sin, sout = self.xmlcharnametest
            else:
                sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
                sout = "«ℜ» = ⟨ሴ⟩"
            self.assertEqual(self.encode(sin,
                                        "test.xmlcharnamereplace")[0], sout)

    def test_streamreader(self):
        UTF8Writer = codecs.getwriter('utf-8')
        for name in ["read", "readline", "readlines"]:
            for sizehint in [None, -1] + range(1, 33) + \
                            [64, 128, 256, 512, 1024]:
                istream = self.reader(StringIO(self.tstring[0]))
                ostream = UTF8Writer(StringIO())
                func = getattr(istream, name)
                while 1:
                    data = func(sizehint)
                    if not data:
                        break
                    if name == "readlines":
                        ostream.writelines(data)
                    else:
                        ostream.write(data)

                self.assertEqual(ostream.getvalue(), self.tstring[1])

    def test_streamwriter(self):
        if __cjkcodecs__:
            readfuncs = ('read', 'readline', 'readlines')
        else:
            # standard utf8 codec has broken readline and readlines.
            readfuncs = ('read',)
        UTF8Reader = codecs.getreader('utf-8')
        for name in readfuncs:
            for sizehint in [None] + range(1, 33) + \
                            [64, 128, 256, 512, 1024]:
                istream = UTF8Reader(StringIO(self.tstring[1]))
                ostream = self.writer(StringIO())
                func = getattr(istream, name)
                while 1:
                    if sizehint is not None:
                        data = func(sizehint)
                    else:
                        data = func()

                    if not data:
                        break
                    if name == "readlines":
                        ostream.writelines(data)
                    else:
                        ostream.write(data)

                self.assertEqual(ostream.getvalue(), self.tstring[0])

if len(u'\U00012345') == 2: # ucs2 build
    _unichr = unichr
    def unichr(v):
        if v >= 0x10000:
            return _unichr(0xd800 + ((v - 0x10000) >> 10)) + \
                   _unichr(0xdc00 + ((v - 0x10000) & 0x3ff))
        else:
            return _unichr(v)
    _ord = ord
    def ord(c):
        if len(c) == 2:
            return 0x10000 + ((_ord(c[0]) - 0xd800) << 10) + \
                          (ord(c[1]) - 0xdc00)
        else:
            return _ord(c)

class TestBase_Mapping(unittest.TestCase):
    pass_enctest = []
    pass_dectest = []
    supmaps = []

    def __init__(self, *args, **kw):
        unittest.TestCase.__init__(self, *args, **kw)
        if not os.path.exists(self.mapfilename):
            raise test_support.TestSkipped('%s not found, download from %s' %
                    (self.mapfilename, self.mapfileurl))

    def test_mapping_file(self):
        unichrs = lambda s: u''.join(map(unichr, map(eval, s.split('+'))))
        urt_wa = {}

        for line in open(self.mapfilename):
            if not line:
                break
            data = line.split('#')[0].strip().split()
            if len(data) != 2:
                continue

            csetval = eval(data[0])
            if csetval <= 0x7F:
                csetch = chr(csetval & 0xff)
            elif csetval >= 0x1000000:
                csetch = chr(csetval >> 24) + chr((csetval >> 16) & 0xff) + \
                         chr((csetval >> 8) & 0xff) + chr(csetval & 0xff)
            elif csetval >= 0x10000:
                csetch = chr(csetval >> 16) + \
                         chr((csetval >> 8) & 0xff) + chr(csetval & 0xff)
            elif csetval >= 0x100:
                csetch = chr(csetval >> 8) + chr(csetval & 0xff)
            else:
                continue

            unich = unichrs(data[1])
            if ord(unich) == 0xfffd or urt_wa.has_key(unich):
                continue
            urt_wa[unich] = csetch

            self._testpoint(csetch, unich)

    def test_mapping_supplemental(self):
        for mapping in self.supmaps:
            self._testpoint(*mapping)

    def _testpoint(self, csetch, unich):
        if (csetch, unich) not in self.pass_enctest:
            self.assertEqual(unich.encode(self.encoding), csetch)
        if (csetch, unich) not in self.pass_dectest:
            self.assertEqual(unicode(csetch, self.encoding), unich)

def load_teststring(encoding):
    if __cjkcodecs__:
        etxt = open(os.path.join('sampletexts', encoding) + '.txt').read()
        utxt = open(os.path.join('sampletexts', encoding) + '.utf8').read()
        return (etxt, utxt)
    else:
        from test import cjkencodings_test
        return cjkencodings_test.teststring[encoding]

def register_skip_expected(*cases):
    for case in cases: # len(cases) must be 1 at least.
        for path in [os.path.curdir, os.path.pardir]:
            fn = os.path.join(path, case.mapfilename)
            if os.path.exists(fn):
                case.mapfilename = fn
                break
        else:
            sys.modules[case.__module__].skip_expected = True
            break
    else:
        sys.modules[case.__module__].skip_expected = False


Index: regrtest.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/test/regrtest.py,v
retrieving revision 1.150
retrieving revision 1.151
diff -C2 -d -r1.150 -r1.151
*** regrtest.py	20 Nov 2003 22:11:29 -0000	1.150
--- regrtest.py	17 Jan 2004 14:29:28 -0000	1.151
***************
*** 550,553 ****
--- 550,557 ----
  #         Controlled by test_timeout.skip_expected.  Requires the network
  #         resource and a socket module.
+ #     test_codecmaps_*
+ #         Whether a skip is expected here depends on whether a large test
+ #         input file has been downloaded.  test_codecmaps_*.skip_expected
+ #         controls that.
  
  _expectations = {
***************
*** 566,570 ****
          test_dbm
          test_dl
-         test_email_codecs
          test_fcntl
          test_fork1
--- 570,573 ----
***************
*** 599,603 ****
          test_curses
          test_dl
-         test_email_codecs
          test_gl
          test_imgfile
--- 602,605 ----
***************
*** 624,628 ****
          test_dbm
          test_dl
-         test_email_codecs
          test_fcntl
          test_fork1
--- 626,629 ----
***************
*** 779,783 ****
          test_curses
          test_dl
-         test_email_codecs
          test_gdbm
          test_gl
--- 780,783 ----
***************
*** 804,808 ****
          test_curses
          test_dbm
-         test_email_codecs
          test_gdbm
          test_gl
--- 804,807 ----
***************
*** 851,855 ****
          test_curses
          test_dl
-         test_email_codecs
          test_gdbm
          test_gl
--- 850,853 ----
***************
*** 877,881 ****
          test_curses
          test_dbm
-         test_email_codecs
          test_gl
          test_imgfile
--- 875,878 ----
***************
*** 902,906 ****
          test_curses
          test_dl
-         test_email_codecs
          test_gl
          test_imgfile
--- 899,902 ----
***************
*** 926,930 ****
          test_cd
          test_cl
-         test_email_codecs
          test_gl
          test_imgfile
--- 922,925 ----
***************
*** 956,959 ****
--- 951,956 ----
          from test import test_socket_ssl
          from test import test_timeout
+         from test import test_codecmaps_cn, test_codecmaps_jp
+         from test import test_codecmaps_kr, test_codecmaps_tw
  
          self.valid = False
***************
*** 974,977 ****
--- 971,978 ----
                  self.expected.add('test_timeout')
  
+             for cc in ('cn', 'jp', 'kr', 'tw'):
+                 if eval('test_codecmaps_' + cc).skip_expected:
+                     self.expected.add('test_codecmaps_' + cc)
+ 
              if not sys.platform in ("mac", "darwin"):
                  MAC_ONLY = ["test_macostools", "test_macfs", "test_aepack",





More information about the Python-checkins mailing list