Is all of this locale/encoding testing necessary (or even sufficient)?


```python
import json
import locale
import os
import pathlib
import unittest


def get_default_encoding():
    """Return the current locale's encoding if it is a UTF encoding,
    otherwise fall back to "UTF-8".
    """
    # locale.getpreferredencoding(False) reflects the current setlocale()
    # state; locale.getdefaultlocale() only reads the environment and can
    # return (None, None), which would crash the startswith() check below.
    default_encoding = locale.getpreferredencoding(False)
    if default_encoding and default_encoding.upper().startswith("UTF-"):
        return default_encoding
    return "UTF-8"


def dumpf(obj, path, *args, **kwargs):
    # Treat an explicit encoding=None the same as "not given" so it falls
    # back to get_default_encoding() instead of open()'s locale default.
    encoding = kwargs.pop("encoding", None) or get_default_encoding()
    with open(os.fspath(path), "w", encoding=encoding) as file_:
        return json.dump(obj, file_, *args, **kwargs)


def loadf(path, *args, **kwargs):
    # Same encoding fallback behavior as dumpf().
    encoding = kwargs.pop("encoding", None) or get_default_encoding()
    with open(os.fspath(path), "r", encoding=encoding) as file_:
        return json.load(file_, *args, **kwargs)


class TestJsonLoadfAndDumpf(unittest.TestCase):
    def setUp(self):
        # Not every locale below is installed on every system; the tests
        # skip any locale that locale.setlocale() rejects.
        self.locales = ["", "C", "en_US.UTF-8", "japanese"]
        self.encodings = [None, "UTF-8", "UTF-16", "UTF-32"]

        data = dict(
            obj=dict(a=dict(b=[1, 2, 3])),
            encoding=None,
            path=pathlib.Path(".") / "test_loadf_and_dumpf.json",
        )
        if os.path.isfile(data["path"]):
            os.unlink(data["path"])
        self.data = data

        # Save the full locale string; locale.getlocale() does not always
        # round-trip cleanly through locale.setlocale().
        self.previous_locale = locale.setlocale(locale.LC_ALL)

    def tearDown(self):
        locale.setlocale(locale.LC_ALL, self.previous_locale)
        if os.path.isfile(self.data["path"]):
            os.unlink(self.data["path"])

    def test_get_default_encoding(self):
        for localestr in self.locales:
            try:
                locale.setlocale(locale.LC_ALL, localestr)
            except locale.Error:
                continue  # locale not available on this system
            output = get_default_encoding()
            self.assertTrue(output.upper().startswith("UTF-"))
    def test_dumpf_and_loadf(self):
        data = self.data
        for localestr in self.locales:
            try:
                locale.setlocale(locale.LC_ALL, localestr)
            except locale.Error:
                continue  # locale not available on this system
            for encoding in self.encodings:
                dumpf(data["obj"], data["path"], encoding=encoding)
                loadf_output = loadf(data["path"], encoding=encoding)
                self.assertEqual(loadf_output, data["obj"])
```

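For comparison, here is a rough sketch of the stricter variant discussed in the quoted thread below: UTF-8 as the default, with only the three Unicode encodings the JSON RFC names allowed. This is only an illustration, not part of the code above; the names `dumpf_strict` and `loadf_strict` are placeholders.

```python
import json
import os

# Sketch only: UTF-8 default, and only the spec's three encodings accepted.
_ALLOWED_ENCODINGS = ("UTF-8", "UTF-16", "UTF-32")


def dumpf_strict(obj, path, *args, encoding="UTF-8", **kwargs):
    """Serialize obj as JSON to path (UTF-8 by default)."""
    if encoding.upper() not in _ALLOWED_ENCODINGS:
        raise ValueError(
            "encoding must be one of %r, got %r" % (_ALLOWED_ENCODINGS, encoding)
        )
    with open(os.fspath(path), "w", encoding=encoding) as file_:
        return json.dump(obj, file_, *args, **kwargs)


def loadf_strict(path, *args, encoding="UTF-8", **kwargs):
    """Deserialize JSON from path (UTF-8 by default)."""
    if encoding.upper() not in _ALLOWED_ENCODINGS:
        raise ValueError(
            "encoding must be one of %r, got %r" % (_ALLOWED_ENCODINGS, encoding)
        )
    with open(os.fspath(path), "r", encoding=encoding) as file_:
        return json.load(file_, *args, **kwargs)
```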
On Wed, Sep 16, 2020 at 8:30 PM Christopher Barker <pythonchb@gmail.com> wrote:
> On Wed, Sep 16, 2020 at 2:53 PM Wes Turner <wes.turner@gmail.com> wrote:
>> So I was not correct: dump does not default to UTF-8 (and does not accept an encoding= parameter)
>
> I think dumpf() should use UTF-8, and that's it. If anyone really wants something else, they can get it by providing an open text file object.
>
>> Why would we impose UTF-8 when the spec says UTF-8, UTF-16, or UTF-32?
>
> The idea was that the encoding was one of the motivators to doing this in the first place. But I suppose as long as utf-8 is the default, and only the three "official" ones are allowed, then yeah, we could add an encoding keyword argument.
>
> -CHB
>
> --
> Christopher Barker, PhD
>
> Python Language Consulting
>   - Teaching
>   - Scientific Software Development
>   - Desktop GUI and Web Development
>   - wxPython, numpy, scipy, Cython
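
P.S. To make the point about the current API concrete: json.dump() has no encoding parameter; it writes str to whatever text-mode file object it is given, so the encoding is whatever the file was opened with. A minimal illustration (the file name is just an example):

```python
import json

# The caller chooses the encoding when opening the file; json.dump() only
# sees an already-open text file object.
with open("example.json", "w", encoding="UTF-16") as file_:
    json.dump({"a": [1, 2, 3]}, file_)

with open("example.json", "r", encoding="UTF-16") as file_:
    print(json.load(file_))  # -> {'a': [1, 2, 3]}
```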