Something like this in the docstring? "In order to support the historical JSON specification and closed-ecosystem JSON, it is possible to specify an encoding other than UTF-8."

8.1.  Character Encoding
   JSON text exchanged between systems that are not part of a closed
   ecosystem MUST be encoded using UTF-8 [RFC3629].
   Previous specifications of JSON have not required the use of UTF-8
   when transmitting JSON text.  However, the vast majority of JSON-
   based software implementations have chosen to use the UTF-8 encoding,
   to the extent that it is the only encoding that achieves
   interoperability.
   Implementations MUST NOT add a byte order mark (U+FEFF) to the
   beginning of a networked-transmitted JSON text.  In the interests of
   interoperability, implementations that parse JSON texts MAY ignore
   the presence of a byte order mark rather than treating it as an
   error.

 

```python
import json
import os


def dumpf(obj, path, *, encoding="UTF-8", **kwargs):
    """Serialize *obj* as JSON into the file located at *path*.

    The file is opened in text write mode using *encoding*; every other
    keyword argument is handed to ``json.dump`` unchanged.  The return
    value is whatever ``json.dump`` returns.
    """
    target = os.fspath(path)
    with open(target, mode="w", encoding=encoding) as stream:
        result = json.dump(obj, stream, **kwargs)
    return result


def loadf(path, *, encoding="UTF-8", **kwargs):
    """Parse the JSON document stored in the file at *path*.

    The file is opened in text read mode using *encoding*; every other
    keyword argument is handed to ``json.load`` unchanged.  Returns the
    object produced by ``json.load``.
    """
    source = os.fspath(path)
    with open(source, mode="r", encoding=encoding) as stream:
        parsed = json.load(stream, **kwargs)
    return parsed


import pathlib
import unittest


class TestJsonLoadfAndDumpf(unittest.TestCase):
    """Round-trip an object through ``dumpf``/``loadf`` for several encodings."""

    def setUp(self):
        # Local import: only this fixture needs tempfile.
        import tempfile

        self.encodings = [None, "UTF-8", "UTF-16", "UTF-32"]

        # Write into a throwaway directory that is removed after each test.
        # The test derives one file name per encoding from ``path``, so
        # unlinking ``path`` itself would never clean up what it writes.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.data = dict(
            obj=dict(a=dict(b=[1, 2, 3])),
            path=pathlib.Path(tmp.name) / "test_loadf_and_dumpf.json",
        )

    def test_dumpf_and_loadf(self):
        data = self.data
        for encoding in self.encodings:
            # subTest reports each encoding separately instead of stopping
            # at the first failing one.
            with self.subTest(encoding=encoding):
                path = f'{data["path"]}.{encoding}.json'
                dumpf(data["obj"], path, encoding=encoding)
                self.assertEqual(loadf(path, encoding=encoding), data["obj"])


# $ pip install pytest-cov
# $ pytest -v example.py
# https://docs.pytest.org/en/stable/parametrize.html
# https://docs.pytest.org/en/stable/tmpdir.html

import pytest


@pytest.mark.parametrize("encoding", [None, "UTF-8", "UTF-16", "UTF-32"])
@pytest.mark.parametrize("obj", [{"a": {"b": [1, 2, 3]}}])
def test_dumpf_and_loadf(obj, encoding, tmpdir):
    """Write *obj* with ``dumpf`` and check ``loadf`` reads back an equal object."""
    target = pathlib.Path(tmpdir).joinpath(
        f"test_loadf_and_dumpf.{encoding}.json"
    )
    dumpf(obj, target, encoding=encoding)
    roundtripped = loadf(target, encoding=encoding)
    assert roundtripped == obj
```

For whoever creates a PR for this:

- [ ] add parameter and return type annotations
- [ ] copy docstrings from json.load/json.dump and open#encoding
- [ ] correctly support the C module implementation (this just does `import json`)?
- [ ] keep or drop the encoding tests?

On Thu, Sep 17, 2020 at 1:25 AM Christopher Barker <pythonchb@gmail.com> wrote:
Is that suggested code? I don't follow.

But if it is, no. Personally, I think ANY use of system settings is a bad idea [*]. But there is certainly no need to even think about it for JSON.

-CHB

* Have we not learned that in the age of the internet the machine the code happens to be running on has nothing to do with the needs of the application's users? Timezones, encodings, number formats, NOTHING.


On Wed, Sep 16, 2020 at 8:45 PM Wes Turner <wes.turner@gmail.com> wrote:
Is all of this locale/encoding testing necessary (or even sufficient)?


```python
import json
import locale
import os


def get_default_encoding():
    """Return the encoding to use when the caller does not supply one.

    The encoding reported by ``locale.getdefaultlocale()`` is returned when
    its name starts with ``"UTF-"``; in every other case, including when no
    encoding is reported at all, ``"UTF-8"`` is returned.

    NOTE(review): ``locale.getdefaultlocale()`` is documented as deprecated
    since Python 3.11 -- confirm whether this helper should keep consulting
    the locale at all.
    """
    default_encoding = locale.getdefaultlocale()[1]
    # The encoding slot is None when it cannot be determined (e.g. the "C"
    # locale); guard against that before calling str methods on it.
    if default_encoding and default_encoding.startswith("UTF-"):
        return default_encoding
    return "UTF-8"


def dumpf(obj, path, *args, **kwargs):
    """Serialize *obj* as JSON into the file at *path*.

    An ``encoding`` keyword argument, when given, selects the text encoding
    used to open the file; otherwise ``get_default_encoding()`` decides.
    All remaining positional and keyword arguments are forwarded to
    ``json.dump``, whose return value is returned.
    """
    # Resolve the fallback lazily: the locale is only consulted when the
    # caller did not pass an encoding, so an explicit encoding never depends
    # on (or fails because of) the locale lookup.
    if "encoding" in kwargs:
        encoding = kwargs.pop("encoding")
    else:
        encoding = get_default_encoding()
    with open(os.fspath(path), "w", encoding=encoding) as file_:
        return json.dump(obj, file_, *args, **kwargs)


def loadf(path, *args, **kwargs):
    """Parse the JSON document stored in the file at *path*.

    An ``encoding`` keyword argument, when given, selects the text encoding
    used to open the file; otherwise ``get_default_encoding()`` decides.
    All remaining positional and keyword arguments are forwarded to
    ``json.load``, whose return value is returned.
    """
    # Resolve the fallback lazily: the locale is only consulted when the
    # caller did not pass an encoding, so an explicit encoding never depends
    # on (or fails because of) the locale lookup.
    if "encoding" in kwargs:
        encoding = kwargs.pop("encoding")
    else:
        encoding = get_default_encoding()
    with open(os.fspath(path), "r", encoding=encoding) as file_:
        return json.load(file_, *args, **kwargs)


import pathlib
import unittest


class TestJsonLoadfAndDumpf(unittest.TestCase):
    """Exercise ``get_default_encoding``, ``dumpf`` and ``loadf`` across locales."""

    def setUp(self):
        # Local import: only this fixture needs tempfile.
        import tempfile

        self.locales = ["", "C", "en_US.UTF-8", "japanese"]
        self.encodings = [None, "UTF-8", "UTF-16", "UTF-32"]

        # Querying setlocale() without a new value returns the current LC_ALL
        # setting as a string that setlocale() accepts back verbatim.
        # getlocale() only describes LC_CTYPE, so it cannot restore LC_ALL.
        self.previous_locale = locale.setlocale(locale.LC_ALL)

        # Write into a throwaway directory so no file is left in the CWD.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.data = dict(
            obj=dict(a=dict(b=[1, 2, 3])),
            encoding=None,
            path=pathlib.Path(tmp.name) / "test_loadf_and_dumpf.json",
        )

    def tearDown(self):
        locale.setlocale(locale.LC_ALL, self.previous_locale)

    def _activate_locale(self, localestr):
        """Switch LC_ALL to *localestr*; return False if it is not installed."""
        try:
            locale.setlocale(locale.LC_ALL, localestr)
        except locale.Error:
            return False
        return True

    def test_get_default_encoding(self):
        # NOTE(review): get_default_encoding() consults
        # locale.getdefaultlocale(), which may not reflect setlocale() calls
        # -- confirm this loop exercises what is intended.
        for localestr in self.locales:
            with self.subTest(locale=localestr):
                if not self._activate_locale(localestr):
                    continue  # locale not available on this machine
                output = get_default_encoding()
                self.assertTrue(output.startswith("UTF-"), output)

    def test_dumpf_and_loadf(self):
        data = self.data
        for localestr in self.locales:
            if not self._activate_locale(localestr):
                continue  # locale not available on this machine
            for encoding in self.encodings:
                with self.subTest(locale=localestr, encoding=encoding):
                    dumpf(data["obj"], data["path"], encoding=encoding)
                    loadf_output = loadf(data["path"], encoding=encoding)
                    self.assertEqual(loadf_output, data["obj"])
```

On Wed, Sep 16, 2020 at 8:30 PM Christopher Barker <pythonchb@gmail.com> wrote:
On Wed, Sep 16, 2020 at 2:53 PM Wes Turner <wes.turner@gmail.com> wrote:
So I was not correct: dump does not default to UTF-8 (and does not accept an encoding= parameter)


I think dumpf() should use UTF-8, and that's it. If anyone really wants something else, they can get it by providing an open text file object.

Why would we impose UTF-8 when the spec says UTF-8, UTF-16, or UTF-32?

The idea was that the encoding was one of the motivators for doing this in the first place. But I suppose as long as UTF-8 is the default, and only the three "official" ones are allowed, then yeah, we could add an encoding keyword argument.

-CHB


--
Christopher Barker, PhD

Python Language Consulting
  - Teaching
  - Scientific Software Development
  - Desktop GUI and Web Development
  - wxPython, numpy, scipy, Cython


--
Christopher Barker, PhD

Python Language Consulting
  - Teaching
  - Scientific Software Development
  - Desktop GUI and Web Development
  - wxPython, numpy, scipy, Cython