Something like this in the docstring?: "In order to support the historical JSON specification and closed ecosystem JSON, it is possible to specify an encoding other than UTF-8."

8.1. Character Encoding
JSON text exchanged between systems that are not part of a closed
ecosystem MUST be encoded using UTF-8 [RFC3629].
Previous specifications of JSON have not required the use of UTF-8
when transmitting JSON text. However, the vast majority of JSON-
based software implementations have chosen to use the UTF-8 encoding,
to the extent that it is the only encoding that achieves
interoperability.
Implementations MUST NOT add a byte order mark (U+FEFF) to the
beginning of a networked-transmitted JSON text. In the interests of
interoperability, implementations that parse JSON texts MAY ignore
the presence of a byte order mark rather than treating it as an
error.

```python

import json
import os

def dumpf(
    obj: object,
    path: "str | bytes | os.PathLike",
    *,
    encoding: "str | None" = "UTF-8",
    **kwargs: object,
) -> None:
    """Serialize *obj* as JSON text to the file at *path*.

    The file is opened in text mode for writing (truncating any existing
    content) and closed again before this function returns.

    In order to support the historical JSON specification and closed
    ecosystem JSON, it is possible to specify an encoding other than UTF-8.

    Args:
        obj: The object to serialize; handed to ``json.dump`` unchanged.
        path: Destination file; anything accepted by ``os.fspath``.
        encoding: Text encoding used for the file.  ``None`` is treated as
            "use the default", i.e. UTF-8, and never as "use the locale's
            encoding".
        **kwargs: Forwarded verbatim to ``json.dump``.

    Returns:
        ``None`` (the return value of ``json.dump``).
    """
    # ``open(..., encoding=None)`` would select the platform/locale encoding,
    # making the bytes on disk depend on the machine the code runs on.
    if encoding is None:
        encoding = "UTF-8"
    with open(os.fspath(path), "w", encoding=encoding) as f:
        return json.dump(obj, f, **kwargs)

def loadf(
    path: "str | bytes | os.PathLike",
    *,
    encoding: "str | None" = "UTF-8",
    **kwargs: object,
):
    """Deserialize the JSON document stored in the file at *path*.

    The file is opened in text mode for reading and closed again before this
    function returns.

    In order to support the historical JSON specification and closed
    ecosystem JSON, it is possible to specify an encoding other than UTF-8.

    Args:
        path: Source file; anything accepted by ``os.fspath``.
        encoding: Text encoding of the file.  ``None`` is treated as "use the
            default", i.e. UTF-8, and never as "use the locale's encoding".
            For UTF-8 a leading byte order mark is ignored.
        **kwargs: Forwarded verbatim to ``json.load``.

    Returns:
        The Python object produced by ``json.load``.

    Raises:
        LookupError: If *encoding* does not name a known codec.
    """
    import codecs  # local import: only needed to normalize the codec name

    # ``open(..., encoding=None)`` would select the platform/locale encoding.
    if encoding is None:
        encoding = "UTF-8"
    # ``json.load`` rejects text that starts with U+FEFF.  "utf-8-sig" decodes
    # plain UTF-8 identically and additionally strips a leading BOM, so files
    # produced by BOM-writing tools load instead of failing.
    if codecs.lookup(encoding).name == "utf-8":
        encoding = "utf-8-sig"
    with open(os.fspath(path), "r", encoding=encoding) as f:
        return json.load(f, **kwargs)

import pathlib
import unittest

class TestJsonLoadfAndDumpf(unittest.TestCase):
    """Round-trip tests: what ``dumpf`` writes, ``loadf`` must read back."""

    def setUp(self):
        """Prepare the sample object and a scratch location for the files."""
        import tempfile  # local import: only this fixture needs it

        self.encodings = [None, "UTF-8", "UTF-16", "UTF-32"]

        # Each test writes one file per encoding.  Keeping them in a
        # throw-away directory guarantees they are removed afterwards and
        # that stale files from a previous run cannot influence the result.
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)

        self.data = dict(
            obj=dict(a=dict(b=[1, 2, 3])),
            path=pathlib.Path(scratch.name) / "test_loadf_and_dumpf.json",
        )

    def test_dumpf_and_loadf(self):
        """Dump and reload the sample object once per encoding."""
        data = self.data
        for encoding in self.encodings:
            # subTest reports every failing encoding instead of stopping at
            # the first one.
            with self.subTest(encoding=encoding):
                path = f'{data["path"]}.{encoding}.json'
                dumpf(data["obj"], path, encoding=encoding)
                # assertEqual (unlike a bare assert) still runs under -O.
                self.assertEqual(loadf(path, encoding=encoding), data["obj"])

# $ pip install pytest-cov
# $ pytest -v example.py
# https://docs.pytest.org/en/stable/parametrize.html
# https://docs.pytest.org/en/stable/tmpdir.html

import pytest

@pytest.mark.parametrize("encoding", [None, "UTF-8", "UTF-16", "UTF-32"])
@pytest.mark.parametrize("obj", [{"a": {"b": [1, 2, 3]}}])
def test_dumpf_and_loadf(obj, encoding, tmpdir):
    """Write *obj* with ``dumpf`` and check that ``loadf`` reads it back equal.

    Runs once per (obj, encoding) combination; the file is created inside
    the per-test ``tmpdir`` provided by pytest.
    """
    filename = f"test_loadf_and_dumpf.{encoding}.json"
    target = pathlib.Path(tmpdir).joinpath(filename)
    dumpf(obj, target, encoding=encoding)
    round_tripped = loadf(target, encoding=encoding)
    assert round_tripped == obj

```

For whoever creates a PR for this:

- [ ] add parameter and return type annotations

- [ ] copy docstrings from json.load/json.dump and open#encoding

- [ ] correctly support the C module implementation (this just does `import json`)?

- [ ] keep or drop the encoding tests?

On Thu, Sep 17, 2020 at 1:25 AM Christopher Barker <pythonchb@gmail.com> wrote:

Is that suggested code? I don't follow.

But if it is, no. Personally, I think ANY use of system settings is a bad idea [*]. But there is certainly no need to even think about it for JSON.

-CHB

* Have we not learned that, in the age of the internet, the machine the code happens to be running on has nothing to do with the needs of the application's user? Timezones, encodings, number formats, NOTHING.

On Wed, Sep 16, 2020 at 8:45 PM Wes Turner <wes.turner@gmail.com> wrote:
Is all of this locale/encoding testing necessary (or even sufficient)?

```python
import json
import locale
import os

def get_default_encoding() -> str:
    """Return the encoding to use when the caller does not specify one.

    The encoding reported by ``locale.getdefaultlocale()`` is used only when
    its name starts with ``"UTF-"``; in every other case, including when no
    encoding can be determined at all, ``"UTF-8"`` is returned.

    TODO XXX: still an open question whether the locale should be consulted
    at all.
    NOTE(review): ``locale.getdefaultlocale()`` is deprecated since
    Python 3.11 -- confirm the replacement before relying on this helper.
    """
    default_encoding = locale.getdefaultlocale()[1]
    # The encoding slot is None when the locale does not name one (e.g. the
    # "C" locale); guard against that instead of calling a method on None.
    if default_encoding is not None and default_encoding.startswith("UTF-"):
        return default_encoding
    return "UTF-8"

def dumpf(obj, path, *args, **kwargs):
    """Serialize *obj* as JSON text to the file at *path*.

    Args:
        obj: The object to serialize; handed to ``json.dump`` unchanged.
        path: Destination file; anything accepted by ``os.fspath``.
        *args: Forwarded verbatim to ``json.dump`` after the file object.
        **kwargs: Forwarded to ``json.dump``, except for ``encoding``, which
            is removed and used to open the file.  When ``encoding`` is not
            given, ``get_default_encoding()`` supplies it.

    Returns:
        ``None`` (the return value of ``json.dump``).
    """
    # Resolve the fallback only when it is needed: a default passed to
    # ``dict.pop`` is evaluated eagerly, so a failing locale lookup would
    # break the call even though the caller supplied an encoding.
    try:
        encoding = kwargs.pop("encoding")
    except KeyError:
        encoding = get_default_encoding()
    with open(os.fspath(path), "w", encoding=encoding) as file_:
        return json.dump(obj, file_, *args, **kwargs)

def loadf(path, *args, **kwargs):
    """Deserialize the JSON document stored in the file at *path*.

    Args:
        path: Source file; anything accepted by ``os.fspath``.
        *args: Forwarded verbatim to ``json.load`` after the file object.
        **kwargs: Forwarded to ``json.load``, except for ``encoding``, which
            is removed and used to open the file.  When ``encoding`` is not
            given, ``get_default_encoding()`` supplies it.

    Returns:
        The Python object produced by ``json.load``.
    """
    # Resolve the fallback only when it is needed: a default passed to
    # ``dict.pop`` is evaluated eagerly, so a failing locale lookup would
    # break the call even though the caller supplied an encoding.
    try:
        encoding = kwargs.pop("encoding")
    except KeyError:
        encoding = get_default_encoding()
    with open(os.fspath(path), "r", encoding=encoding) as file_:
        return json.load(file_, *args, **kwargs)

import pathlib
import unittest

class TestJsonLoadfAndDumpf(unittest.TestCase):
    """Exercise the JSON file helpers under several locales and encodings."""

    def setUp(self):
        """Create the sample data and remember the process locale."""
        import tempfile  # local import: only this fixture needs it

        self.locales = ["", "C", "en_US.UTF-8", "japanese"]
        self.encodings = [None, "UTF-8", "UTF-16", "UTF-32"]

        # Write into a throw-away directory so nothing is left behind and a
        # stale file from an earlier run cannot influence the result.
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)

        self.data = dict(
            obj=dict(a=dict(b=[1, 2, 3])),
            encoding=None,
            path=pathlib.Path(scratch.name) / "test_loadf_and_dumpf.json",
        )

        # Query form of setlocale: returns the current LC_ALL setting as a
        # string that can be passed straight back to setlocale in tearDown.
        # (locale.getlocale() would only describe LC_CTYPE.)
        self.previous_locale = locale.setlocale(locale.LC_ALL)

    def tearDown(self):
        """Restore the locale that was active before the test ran."""
        locale.setlocale(locale.LC_ALL, self.previous_locale)

    def _use_locale(self, localestr):
        """Switch to *localestr*, skipping when the system lacks it."""
        try:
            locale.setlocale(locale.LC_ALL, localestr)
        except locale.Error:
            # Not every platform ships every locale (e.g. "japanese");
            # a missing locale is an environment limitation, not a failure.
            self.skipTest(f"locale {localestr!r} is not available")

    def test_get_default_encoding(self):
        """The default encoding must be a UTF-* encoding in every locale."""
        for localestr in self.locales:
            with self.subTest(locale=localestr):
                self._use_locale(localestr)
                output = get_default_encoding()
                # assertTrue (unlike a bare assert) still runs under -O.
                self.assertTrue(output.startswith("UTF-"))

    def test_dumpf_and_loadf(self):
        """Dump and reload the sample object per locale and encoding."""
        data = self.data
        for localestr in self.locales:
            for encoding in self.encodings:
                with self.subTest(locale=localestr, encoding=encoding):
                    self._use_locale(localestr)
                    dumpf(data["obj"], data["path"], encoding=encoding)
                    loadf_output = loadf(data["path"], encoding=encoding)
                    self.assertEqual(loadf_output, data["obj"])
```

On Wed, Sep 16, 2020 at 8:30 PM Christopher Barker <pythonchb@gmail.com> wrote:
On Wed, Sep 16, 2020 at 2:53 PM Wes Turner <wes.turner@gmail.com> wrote:
So I was not correct: dump does not default to UTF-8 (and does not accept an encoding= parameter)

I think dumpf() should use UTF-8, and that's it. If anyone really wants something else, they can get it by providing an open text file object.

Why would we impose UTF-8 when the spec says UTF-8, UTF-16, or UTF-32?

The idea was that the encoding was one of the motivators for doing this in the first place. But I suppose as long as UTF-8 is the default, and only the three "official" ones are allowed, then yeah, we could add an encoding keyword argument.

-CHB

--
Christopher Barker, PhD

Python Language Consulting
- Teaching
- Scientific Software Development
- Desktop GUI and Web Development
- wxPython, numpy, scipy, Cython

--
Christopher Barker, PhD

Python Language Consulting
- Teaching
- Scientific Software Development
- Desktop GUI and Web Development
- wxPython, numpy, scipy, Cython