From 1f892cb9b51733c335af4d4119a1fbee9d11aa80 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 8 Apr 2026 14:16:22 +0200 Subject: [PATCH 1/3] gh-148241: Fix json serialization for str subclasses Fix json serialization: no longer call str(obj) on str subclasses. Replace PyUnicodeWriter_WriteStr() with PyUnicodeWriter_WriteASCII() and private _PyUnicodeWriter_WriteStr(). --- Lib/test/test_json/test_dump.py | 38 +++++++++++++++++++ .../test_json/test_encode_basestring_ascii.py | 7 ++++ ...-04-08-14-19-17.gh-issue-148241.fO_QT4.rst | 2 + Modules/_json.c | 9 ++++- 4 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-04-08-14-19-17.gh-issue-148241.fO_QT4.rst diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py index 9880698455ca5e..ce87c3dcaaf474 100644 --- a/Lib/test/test_json/test_dump.py +++ b/Lib/test/test_json/test_dump.py @@ -77,6 +77,44 @@ def __lt__(self, o): d[1337] = "true.dat" self.assertEqual(self.dumps(d, sort_keys=True), '{"1337": "true.dat"}') + def test_dumps_str_subclass(self): + # Don't call obj.__str__() on str subclasses + + # str subclass which returns a different string on str(obj) + class StrSubclass(str): + def __str__(self): + return "StrSubclass" + + obj = StrSubclass('ascii') + self.assertEqual(self.dumps(obj), + '"ascii"') + self.assertEqual(self.dumps([obj]), + '["ascii"]') + self.assertEqual(self.dumps({'key': obj}), + '{"key": "ascii"}') + + obj = StrSubclass('escape\n') + self.assertEqual(self.dumps(obj), + '"escape\\n"') + self.assertEqual(self.dumps([obj]), + '["escape\\n"]') + self.assertEqual(self.dumps({'key': obj}), + '{"key": "escape\\n"}') + + obj = StrSubclass('nonascii:é') + self.assertEqual(self.dumps(obj, ensure_ascii=False), + '"nonascii:é"') + self.assertEqual(self.dumps([obj], ensure_ascii=False), + '["nonascii:é"]') + self.assertEqual(self.dumps({'key': obj}, ensure_ascii=False), + '{"key": "nonascii:é"}') + self.assertEqual(self.dumps(obj), + '"nonascii:\\u00e9"') + self.assertEqual(self.dumps([obj]), + '["nonascii:\\u00e9"]') + self.assertEqual(self.dumps({'key': obj}), + '{"key": "nonascii:\\u00e9"}') + class TestPyDump(TestDump, PyTest): pass diff --git a/Lib/test/test_json/test_encode_basestring_ascii.py b/Lib/test/test_json/test_encode_basestring_ascii.py index c90d3e968e5ef9..1b5dfcfde01d11 100644 --- a/Lib/test/test_json/test_encode_basestring_ascii.py +++ b/Lib/test/test_json/test_encode_basestring_ascii.py @@ -3,6 +3,11 @@ from test.support import bigaddrspacetest +# str subclass which returns a different string on str(obj) +class StrSubclass(str): + def __str__(self): + return "StrSubclass" + CASES = [ ('/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), @@ -14,6 +19,8 @@ ('\U0001d120', '"\\ud834\\udd20"'), ('\u03b1\u03a9', '"\\u03b1\\u03a9"'), ("`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'), + # Don't call obj.__str__() on str subclasses + (StrSubclass('ascii'), '"ascii"'), ] class TestEncodeBasestringAscii: diff --git a/Misc/NEWS.d/next/Library/2026-04-08-14-19-17.gh-issue-148241.fO_QT4.rst b/Misc/NEWS.d/next/Library/2026-04-08-14-19-17.gh-issue-148241.fO_QT4.rst new file mode 100644 index 00000000000000..bf8d0e4382e6f6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-08-14-19-17.gh-issue-148241.fO_QT4.rst @@ -0,0 +1,2 @@ +:mod:`json`: Fix serialization: no longer call ``str(obj)`` on :class:`str` +subclasses. Patch by Victor Stinner. diff --git a/Modules/_json.c b/Modules/_json.c index 36614138501e79..a20466de8c50e4 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -258,7 +258,10 @@ write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) if (PyUnicodeWriter_WriteChar(writer, '"') < 0) { return -1; } - if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) { + // gh-148241: Avoid PyUnicodeWriter_WriteStr() which calls str(obj) + // on str subclasses + assert(PyUnicode_IS_ASCII(pystr)); + if (PyUnicodeWriter_WriteASCII(writer, input, input_chars) < 0) { return -1; } return PyUnicodeWriter_WriteChar(writer, '"'); @@ -399,7 +402,9 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) if (PyUnicodeWriter_WriteChar(writer, '"') < 0) { return -1; } - if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) { + // gh-148241: Avoid PyUnicodeWriter_WriteStr() which calls str(obj) + // on str subclasses + if (_PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, pystr) < 0) { return -1; } return PyUnicodeWriter_WriteChar(writer, '"'); From c251659ee05390615289e7dcb77374508b117a0a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 8 Apr 2026 17:32:03 +0200 Subject: [PATCH 2/3] Add a test to test_json.test_enum --- Lib/test/test_json/test_enum.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Lib/test/test_json/test_enum.py b/Lib/test/test_json/test_enum.py index 196229897bd6e3..518c3e11200659 100644 --- a/Lib/test/test_json/test_enum.py +++ b/Lib/test/test_json/test_enum.py @@ -31,6 +31,9 @@ class WeirdNum(float, Enum): neg_inf = NEG_INF nan = NAN +class StringEnum(str, Enum): + COLOR = "color" + class TestEnum: def test_floats(self): @@ -116,5 +119,11 @@ def test_dict_values(self): self.assertEqual(nd['j'], NEG_INF) self.assertTrue(isnan(nd['n'])) + def test_str_enum(self): + obj = StringEnum.COLOR + self.assertEqual(self.dumps(obj), '"color"') + self.assertEqual(self.dumps([obj]), '["color"]') + self.assertEqual(self.dumps({'key': obj}), '{"key": "color"}') + class TestPyEnum(TestEnum, PyTest): pass class TestCEnum(TestEnum, CTest): pass From fdab8891a6c7c53c7d36f04bae459c002850fb34 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 8 Apr 2026 17:34:02 +0200 Subject: [PATCH 3/3] Reformat tests --- Lib/test/test_json/test_dump.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py index ce87c3dcaaf474..850e5ceeba0c89 100644 --- a/Lib/test/test_json/test_dump.py +++ b/Lib/test/test_json/test_dump.py @@ -86,20 +86,14 @@ def __str__(self): return "StrSubclass" obj = StrSubclass('ascii') - self.assertEqual(self.dumps(obj), - '"ascii"') - self.assertEqual(self.dumps([obj]), - '["ascii"]') - self.assertEqual(self.dumps({'key': obj}), - '{"key": "ascii"}') + self.assertEqual(self.dumps(obj), '"ascii"') + self.assertEqual(self.dumps([obj]), '["ascii"]') + self.assertEqual(self.dumps({'key': obj}), '{"key": "ascii"}') obj = StrSubclass('escape\n') - self.assertEqual(self.dumps(obj), - '"escape\\n"') - self.assertEqual(self.dumps([obj]), - '["escape\\n"]') - self.assertEqual(self.dumps({'key': obj}), - '{"key": "escape\\n"}') + self.assertEqual(self.dumps(obj), '"escape\\n"') + self.assertEqual(self.dumps([obj]), '["escape\\n"]') + self.assertEqual(self.dumps({'key': obj}), '{"key": "escape\\n"}') obj = StrSubclass('nonascii:é') self.assertEqual(self.dumps(obj, ensure_ascii=False), @@ -108,10 +102,8 @@ def __str__(self): '["nonascii:é"]') self.assertEqual(self.dumps({'key': obj}, ensure_ascii=False), '{"key": "nonascii:é"}') - self.assertEqual(self.dumps(obj), - '"nonascii:\\u00e9"') - self.assertEqual(self.dumps([obj]), - '["nonascii:\\u00e9"]') + self.assertEqual(self.dumps(obj), '"nonascii:\\u00e9"') + self.assertEqual(self.dumps([obj]), '["nonascii:\\u00e9"]') self.assertEqual(self.dumps({'key': obj}), '{"key": "nonascii:\\u00e9"}')