Skip to content

Commit 07f2cee

Browse files
authored
bpo-42846: Convert CJK codec extensions to multiphase init (GH-24157)
Convert the 6 CJK codec extension modules (_codecs_cn, _codecs_hk, _codecs_iso2022, _codecs_jp, _codecs_kr and _codecs_tw) to the multiphase initialization API (PEP 489). Remove the local cache in getmultibytecodec(): the function now always imports _multibytecodec. Getting a codec should be uncommon; for example, this function is only called once per CJK codec module. Also fix a reference leak in the register_maps() error path.
1 parent 07dcd86 commit 07f2cee

File tree

3 files changed: 66 additions (+66) and 31 deletions (-31).

Lib/test/test_multibytecodec.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,15 @@
33
# Unit test for multibytecodec itself
44
#
55

6+
import _multibytecodec
7+
import codecs
8+
import io
9+
import sys
10+
import textwrap
11+
import unittest
612
from test import support
713
from test.support import os_helper
814
from test.support.os_helper import TESTFN
9-
import unittest, io, codecs, sys
10-
import _multibytecodec
1115

1216
ALL_CJKENCODINGS = [
1317
# _codecs_cn
@@ -205,6 +209,24 @@ def test_issue5640(self):
205209
self.assertEqual(encoder.encode('\xff'), b'\\xff')
206210
self.assertEqual(encoder.encode('\n'), b'\n')
207211

212+
@support.cpython_only
213+
def test_subinterp(self):
214+
# bpo-42846: Test a CJK codec in a subinterpreter
215+
import _testcapi
216+
encoding = 'cp932'
217+
text = "Python の開発は、1990 年ごろから開始されています。"
218+
code = textwrap.dedent("""
219+
import codecs
220+
encoding = %r
221+
text = %r
222+
encoder = codecs.getincrementalencoder(encoding)()
223+
text2 = encoder.encode(text).decode(encoding)
224+
if text2 != text:
225+
raise ValueError(f"encoding issue: {text2!a} != {text!a}")
226+
""") % (encoding, text)
227+
res = _testcapi.run_in_subinterp(code)
228+
self.assertEqual(res, 0)
229+
208230
class Test_IncrementalDecoder(unittest.TestCase):
209231

210232
def test_dbcs(self):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Convert the 6 CJK codec extension modules (_codecs_cn, _codecs_hk,
2+
_codecs_iso2022, _codecs_jp, _codecs_kr and _codecs_tw) to the multiphase
3+
initialization API (:pep:`489`). Patch by Victor Stinner.

Modules/cjkcodecs/cjkcodecs.h

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -245,15 +245,13 @@ static const struct dbcs_map *mapping_list;
245245
static PyObject *
246246
getmultibytecodec(void)
247247
{
248-
static PyObject *cofunc = NULL;
249-
250-
if (cofunc == NULL) {
251-
PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec");
252-
if (mod == NULL)
253-
return NULL;
254-
cofunc = PyObject_GetAttrString(mod, "__create_codec");
255-
Py_DECREF(mod);
248+
PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec");
249+
if (mod == NULL) {
250+
return NULL;
256251
}
252+
253+
PyObject *cofunc = PyObject_GetAttrString(mod, "__create_codec");
254+
Py_DECREF(mod);
257255
return cofunc;
258256
}
259257

@@ -297,10 +295,6 @@ getcodec(PyObject *self, PyObject *encoding)
297295
return r;
298296
}
299297

300-
static struct PyMethodDef __methods[] = {
301-
{"getcodec", (PyCFunction)getcodec, METH_O, ""},
302-
{NULL, NULL},
303-
};
304298

305299
static int
306300
register_maps(PyObject *module)
@@ -309,12 +303,17 @@ register_maps(PyObject *module)
309303

310304
for (h = mapping_list; h->charset[0] != '\0'; h++) {
311305
char mhname[256] = "__map_";
312-
int r;
313306
strcpy(mhname + sizeof("__map_") - 1, h->charset);
314-
r = PyModule_AddObject(module, mhname,
315-
PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL));
316-
if (r == -1)
307+
308+
PyObject *capsule = PyCapsule_New((void *)h,
309+
PyMultibyteCodec_CAPSULE_NAME, NULL);
310+
if (capsule == NULL) {
311+
return -1;
312+
}
313+
if (PyModule_AddObject(module, mhname, capsule) < 0) {
314+
Py_DECREF(capsule);
317315
return -1;
316+
}
318317
}
319318
return 0;
320319
}
@@ -395,25 +394,36 @@ importmap(const char *modname, const char *symbol,
395394
}
396395
#endif
397396

397+
static int
398+
_cjk_exec(PyObject *module)
399+
{
400+
return register_maps(module);
401+
}
402+
403+
404+
static struct PyMethodDef _cjk_methods[] = {
405+
{"getcodec", (PyCFunction)getcodec, METH_O, ""},
406+
{NULL, NULL},
407+
};
408+
409+
static PyModuleDef_Slot _cjk_slots[] = {
410+
{Py_mod_exec, _cjk_exec},
411+
{0, NULL}
412+
};
413+
398414
#define I_AM_A_MODULE_FOR(loc) \
399-
static struct PyModuleDef __module = { \
415+
static struct PyModuleDef _cjk_module = { \
400416
PyModuleDef_HEAD_INIT, \
401-
"_codecs_"#loc, \
402-
NULL, \
403-
0, \
404-
__methods, \
405-
NULL, \
406-
NULL, \
407-
NULL, \
408-
NULL \
417+
.m_name = "_codecs_"#loc, \
418+
.m_size = 0, \
419+
.m_methods = _cjk_methods, \
420+
.m_slots = _cjk_slots, \
409421
}; \
422+
\
410423
PyMODINIT_FUNC \
411424
PyInit__codecs_##loc(void) \
412425
{ \
413-
PyObject *m = PyModule_Create(&__module); \
414-
if (m != NULL) \
415-
(void)register_maps(m); \
416-
return m; \
426+
return PyModuleDef_Init(&_cjk_module); \
417427
}
418428

419429
#endif

0 commit comments

Comments
 (0)