From 1631b56536671ef75251a3ced71d8063b495e294 Mon Sep 17 00:00:00 2001 From: Quentin Santos Date: Tue, 19 Dec 2023 08:27:33 +0100 Subject: [PATCH 1/2] gh-113274: fix EUC-JP decoding of FULLWIDTH TILDE --- ...-12-19-08-30-49.gh-issue-113274.0QCYYu.rst | 2 + Modules/cjkcodecs/mappings_jp.h | 104 +++++++++--------- Tools/unicode/genmap_japanese.py | 6 + 3 files changed, 60 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-12-19-08-30-49.gh-issue-113274.0QCYYu.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-12-19-08-30-49.gh-issue-113274.0QCYYu.rst b/Misc/NEWS.d/next/Core and Builtins/2023-12-19-08-30-49.gh-issue-113274.0QCYYu.rst new file mode 100644 index 00000000000000..2a164909dfaacd --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-12-19-08-30-49.gh-issue-113274.0QCYYu.rst @@ -0,0 +1,2 @@ +In EUC-JP, b'\x8f\xa2\xb7' now decodes to ～ (FULLWIDTH TILDE) instead of ~ +(TILDE). diff --git a/Modules/cjkcodecs/mappings_jp.h b/Modules/cjkcodecs/mappings_jp.h index 409aeae25c964a..1f4cd38a57ae43 100644 --- a/Modules/cjkcodecs/mappings_jp.h +++ b/Modules/cjkcodecs/mappings_jp.h @@ -591,10 +591,10 @@ __jisx0208_decmap+6950,33,38},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, }; static const ucs2_t __jisx0212_decmap[6179] = { -728,711,184,729,733,175,731,730,126,900,901,U,U,U,U,U,U,U,U,161,166,191,U,U,U, -U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,186,170, -169,174,8482,164,8470,902,904,905,906,938,U,908,U,910,939,U,911,U,U,U,U,940, -941,942,943,970,912,972,962,973,971,944,974,1026,1027,1028,1029,1030,1031, +728,711,184,729,733,175,731,730,65374,900,901,U,U,U,U,U,U,U,U,161,166,191,U,U, +U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,186, +170,169,174,8482,164,8470,902,904,905,906,938,U,908,U,910,939,U,911,U,U,U,U, +940,941,942,943,970,912,972,962,973,971,944,974,1026,1027,1028,1029,1030,1031,
1032,1033,1034,1035,1036,1038,1039,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U, U,U,U,U,U,U,U,U,U,U,U,U,U,U,1106,1107,1108,1109,1110,1111,1112,1113,1114,1115, 1116,1118,1119,198,272,U,294,U,306,U,321,319,U,330,216,338,U,358,222,U,U,U,U, @@ -1114,51 +1114,51 @@ __jisx0212_decmap+6018,33,126},{__jisx0212_decmap+6112,33,99},{0,0,0},{0,0,0}, }; static const DBCHAR __jisxcommon_encmap[22016] = { -8512,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,41527, -N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,41538, -8561,8562,41584,N,41539,8568,8495,41581,41580,N,8780,N,41582,41524,8555,8542, -N,N,8493,N,8825,N,41521,N,41579,N,N,N,N,41540,43554,43553,43556,43562,43555, -43561,43297,43566,43570,43569,43572,43571,43584,43583,43586,43585,N,43600, -43602,43601,43604,43608,43603,8543,43308,43619,43618,43621,43620,43634,43312, -43342,43810,43809,43812,43818,43811,43817,43329,43822,43826,43825,43828,43827, -43840,43839,43842,43841,43331,43856,43858,43857,43860,43864,43859,8544,43340, -43875,43874,43877,43876,43890,43344,43891,43559,43815,43557,43813,43560,43816, -43563,43819,43564,43820,43567,43823,43565,43821,43568,43824,43298,43330,43575, -43831,N,N,43574,43830,43576,43832,43573,43829,43578,43834,43579,43835,43581, -43837,43580,N,43582,43838,43300,43332,43591,43847,43589,43845,N,N,43590,43846, -43588,43333,43302,43334,43592,43848,43593,43849,43335,43594,43850,43596,43852, -43595,43851,43305,43337,43304,43336,43597,43853,43599,43855,43598,43854,43338, -43307,43339,43607,43863,N,N,43606,43862,43309,43341,43609,43865,43611,43867, -43610,43866,43612,43868,43613,43869,43615,43871,43614,43870,43617,43873,43616, -43872,43311,43343,43628,43884,43625,43881,43622,43878,43627,43883,43624,43880, -43626,43882,43633,43889,43636,43892,43635,43637,43893,43639,43895,43638,43894, -N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, -N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, 
-43558,43814,43587,43843,43605,43861,43623,43879,43632,43888,43629,43885,43631, -43887,43630,43886,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,43833,41520, -N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,41519,41522,41526,41525,N,41523,41528,41529, -42593,N,42594,42595,42596,N,42599,N,42601,42604,42614,9761,9762,9763,9764, -9765,9766,9767,9768,9769,9770,9771,9772,9773,9774,9775,9776,9777,N,9778,9779, -9780,9781,9782,9783,9784,42597,42602,42609,42610,42611,42612,42619,9793,9794, -9795,9796,9797,9798,9799,9800,9801,9802,9803,9804,9805,9806,9807,9808,9809, -42616,9810,9811,9812,9813,9814,9815,9816,42613,42618,42615,42617,42620,10023, -42818,42819,42820,42821,42822,42823,42824,42825,42826,42827,42828,N,42829, -42830,10017,10018,10019,10020,10021,10022,10024,10025,10026,10027,10028,10029, -10030,10031,10032,10033,10034,10035,10036,10037,10038,10039,10040,10041,10042, -10043,10044,10045,10046,10047,10048,10049,10065,10066,10067,10068,10069,10070, -10072,10073,10074,10075,10076,10077,10078,10079,10080,10081,10082,10083,10084, -10085,10086,10087,10088,10089,10090,10091,10092,10093,10094,10095,10096,10097, -N,10071,42866,42867,42868,42869,42870,42871,42872,42873,42874,42875,42876,N, -42877,42878,8510,N,N,N,N,8509,8514,N,8518,8519,N,N,8520,8521,N,N,8823,8824,N, -N,N,8517,8516,N,N,N,N,N,N,N,N,N,8819,N,8556,8557,N,N,N,N,N,N,N,8744,8558,N,N, -N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,41585,N,N,N,N,N,N,N,N,N,N,N,41583,N,N,N,N,N,N, -N,N,8818,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, -N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, -N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8747,8748,8746,8749,N,N, -N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, -N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8781,N,8782,8783,N,8799,8784,N,N,N, -8800,8762,N,N,8763,N,N,N,N,N,N,8541,N,N,N,N,N,N,N,8805,N,N,8807,8551,N,8796,N, -N,N,N,N,N,8778,8779,8769,8768,8809,8810,N,N,N,N,N,N,N,8552,8808,N,N,N,N,N,N,N, 
+8512,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, +N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,41538,8561, +8562,41584,N,41539,8568,8495,41581,41580,N,8780,N,41582,41524,8555,8542,N,N, +8493,N,8825,N,41521,N,41579,N,N,N,N,41540,43554,43553,43556,43562,43555,43561, +43297,43566,43570,43569,43572,43571,43584,43583,43586,43585,N,43600,43602, +43601,43604,43608,43603,8543,43308,43619,43618,43621,43620,43634,43312,43342, +43810,43809,43812,43818,43811,43817,43329,43822,43826,43825,43828,43827,43840, +43839,43842,43841,43331,43856,43858,43857,43860,43864,43859,8544,43340,43875, +43874,43877,43876,43890,43344,43891,43559,43815,43557,43813,43560,43816,43563, +43819,43564,43820,43567,43823,43565,43821,43568,43824,43298,43330,43575,43831, +N,N,43574,43830,43576,43832,43573,43829,43578,43834,43579,43835,43581,43837, +43580,N,43582,43838,43300,43332,43591,43847,43589,43845,N,N,43590,43846,43588, +43333,43302,43334,43592,43848,43593,43849,43335,43594,43850,43596,43852,43595, +43851,43305,43337,43304,43336,43597,43853,43599,43855,43598,43854,43338,43307, +43339,43607,43863,N,N,43606,43862,43309,43341,43609,43865,43611,43867,43610, +43866,43612,43868,43613,43869,43615,43871,43614,43870,43617,43873,43616,43872, +43311,43343,43628,43884,43625,43881,43622,43878,43627,43883,43624,43880,43626, +43882,43633,43889,43636,43892,43635,43637,43893,43639,43895,43638,43894,N,N,N, +N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, +N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,43558, +43814,43587,43843,43605,43861,43623,43879,43632,43888,43629,43885,43631,43887, +43630,43886,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,43833,41520,N,N,N, +N,N,N,N,N,N,N,N,N,N,N,N,N,41519,41522,41526,41525,N,41523,41528,41529,42593,N, +42594,42595,42596,N,42599,N,42601,42604,42614,9761,9762,9763,9764,9765,9766, +9767,9768,9769,9770,9771,9772,9773,9774,9775,9776,9777,N,9778,9779,9780,9781, 
+9782,9783,9784,42597,42602,42609,42610,42611,42612,42619,9793,9794,9795,9796, +9797,9798,9799,9800,9801,9802,9803,9804,9805,9806,9807,9808,9809,42616,9810, +9811,9812,9813,9814,9815,9816,42613,42618,42615,42617,42620,10023,42818,42819, +42820,42821,42822,42823,42824,42825,42826,42827,42828,N,42829,42830,10017, +10018,10019,10020,10021,10022,10024,10025,10026,10027,10028,10029,10030,10031, +10032,10033,10034,10035,10036,10037,10038,10039,10040,10041,10042,10043,10044, +10045,10046,10047,10048,10049,10065,10066,10067,10068,10069,10070,10072,10073, +10074,10075,10076,10077,10078,10079,10080,10081,10082,10083,10084,10085,10086, +10087,10088,10089,10090,10091,10092,10093,10094,10095,10096,10097,N,10071, +42866,42867,42868,42869,42870,42871,42872,42873,42874,42875,42876,N,42877, +42878,8510,N,N,N,N,8509,8514,N,8518,8519,N,N,8520,8521,N,N,8823,8824,N,N,N, +8517,8516,N,N,N,N,N,N,N,N,N,8819,N,8556,8557,N,N,N,N,N,N,N,8744,8558,N,N,N,N, +N,N,N,N,N,N,N,N,N,N,N,N,N,N,41585,N,N,N,N,N,N,N,N,N,N,N,41583,N,N,N,N,N,N,N,N, +8818,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, +N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, +N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8747,8748,8746,8749,N,N,N,N, +N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, +N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8781,N,8782,8783,N,8799,8784,N,N,N,8800, +8762,N,N,8763,N,N,N,N,N,N,8541,N,N,N,N,N,N,N,8805,N,N,8807,8551,N,8796,N,N,N, +N,N,N,8778,8779,8769,8768,8809,8810,N,N,N,N,N,N,N,8552,8808,N,N,N,N,N,N,N, 8806,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8802,N,N,N,N,N,N,N,N,N,N,N,N,N, 8546,8801,N,N,N,N,8549,8550,N,N,8803,8804,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, N,N,N,N,8766,8767,N,N,8764,8765,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, @@ -2360,11 +2360,11 @@ N,N,29557,29558,60749,60750,29560,N,29559,60751,60752,60753,60754,60755,29562, 9032,9033,9034,9035,9036,9037,9038,9039,9040,9041,9042,9043,9044,9045,9046, 
9047,9048,9049,9050,8526,N,8527,8496,8498,8494,9057,9058,9059,9060,9061,9062, 9063,9064,9065,9066,9067,9068,9069,9070,9071,9072,9073,9074,9075,9076,9077, -9078,9079,9080,9081,9082,8528,8515,8529,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, +9078,9079,9080,9081,9082,8528,8515,8529,41527,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, -N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8497, -N,8559, +N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N, +8497,N,8559, }; static const struct unim_index jisxcommon_encmap[256] = { diff --git a/Tools/unicode/genmap_japanese.py b/Tools/unicode/genmap_japanese.py index 838317fa54175e..15e8cb4a9d2774 100644 --- a/Tools/unicode/genmap_japanese.py +++ b/Tools/unicode/genmap_japanese.py @@ -86,6 +86,12 @@ def main(): cp932decmap = loadmap(cp932file) jis3decmap, jis4decmap, jis3_2_decmap, jis4_2_decmap, jis3_pairdecmap = loadmap_jisx0213(jisx0213file) + # In JIS0212.TXT provided by Unicode, JIS X 0212 entry 0x2237 is mapped to + # ~ (TILDE). However, EUC-JP already includes US-ASCII, so it is often + # mapped to ～ (FULLWIDTH TILDE).
See + # https://encoding.spec.whatwg.org/index-jis0212.txt + jisx0212decmap[34][55] = ord('～') + if jis3decmap[0x21][0x24] != 0xff0c: raise SystemExit('Please adjust your JIS X 0213 map using jisx0213-2000-std.txt.diff') From 87d6103c1d85b0ebe6214a495349e5c051daf136 Mon Sep 17 00:00:00 2001 From: Quentin Santos Date: Fri, 22 Dec 2023 07:57:52 +0100 Subject: [PATCH 2/2] Pass FULLWIDTH TILDE in euc_jisx0213 --- Lib/test/test_codecmaps_jp.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/test/test_codecmaps_jp.py b/Lib/test/test_codecmaps_jp.py index fdfec8085b24e4..2ea18c659da22c 100644 --- a/Lib/test/test_codecmaps_jp.py +++ b/Lib/test/test_codecmaps_jp.py @@ -47,6 +47,9 @@ class TestEUCJISX0213Map(multibytecodec_support.TestBase_Mapping, encoding = 'euc_jisx0213' mapfilename = 'EUC-JISX0213.TXT' mapfileurl = 'http://www.pythontest.net/unicode/EUC-JISX0213.TXT' + pass_enctest = [ + (b'\xa2\xb2', '～'), + ] class TestSJISX0213Map(multibytecodec_support.TestBase_Mapping,