From 1c38078e132a20c66d9d17a7d613b4790a6fabf0 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Mon, 29 May 2023 16:33:19 +0200 Subject: [PATCH 1/3] gh-105042: Disable unmatched parens syntax error in python tokenize --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 3 + Lib/tokenize.py | 10 +++- Parser/tokenizer.c | 60 ++++++++++--------- Parser/tokenizer.h | 1 + Python/Python-tokenize.c | 13 +++- Python/clinic/Python-tokenize.c.h | 21 ++++--- 9 files changed, 68 insertions(+), 43 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index a83f8fc49fc5ef..ea2d3b08ec2591 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -975,6 +975,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(id)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ident)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignore)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignore_unmatched_parens)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(imag)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(importlib)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(in_fd)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index dd6a62f53a9989..3d9ae3178c0f7b 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -463,6 +463,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(id) STRUCT_FOR_ID(ident) STRUCT_FOR_ID(ignore) + STRUCT_FOR_ID(ignore_unmatched_parens) STRUCT_FOR_ID(imag) STRUCT_FOR_ID(importlib) STRUCT_FOR_ID(in_fd) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index d689f717eaf94f..662624c206bb16 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -969,6 +969,7 @@ extern "C" { INIT_ID(id), \ INIT_ID(ident), \ INIT_ID(ignore), \ + INIT_ID(ignore_unmatched_parens), \ INIT_ID(imag), \ INIT_ID(importlib), \ INIT_ID(in_fd), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index db6a157ee7afbf..e050985f0faf73 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1230,6 +1230,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(ignore); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(ignore_unmatched_parens); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(imag); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 4895e94d1dfda7..76b0ab09d59f55 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -447,7 +447,9 @@ def tokenize(readline): def _tokenize(rl_gen, encoding): source = b"".join(rl_gen).decode(encoding) - for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True): + for token in _generate_tokens_from_c_tokenizer(source, + extra_tokens=True, + ignore_unmatched_parens=True): yield token def 
generate_tokens(readline): @@ -531,10 +533,12 @@ def error(message, filename=None, location=None): perror("unexpected error: %s" % err) raise -def _generate_tokens_from_c_tokenizer(source, extra_tokens=False): +def _generate_tokens_from_c_tokenizer(source, extra_tokens=False, ignore_unmatched_parens=False): """Tokenize a source reading Python code as unicode strings using the internal C tokenizer""" import _tokenize as c_tokenizer - for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens): + for info in c_tokenizer.TokenizerIter(source, + extra_tokens=extra_tokens, + ignore_unmatched_parens=ignore_unmatched_parens): yield TokenInfo._make(info) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 59c817293fbfcd..cb2c87de461a57 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -113,6 +113,7 @@ tok_new(void) tok->report_warnings = 1; tok->tok_extra_tokens = 0; tok->comment_newline = 0; + tok->ignore_unmatched_parens = 0; tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0}; tok->tok_mode_stack_index = 0; tok->tok_report_warnings = 1; @@ -2496,41 +2497,42 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t case ')': case ']': case '}': - if (!tok->level) { + if (!tok->ignore_unmatched_parens && !tok->level) { if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') { return MAKE_TOKEN(syntaxerror(tok, "f-string: single '}' is not allowed")); } return MAKE_TOKEN(syntaxerror(tok, "unmatched '%c'", c)); } - tok->level--; - int opening = tok->parenstack[tok->level]; - if (!((opening == '(' && c == ')') || - (opening == '[' && c == ']') || - (opening == '{' && c == '}'))) - { - /* If the opening bracket belongs to an f-string's expression - part (e.g. f"{)}") and the closing bracket is an arbitrary - nested expression, then instead of matching a different - syntactical construct with it; we'll throw an unmatched - parentheses error. */ - if (INSIDE_FSTRING(tok) && opening == '{') { - assert(current_tok->curly_bracket_depth >= 0); - int previous_bracket = current_tok->curly_bracket_depth - 1; - if (previous_bracket == current_tok->curly_bracket_expr_start_depth) { - return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c)); + if (tok->level > 0) { + tok->level--; + int opening = tok->parenstack[tok->level]; + if (!tok->ignore_unmatched_parens && !((opening == '(' && c == ')') || + (opening == '[' && c == ']') || + (opening == '{' && c == '}'))) { + /* If the opening bracket belongs to an f-string's expression + part (e.g. f"{)}") and the closing bracket is an arbitrary + nested expression, then instead of matching a different + syntactical construct with it; we'll throw an unmatched + parentheses error. 
*/ + if (INSIDE_FSTRING(tok) && opening == '{') { + assert(current_tok->curly_bracket_depth >= 0); + int previous_bracket = current_tok->curly_bracket_depth - 1; + if (previous_bracket == current_tok->curly_bracket_expr_start_depth) { + return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c)); + } + } + if (tok->parenlinenostack[tok->level] != tok->lineno) { + return MAKE_TOKEN(syntaxerror(tok, + "closing parenthesis '%c' does not match " + "opening parenthesis '%c' on line %d", + c, opening, tok->parenlinenostack[tok->level])); + } + else { + return MAKE_TOKEN(syntaxerror(tok, + "closing parenthesis '%c' does not match " + "opening parenthesis '%c'", + c, opening)); } - } - if (tok->parenlinenostack[tok->level] != tok->lineno) { - return MAKE_TOKEN(syntaxerror(tok, - "closing parenthesis '%c' does not match " - "opening parenthesis '%c' on line %d", - c, opening, tok->parenlinenostack[tok->level])); - } - else { - return MAKE_TOKEN(syntaxerror(tok, - "closing parenthesis '%c' does not match " - "opening parenthesis '%c'", - c, opening)); } } diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 02749e355da812..04083279269d6c 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -130,6 +130,7 @@ struct tok_state { int tok_report_warnings; int tok_extra_tokens; int comment_newline; + int ignore_unmatched_parens; #ifdef Py_DEBUG int debug; #endif diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 4eced66b617708..d0030b16d24010 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -40,12 +40,13 @@ _tokenizer.tokenizeriter.__new__ as tokenizeriter_new source: str * extra_tokens: bool + ignore_unmatched_parens: bool [clinic start generated code]*/ static PyObject * tokenizeriter_new_impl(PyTypeObject *type, const char *source, - int extra_tokens) -/*[clinic end generated code: output=f6f9d8b4beec8106 input=90dc5b6a5df180c2]*/ + int extra_tokens, int ignore_unmatched_parens) +/*[clinic end generated code: output=5437e7bbc30de3f4 input=7f6b22d7c235ffd7]*/ { tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0); if (self == NULL) { @@ -64,6 +65,12 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source, if (extra_tokens) { self->tok->tok_extra_tokens = 1; } + if (ignore_unmatched_parens) { + self->tok->ignore_unmatched_parens = 1; + } + if (ignore_unmatched_parens) { + self->tok->ignore_unmatched_parens = 1; + } self->done = 0; return (PyObject *)self; } @@ -82,7 +89,7 @@ _tokenizer_error(struct tok_state *tok) msg = "invalid token"; break; case E_EOF: - if (tok->level) { + if (tok->level > 0) { PyErr_Format(PyExc_SyntaxError, "parenthesis '%c' was never closed", tok->parenstack[tok->level-1]); diff --git a/Python/clinic/Python-tokenize.c.h b/Python/clinic/Python-tokenize.c.h index 7e779388a92dbf..dea7976140c725 100644 --- a/Python/clinic/Python-tokenize.c.h +++ b/Python/clinic/Python-tokenize.c.h @@ -10,7 +10,7 @@ preserve static PyObject * tokenizeriter_new_impl(PyTypeObject *type, const char *source, - int extra_tokens); + int extra_tokens, int ignore_unmatched_parens); static PyObject * tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -18,14 +18,14 @@ tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD PyObject *ob_item[NUM_KEYWORDS]; } _kwtuple = { .ob_base = 
PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(source), &_Py_ID(extra_tokens), }, + .ob_item = { &_Py_ID(source), &_Py_ID(extra_tokens), &_Py_ID(ignore_unmatched_parens), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -34,20 +34,21 @@ tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"source", "extra_tokens", NULL}; + static const char * const _keywords[] = {"source", "extra_tokens", "ignore_unmatched_parens", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "tokenizeriter", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); const char *source; int extra_tokens; + int ignore_unmatched_parens; - fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 1, argsbuf); + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 2, argsbuf); if (!fastargs) { goto exit; } @@ -68,9 +69,13 @@ tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) if (extra_tokens < 0) { goto exit; } - return_value = tokenizeriter_new_impl(type, source, extra_tokens); + ignore_unmatched_parens = PyObject_IsTrue(fastargs[2]); + if (ignore_unmatched_parens < 0) { + goto exit; + } + return_value = tokenizeriter_new_impl(type, source, extra_tokens, ignore_unmatched_parens); exit: return return_value; } -/*[clinic end generated code: output=940b564c67f6e0e2 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=adf7070944a94fba input=a9049054013a1b77]*/ From ed1237aa654cfd12d1fea47eccfb506bf6666998 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 30 May 2023 11:54:10 +0200 Subject: [PATCH 2/3] Reuse flag and allow f-string check --- .../pycore_global_objects_fini_generated.h | 1 - Include/internal/pycore_global_strings.h | 1 - .../internal/pycore_runtime_init_generated.h | 1 - .../internal/pycore_unicodeobject_generated.h | 3 --- Lib/tokenize.py | 10 +++------ Parser/tokenizer.c | 15 +++++++------ Parser/tokenizer.h | 1 - Python/Python-tokenize.c | 11 ++-------- Python/clinic/Python-tokenize.c.h | 21 +++++++------------ 9 files changed, 20 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index ea2d3b08ec2591..a83f8fc49fc5ef 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -975,7 +975,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(id)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ident)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignore)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignore_unmatched_parens)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(imag)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(importlib)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(in_fd)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 3d9ae3178c0f7b..dd6a62f53a9989 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -463,7 +463,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(id) STRUCT_FOR_ID(ident) STRUCT_FOR_ID(ignore) - 
STRUCT_FOR_ID(ignore_unmatched_parens) STRUCT_FOR_ID(imag) STRUCT_FOR_ID(importlib) STRUCT_FOR_ID(in_fd) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 662624c206bb16..d689f717eaf94f 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -969,7 +969,6 @@ extern "C" { INIT_ID(id), \ INIT_ID(ident), \ INIT_ID(ignore), \ - INIT_ID(ignore_unmatched_parens), \ INIT_ID(imag), \ INIT_ID(importlib), \ INIT_ID(in_fd), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index e050985f0faf73..db6a157ee7afbf 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1230,9 +1230,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(ignore); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); - string = &_Py_ID(ignore_unmatched_parens); - assert(_PyUnicode_CheckConsistency(string, 1)); - _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(imag); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 76b0ab09d59f55..4895e94d1dfda7 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -447,9 +447,7 @@ def tokenize(readline): def _tokenize(rl_gen, encoding): source = b"".join(rl_gen).decode(encoding) - for token in _generate_tokens_from_c_tokenizer(source, - extra_tokens=True, - ignore_unmatched_parens=True): + for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True): yield token def generate_tokens(readline): @@ -533,12 +531,10 @@ def error(message, filename=None, location=None): perror("unexpected error: %s" % err) raise -def _generate_tokens_from_c_tokenizer(source, extra_tokens=False, ignore_unmatched_parens=False): +def _generate_tokens_from_c_tokenizer(source, extra_tokens=False): """Tokenize a source reading Python code as unicode strings using the internal C tokenizer""" import _tokenize as c_tokenizer - for info in c_tokenizer.TokenizerIter(source, - extra_tokens=extra_tokens, - ignore_unmatched_parens=ignore_unmatched_parens): + for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens): yield TokenInfo._make(info) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index cb2c87de461a57..7ddf02b09c1e78 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -113,7 +113,6 @@ tok_new(void) tok->report_warnings = 1; tok->tok_extra_tokens = 0; tok->comment_newline = 0; - tok->ignore_unmatched_parens = 0; tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0}; tok->tok_mode_stack_index = 0; tok->tok_report_warnings = 1; @@ -2497,18 +2496,18 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t case ')': case ']': case '}': - if (!tok->ignore_unmatched_parens && !tok->level) { - if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') { - return MAKE_TOKEN(syntaxerror(tok, "f-string: single '}' is not allowed")); - } + if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') { + return MAKE_TOKEN(syntaxerror(tok, "f-string: single '}' is not allowed")); + } + if (!tok->tok_extra_tokens && !tok->level) { return MAKE_TOKEN(syntaxerror(tok, "unmatched '%c'", c)); } if (tok->level > 0) { tok->level--; int 
opening = tok->parenstack[tok->level]; - if (!tok->ignore_unmatched_parens && !((opening == '(' && c == ')') || - (opening == '[' && c == ']') || - (opening == '{' && c == '}'))) { + if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') || + (opening == '[' && c == ']') || + (opening == '{' && c == '}'))) { /* If the opening bracket belongs to an f-string's expression part (e.g. f"{)}") and the closing bracket is an arbitrary nested expression, then instead of matching a different diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 04083279269d6c..02749e355da812 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -130,7 +130,6 @@ struct tok_state { int tok_report_warnings; int tok_extra_tokens; int comment_newline; - int ignore_unmatched_parens; #ifdef Py_DEBUG int debug; #endif diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index d0030b16d24010..2de1daae8c0ddc 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -40,13 +40,12 @@ _tokenizer.tokenizeriter.__new__ as tokenizeriter_new source: str * extra_tokens: bool - ignore_unmatched_parens: bool [clinic start generated code]*/ static PyObject * tokenizeriter_new_impl(PyTypeObject *type, const char *source, - int extra_tokens, int ignore_unmatched_parens) -/*[clinic end generated code: output=5437e7bbc30de3f4 input=7f6b22d7c235ffd7]*/ + int extra_tokens) +/*[clinic end generated code: output=f6f9d8b4beec8106 input=90dc5b6a5df180c2]*/ { tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0); if (self == NULL) { @@ -65,12 +64,6 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source, if (extra_tokens) { self->tok->tok_extra_tokens = 1; } - if (ignore_unmatched_parens) { - self->tok->ignore_unmatched_parens = 1; - } - if (ignore_unmatched_parens) { - self->tok->ignore_unmatched_parens = 1; - } self->done = 0; return (PyObject *)self; } diff --git a/Python/clinic/Python-tokenize.c.h b/Python/clinic/Python-tokenize.c.h index dea7976140c725..7e779388a92dbf 100644 --- a/Python/clinic/Python-tokenize.c.h +++ b/Python/clinic/Python-tokenize.c.h @@ -10,7 +10,7 @@ preserve static PyObject * tokenizeriter_new_impl(PyTypeObject *type, const char *source, - int extra_tokens, int ignore_unmatched_parens); + int extra_tokens); static PyObject * tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -18,14 +18,14 @@ tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 2 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD PyObject *ob_item[NUM_KEYWORDS]; } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(source), &_Py_ID(extra_tokens), &_Py_ID(ignore_unmatched_parens), }, + .ob_item = { &_Py_ID(source), &_Py_ID(extra_tokens), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -34,21 +34,20 @@ tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"source", "extra_tokens", "ignore_unmatched_parens", NULL}; + static const char * const _keywords[] = {"source", "extra_tokens", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "tokenizeriter", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[2]; PyObject * const *fastargs; Py_ssize_t nargs = 
PyTuple_GET_SIZE(args); const char *source; int extra_tokens; - int ignore_unmatched_parens; - fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 2, argsbuf); + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 1, argsbuf); if (!fastargs) { goto exit; } @@ -69,13 +68,9 @@ tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) if (extra_tokens < 0) { goto exit; } - ignore_unmatched_parens = PyObject_IsTrue(fastargs[2]); - if (ignore_unmatched_parens < 0) { - goto exit; - } - return_value = tokenizeriter_new_impl(type, source, extra_tokens, ignore_unmatched_parens); + return_value = tokenizeriter_new_impl(type, source, extra_tokens); exit: return return_value; } -/*[clinic end generated code: output=adf7070944a94fba input=a9049054013a1b77]*/ +/*[clinic end generated code: output=940b564c67f6e0e2 input=a9049054013a1b77]*/ From c7481a589875bc1dc7d2be29043379f8c779b6ed Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 30 May 2023 19:07:34 +0200 Subject: [PATCH 3/3] Add tests --- Lib/test/inspect_fodder.py | 5 +++++ Lib/test/test_inspect.py | 4 +++- Lib/test/test_tokenize.py | 7 +++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Lib/test/inspect_fodder.py b/Lib/test/inspect_fodder.py index 567dfbab804867..60ba7aa78394e8 100644 --- a/Lib/test/inspect_fodder.py +++ b/Lib/test/inspect_fodder.py @@ -113,3 +113,8 @@ async def asyncf(self): # after asyncf - line 113 # end of WhichComments - line 114 # after WhichComments - line 115 + +# Test that getsource works on a line that includes +# a closing parenthesis with the opening paren being in another line +( +); after_closing = lambda: 1 diff --git a/Lib/test/test_inspect.py b/Lib/test/test_inspect.py index a7bd680d0f5bcc..6a49e3b5530e16 100644 --- a/Lib/test/test_inspect.py +++ b/Lib/test/test_inspect.py @@ -557,7 +557,8 @@ def test_getclasses(self): def test_getfunctions(self): functions = inspect.getmembers(mod, inspect.isfunction) - self.assertEqual(functions, [('eggs', mod.eggs), + self.assertEqual(functions, [('after_closing', mod.after_closing), + ('eggs', mod.eggs), ('lobbest', mod.lobbest), ('spam', mod.spam)]) @@ -641,6 +642,7 @@ def test_getsource(self): self.assertSourceEqual(git.abuse, 29, 39) self.assertSourceEqual(mod.StupidGit, 21, 51) self.assertSourceEqual(mod.lobbest, 75, 76) + self.assertSourceEqual(mod.after_closing, 120, 120) def test_getsourcefile(self): self.assertEqual(normcase(inspect.getsourcefile(mod.spam)), modfile) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index cd11dddd0fe51a..3adcc4e420671c 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1100,6 +1100,13 @@ def test_newline_after_parenthesized_block_with_comment(self): NEWLINE '\\n' (4, 1) (4, 2) """) + def test_closing_parenthesis_from_different_line(self): + self.check_tokenize("); x", """\ + OP ')' (1, 0) (1, 1) + OP ';' (1, 1) (1, 2) + NAME 'x' (1, 3) (1, 4) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format.
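
A minimal sketch of the behavior this series enables, based on the tests added in PATCH 3/3: once the C tokenizer stops raising on an unmatched closing parenthesis when extra tokens are requested (PATCH 2/3 keys this off `tok_extra_tokens`), the high-level `tokenize` API emits the stray `)` as an ordinary OP token, which in turn lets `inspect.getsource` cope with source lines such as `); after_closing = lambda: 1`. The exact token stream below is taken from `test_closing_parenthesis_from_different_line`; the rest is an illustrative usage sketch, not part of the patch.

```python
import io
import tokenize

# With this change, a stray closing parenthesis no longer aborts the
# high-level tokenize API (which passes extra_tokens=True down to the
# C tokenizer); it is emitted as a regular OP token instead.
for tok in tokenize.generate_tokens(io.StringIO("); x").readline):
    print(tok)
# Per the new test, this yields OP ')', OP ';', NAME 'x'
# (plus the usual trailing NEWLINE/ENDMARKER).

# The strict behavior is unchanged for the default code path:
# _generate_tokens_from_c_tokenizer(source) with extra_tokens=False
# still reports "unmatched ')'" as a SyntaxError while iterating.
```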