Skip to content

bpo-45434: Mark the PyTokenizer C API as private #28924

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions Parser/pegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,7 @@ _PyPegen_fill_token(Parser *p)
{
const char *start;
const char *end;
int type = PyTokenizer_Get(p->tok, &start, &end);
int type = _PyTokenizer_Get(p->tok, &start, &end);

// Record and skip '# type: ignore' comments
while (type == TYPE_IGNORE) {
Expand All @@ -746,7 +746,7 @@ _PyPegen_fill_token(Parser *p)
PyErr_NoMemory();
return -1;
}
type = PyTokenizer_Get(p->tok, &start, &end);
type = _PyTokenizer_Get(p->tok, &start, &end);
}

// If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
Expand Down Expand Up @@ -1306,7 +1306,7 @@ _PyPegen_check_tokenizer_errors(Parser *p) {
for (;;) {
const char *start;
const char *end;
switch (PyTokenizer_Get(p->tok, &start, &end)) {
switch (_PyTokenizer_Get(p->tok, &start, &end)) {
case ERRORTOKEN:
if (p->tok->level != 0) {
int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
Expand Down Expand Up @@ -1411,7 +1411,7 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
const char *enc, const char *ps1, const char *ps2,
PyCompilerFlags *flags, int *errcode, PyArena *arena)
{
struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2);
if (tok == NULL) {
if (PyErr_Occurred()) {
raise_tokenizer_init_error(filename_ob);
Expand Down Expand Up @@ -1441,7 +1441,7 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
_PyPegen_Parser_Free(p);

error:
PyTokenizer_Free(tok);
_PyTokenizer_Free(tok);
return result;
}

Expand All @@ -1453,9 +1453,9 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen

struct tok_state *tok;
if (flags == NULL || flags->cf_flags & PyCF_IGNORE_COOKIE) {
tok = PyTokenizer_FromUTF8(str, exec_input);
tok = _PyTokenizer_FromUTF8(str, exec_input);
} else {
tok = PyTokenizer_FromString(str, exec_input);
tok = _PyTokenizer_FromString(str, exec_input);
}
if (tok == NULL) {
if (PyErr_Occurred()) {
Expand Down Expand Up @@ -1483,7 +1483,7 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen
_PyPegen_Parser_Free(p);

error:
PyTokenizer_Free(tok);
_PyTokenizer_Free(tok);
return result;
}

Expand Down
4 changes: 2 additions & 2 deletions Parser/string_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
str[0] = '(';
str[len+1] = ')';

struct tok_state* tok = PyTokenizer_FromString(str, 1);
struct tok_state* tok = _PyTokenizer_FromString(str, 1);
if (tok == NULL) {
PyMem_Free(str);
return NULL;
Expand All @@ -409,7 +409,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
exit:
PyMem_Free(str);
_PyPegen_Parser_Free(p2);
PyTokenizer_Free(tok);
_PyTokenizer_Free(tok);
return result;
}

Expand Down
47 changes: 20 additions & 27 deletions Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ static char *
error_ret(struct tok_state *tok) /* XXX */
{
tok->decoding_erred = 1;
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
if (tok->fp != NULL && tok->buf != NULL) /* see _PyTokenizer_Free */
PyMem_Free(tok->buf);
tok->buf = tok->cur = tok->inp = NULL;
tok->start = NULL;
Expand Down Expand Up @@ -702,7 +702,7 @@ decode_str(const char *input, int single, struct tok_state *tok)
/* Set up tokenizer for string */

struct tok_state *
PyTokenizer_FromString(const char *str, int exec_input)
_PyTokenizer_FromString(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
char *decoded;
Expand All @@ -711,7 +711,7 @@ PyTokenizer_FromString(const char *str, int exec_input)
return NULL;
decoded = decode_str(str, exec_input, tok);
if (decoded == NULL) {
PyTokenizer_Free(tok);
_PyTokenizer_Free(tok);
return NULL;
}

Expand All @@ -723,23 +723,23 @@ PyTokenizer_FromString(const char *str, int exec_input)
/* Set up tokenizer for UTF-8 string */

struct tok_state *
PyTokenizer_FromUTF8(const char *str, int exec_input)
_PyTokenizer_FromUTF8(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
char *translated;
if (tok == NULL)
return NULL;
tok->input = translated = translate_newlines(str, exec_input, tok);
if (translated == NULL) {
PyTokenizer_Free(tok);
_PyTokenizer_Free(tok);
return NULL;
}
tok->decoding_state = STATE_NORMAL;
tok->enc = NULL;
tok->str = translated;
tok->encoding = new_string("utf-8", 5, tok);
if (!tok->encoding) {
PyTokenizer_Free(tok);
_PyTokenizer_Free(tok);
return NULL;
}

Expand All @@ -751,14 +751,14 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
/* Set up tokenizer for file */

struct tok_state *
PyTokenizer_FromFile(FILE *fp, const char* enc,
const char *ps1, const char *ps2)
_PyTokenizer_FromFile(FILE *fp, const char* enc,
const char *ps1, const char *ps2)
{
struct tok_state *tok = tok_new();
if (tok == NULL)
return NULL;
if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
PyTokenizer_Free(tok);
_PyTokenizer_Free(tok);
return NULL;
}
tok->cur = tok->inp = tok->buf;
Expand All @@ -771,7 +771,7 @@ PyTokenizer_FromFile(FILE *fp, const char* enc,
gets copied into the parse tree. */
tok->encoding = new_string(enc, strlen(enc), tok);
if (!tok->encoding) {
PyTokenizer_Free(tok);
_PyTokenizer_Free(tok);
return NULL;
}
tok->decoding_state = STATE_NORMAL;
Expand All @@ -782,7 +782,7 @@ PyTokenizer_FromFile(FILE *fp, const char* enc,
/* Free a tok_state structure */

void
PyTokenizer_Free(struct tok_state *tok)
_PyTokenizer_Free(struct tok_state *tok)
{
if (tok->encoding != NULL) {
PyMem_Free(tok->encoding);
Expand Down Expand Up @@ -2049,7 +2049,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}

int
PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
_PyTokenizer_Get(struct tok_state *tok,
const char **p_start, const char **p_end)
{
int result = tok_get(tok, p_start, p_end);
if (tok->decoding_erred) {
Expand All @@ -2062,15 +2063,15 @@ PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Get the encoding of a Python file. Check for the coding cookie and check if
the file starts with a BOM.

PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
_PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
encoding in the first or second line of the file (in which case the encoding
should be assumed to be UTF-8).

The char* returned is malloc'ed via PyMem_Malloc() and thus must be freed
by the caller. */

char *
PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
_PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
{
struct tok_state *tok;
FILE *fp;
Expand All @@ -2087,7 +2088,7 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
if (fp == NULL) {
return NULL;
}
tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
tok = _PyTokenizer_FromFile(fp, NULL, NULL, NULL);
if (tok == NULL) {
fclose(fp);
return NULL;
Expand All @@ -2100,12 +2101,12 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
tok->filename = PyUnicode_FromString("<string>");
if (tok->filename == NULL) {
fclose(fp);
PyTokenizer_Free(tok);
_PyTokenizer_Free(tok);
return encoding;
}
}
while (tok->lineno < 2 && tok->done == E_OK) {
PyTokenizer_Get(tok, &p_start, &p_end);
_PyTokenizer_Get(tok, &p_start, &p_end);
}
fclose(fp);
if (tok->encoding) {
Expand All @@ -2114,24 +2115,16 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
strcpy(encoding, tok->encoding);
}
}
PyTokenizer_Free(tok);
_PyTokenizer_Free(tok);
return encoding;
}

char *
PyTokenizer_FindEncoding(int fd)
{
return PyTokenizer_FindEncodingFilename(fd, NULL);
}

#ifdef Py_DEBUG

/* Debug helper (compiled only under Py_DEBUG): print a token to stdout.

   Writes the token's name, looked up in _PyParser_TokenNames[type].
   For NAME, NUMBER, STRING and OP tokens it also writes, in parentheses,
   the characters from `start` up to (not including) `end`.
   No trailing newline is written.

   NOTE(review): `type` is used as an array index without a range check;
   callers are presumably expected to pass a valid token type -- confirm. */
void
tok_dump(int type, char *start, char *end)
{
printf("%s", _PyParser_TokenNames[type]);
if (type == NAME || type == NUMBER || type == STRING || type == OP)
printf("(%.*s)", (int)(end - start), start);
}

#endif
#endif // Py_DEBUG
10 changes: 5 additions & 5 deletions Parser/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,12 @@ struct tok_state {
enum interactive_underflow_t interactive_underflow;
};

extern struct tok_state *PyTokenizer_FromString(const char *, int);
extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*,
extern struct tok_state *_PyTokenizer_FromString(const char *, int);
extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int);
extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*,
const char *, const char *);
extern void PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, const char **, const char **);
extern void _PyTokenizer_Free(struct tok_state *);
extern int _PyTokenizer_Get(struct tok_state *, const char **, const char **);

#define tok_dump _Py_tok_dump

Expand Down
6 changes: 3 additions & 3 deletions Python/Python-tokenize.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source)
if (filename == NULL) {
return NULL;
}
self->tok = PyTokenizer_FromUTF8(source, 1);
self->tok = _PyTokenizer_FromUTF8(source, 1);
if (self->tok == NULL) {
Py_DECREF(filename);
return NULL;
Expand All @@ -61,7 +61,7 @@ tokenizeriter_next(tokenizeriterobject *it)
{
const char *start;
const char *end;
int type = PyTokenizer_Get(it->tok, &start, &end);
int type = _PyTokenizer_Get(it->tok, &start, &end);
if (type == ERRORTOKEN && PyErr_Occurred()) {
return NULL;
}
Expand Down Expand Up @@ -105,7 +105,7 @@ static void
tokenizeriter_dealloc(tokenizeriterobject *it)
{
PyTypeObject *tp = Py_TYPE(it);
PyTokenizer_Free(it->tok);
_PyTokenizer_Free(it->tok);
tp->tp_free(it);
Py_DECREF(tp);
}
Expand Down
1 change: 0 additions & 1 deletion Python/import.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "pycore_interp.h" // _PyInterpreterState_ClearModules()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_sysmodule.h"
#include "errcode.h"
#include "marshal.h"
#include "code.h"
#include "importdl.h"
Expand Down
4 changes: 2 additions & 2 deletions Python/traceback.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#define MAX_NTHREADS 100

/* Function from Parser/tokenizer.c */
extern char * PyTokenizer_FindEncodingFilename(int, PyObject *);
extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);

_Py_IDENTIFIER(TextIOWrapper);
_Py_IDENTIFIER(close);
Expand Down Expand Up @@ -431,7 +431,7 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i
Py_DECREF(binary);
return 0;
}
found_encoding = PyTokenizer_FindEncodingFilename(fd, filename);
found_encoding = _PyTokenizer_FindEncodingFilename(fd, filename);
if (found_encoding == NULL)
PyErr_Clear();
encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
Expand Down