Skip to content

bpo-47164: Add _PyASCIIObject_CAST() macro #32191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 34 additions & 33 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,15 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
PyObject *op,
int check_content);


/* Cast helpers for the three Unicode object struct types.
   Each macro first asserts PyUnicode_Check(op) and then yields op cast to
   the named struct pointer type (PyASCIIObject*, PyCompactUnicodeObject* or
   PyUnicodeObject*).  The argument "op" appears both inside the assert and
   in the cast, so it is expanded twice when assertions are compiled in;
   avoid passing expressions with side effects. */
#define _PyASCIIObject_CAST(op) \
(assert(PyUnicode_Check(op)), (PyASCIIObject*)(op))
#define _PyCompactUnicodeObject_CAST(op) \
(assert(PyUnicode_Check(op)), (PyCompactUnicodeObject*)(op))
#define _PyUnicodeObject_CAST(op) \
(assert(PyUnicode_Check(op)), (PyUnicodeObject*)(op))


/* Fast access macros */

/* Returns the deprecated Py_UNICODE representation's size in code units
Expand All @@ -243,11 +252,10 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(

/* Py_DEPRECATED(3.3) */
#define PyUnicode_GET_SIZE(op) \
(assert(PyUnicode_Check(op)), \
(((PyASCIIObject *)(op))->wstr) ? \
(_PyASCIIObject_CAST(op)->wstr ? \
PyUnicode_WSTR_LENGTH(op) : \
((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\
assert(((PyASCIIObject *)(op))->wstr), \
assert(_PyASCIIObject_CAST(op)->wstr), \
PyUnicode_WSTR_LENGTH(op)))

/* Py_DEPRECATED(3.3) */
Expand All @@ -261,9 +269,8 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(

/* Py_DEPRECATED(3.3) */
#define PyUnicode_AS_UNICODE(op) \
(assert(PyUnicode_Check(op)), \
(((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
PyUnicode_AsUnicode(_PyObject_CAST(op)))
(_PyASCIIObject_CAST(op)->wstr ? _PyASCIIObject_CAST(op)->wstr : \
PyUnicode_AsUnicode(_PyObject_CAST(op)))

/* Py_DEPRECATED(3.3) */
#define PyUnicode_AS_DATA(op) \
Expand All @@ -281,25 +288,24 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(

/* Use only if you know it's a string */
#define PyUnicode_CHECK_INTERNED(op) \
(((PyASCIIObject *)(op))->state.interned)
(_PyASCIIObject_CAST(op)->state.interned)

/* Return true if the string contains only ASCII characters, or 0 if not. The
string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
ready. */
#define PyUnicode_IS_ASCII(op) \
(assert(PyUnicode_Check(op)), \
assert(PyUnicode_IS_READY(op)), \
((PyASCIIObject*)op)->state.ascii)
(assert(PyUnicode_IS_READY(op)), \
_PyASCIIObject_CAST(op)->state.ascii)

/* Return true if the string is compact or 0 if not.
No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
(((PyASCIIObject*)(op))->state.compact)
(_PyASCIIObject_CAST(op)->state.compact)

/* Return true if the string is a compact ASCII string (use PyASCIIObject
structure), or 0 if not. No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT_ASCII(op) \
(((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op))
(_PyASCIIObject_CAST(op)->state.ascii && PyUnicode_IS_COMPACT(op))

enum PyUnicode_Kind {
/* String contains only wstr byte characters. This is only possible
Expand All @@ -323,23 +329,21 @@ enum PyUnicode_Kind {

/* Return one of the PyUnicode_*_KIND values defined above. */
#define PyUnicode_KIND(op) \
(assert(PyUnicode_Check(op)), \
assert(PyUnicode_IS_READY(op)), \
((PyASCIIObject *)(op))->state.kind)
(assert(PyUnicode_IS_READY(op)), \
_PyASCIIObject_CAST(op)->state.kind)

/* Return a void pointer to the raw unicode buffer. */
#define _PyUnicode_COMPACT_DATA(op) \
(PyUnicode_IS_ASCII(op) ? \
((void*)((PyASCIIObject*)(op) + 1)) : \
((void*)((PyCompactUnicodeObject*)(op) + 1)))
(PyUnicode_IS_ASCII(op) ? \
((void*)(_PyASCIIObject_CAST(op) + 1)) : \
((void*)(_PyCompactUnicodeObject_CAST(op) + 1)))

#define _PyUnicode_NONCOMPACT_DATA(op) \
(assert(((PyUnicodeObject*)(op))->data.any), \
((((PyUnicodeObject *)(op))->data.any)))
(assert(_PyUnicodeObject_CAST(op)->data.any), \
(_PyUnicodeObject_CAST(op)->data.any))

#define PyUnicode_DATA(op) \
(assert(PyUnicode_Check(op)), \
PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
(PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
_PyUnicode_NONCOMPACT_DATA(op))

/* In the access macros below, "kind" may be evaluated more than once.
Expand Down Expand Up @@ -386,8 +390,7 @@ enum PyUnicode_Kind {
PyUnicode_READ_CHAR, for multiple consecutive reads callers should
cache kind and use PyUnicode_READ instead. */
#define PyUnicode_READ_CHAR(unicode, index) \
(assert(PyUnicode_Check(unicode)), \
assert(PyUnicode_IS_READY(unicode)), \
(assert(PyUnicode_IS_READY(unicode)), \
(Py_UCS4) \
(PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
Expand All @@ -401,23 +404,21 @@ enum PyUnicode_Kind {
the string has its canonical representation set before calling
this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
#define PyUnicode_GET_LENGTH(op) \
(assert(PyUnicode_Check(op)), \
assert(PyUnicode_IS_READY(op)), \
((PyASCIIObject *)(op))->length)
(assert(PyUnicode_IS_READY(op)), \
_PyASCIIObject_CAST(op)->length)


/* Fast check to determine whether an object is ready. Equivalent to
PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any */
PyUnicode_IS_COMPACT(op) || _PyUnicodeObject_CAST(op)->data.any */

#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready)
#define PyUnicode_IS_READY(op) (_PyASCIIObject_CAST(op)->state.ready)

/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
case. If the canonical representation is not yet set, it will still call
_PyUnicode_Ready().
Returns 0 on success and -1 on errors. */
#define PyUnicode_READY(op) \
(assert(PyUnicode_Check(op)), \
(PyUnicode_IS_READY(op) ? \
((PyUnicode_IS_READY(op) ? \
0 : _PyUnicode_Ready(_PyObject_CAST(op))))

/* Return a maximum character value which is suitable for creating another
Expand All @@ -436,8 +437,8 @@ enum PyUnicode_Kind {
Py_DEPRECATED(3.3)
static inline Py_ssize_t PyUnicode_WSTR_LENGTH(PyObject *op) {
return PyUnicode_IS_COMPACT_ASCII(op) ?
((PyASCIIObject*)op)->length :
((PyCompactUnicodeObject*)op)->wstr_length;
_PyASCIIObject_CAST(op)->length :
_PyCompactUnicodeObject_CAST(op)->wstr_length;
}
#define PyUnicode_WSTR_LENGTH(op) PyUnicode_WSTR_LENGTH(_PyObject_CAST(op))

Expand Down
2 changes: 1 addition & 1 deletion Include/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ PyAPI_DATA(PyTypeObject) PyUnicode_Type;
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;

#define PyUnicode_Check(op) \
PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
#define PyUnicode_CheckExact(op) Py_IS_TYPE(op, &PyUnicode_Type)

/* --- Constants ---------------------------------------------------------- */
Expand Down
2 changes: 1 addition & 1 deletion Modules/_collectionsmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -2352,7 +2352,7 @@ _collections__count_elements_impl(PyObject *module, PyObject *mapping,
break;

if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1)
(hash = _PyASCIIObject_CAST(key)->hash) == -1)
{
hash = PyObject_Hash(key);
if (hash == -1)
Expand Down
2 changes: 1 addition & 1 deletion Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ static inline Py_hash_t
unicode_get_hash(PyObject *o)
{
assert(PyUnicode_CheckExact(o));
return ((PyASCIIObject*)o)->hash;
return _PyASCIIObject_CAST(o)->hash;
}

/* Print summary info about the state of the optimized allocator */
Expand Down
6 changes: 3 additions & 3 deletions Objects/setobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ set_add_key(PySetObject *so, PyObject *key)
Py_hash_t hash;

if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
hash = PyObject_Hash(key);
if (hash == -1)
return -1;
Expand All @@ -360,7 +360,7 @@ set_contains_key(PySetObject *so, PyObject *key)
Py_hash_t hash;

if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
hash = PyObject_Hash(key);
if (hash == -1)
return -1;
Expand All @@ -374,7 +374,7 @@ set_discard_key(PySetObject *so, PyObject *key)
Py_hash_t hash;

if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
hash = PyObject_Hash(key);
if (hash == -1)
return -1;
Expand Down
6 changes: 3 additions & 3 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3759,7 +3759,7 @@ find_name_in_mro(PyTypeObject *type, PyObject *name, int *error)
{
Py_hash_t hash;
if (!PyUnicode_CheckExact(name) ||
(hash = ((PyASCIIObject *) name)->hash) == -1)
(hash = _PyASCIIObject_CAST(name)->hash) == -1)
{
hash = PyObject_Hash(name);
if (hash == -1) {
Expand Down Expand Up @@ -3853,7 +3853,7 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name)
struct type_cache_entry *entry = &cache->hashtable[h];
entry->version = type->tp_version_tag;
entry->value = res; /* borrowed */
assert(((PyASCIIObject *)(name))->hash != -1);
assert(_PyASCIIObject_CAST(name)->hash != -1);
#if MCACHE_STATS
if (entry->name != Py_None && entry->name != name) {
cache->collisions++;
Expand Down Expand Up @@ -8951,7 +8951,7 @@ super_init_without_args(_PyInterpreterFrame *cframe, PyCodeObject *co,
if (cframe->f_lasti >= 0) {
// MAKE_CELL and COPY_FREE_VARS have no quickened forms, so no need
// to use _PyOpcode_Deopt here:
assert(_Py_OPCODE(_PyCode_CODE(co)[0]) == MAKE_CELL ||
assert(_Py_OPCODE(_PyCode_CODE(co)[0]) == MAKE_CELL ||
_Py_OPCODE(_PyCode_CODE(co)[0]) == COPY_FREE_VARS);
assert(PyCell_Check(firstarg));
firstarg = PyCell_GET(firstarg);
Expand Down
57 changes: 27 additions & 30 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,46 +113,46 @@ extern "C" {
#endif

#define _PyUnicode_UTF8(op) \
(((PyCompactUnicodeObject*)(op))->utf8)
(_PyCompactUnicodeObject_CAST(op)->utf8)
#define PyUnicode_UTF8(op) \
(assert(_PyUnicode_CHECK(op)), \
assert(PyUnicode_IS_READY(op)), \
PyUnicode_IS_COMPACT_ASCII(op) ? \
((char*)((PyASCIIObject*)(op) + 1)) : \
((char*)(_PyASCIIObject_CAST(op) + 1)) : \
_PyUnicode_UTF8(op))
#define _PyUnicode_UTF8_LENGTH(op) \
(((PyCompactUnicodeObject*)(op))->utf8_length)
(_PyCompactUnicodeObject_CAST(op)->utf8_length)
#define PyUnicode_UTF8_LENGTH(op) \
(assert(_PyUnicode_CHECK(op)), \
assert(PyUnicode_IS_READY(op)), \
PyUnicode_IS_COMPACT_ASCII(op) ? \
((PyASCIIObject*)(op))->length : \
_PyASCIIObject_CAST(op)->length : \
_PyUnicode_UTF8_LENGTH(op))
#define _PyUnicode_WSTR(op) \
(((PyASCIIObject*)(op))->wstr)
(_PyASCIIObject_CAST(op)->wstr)

/* Don't use deprecated macro of unicodeobject.h */
#undef PyUnicode_WSTR_LENGTH
#define PyUnicode_WSTR_LENGTH(op) \
(PyUnicode_IS_COMPACT_ASCII(op) ? \
((PyASCIIObject*)op)->length : \
((PyCompactUnicodeObject*)op)->wstr_length)
(PyUnicode_IS_COMPACT_ASCII(op) ? \
_PyASCIIObject_CAST(op)->length : \
_PyCompactUnicodeObject_CAST(op)->wstr_length)
#define _PyUnicode_WSTR_LENGTH(op) \
(((PyCompactUnicodeObject*)(op))->wstr_length)
(_PyCompactUnicodeObject_CAST(op)->wstr_length)
#define _PyUnicode_LENGTH(op) \
(((PyASCIIObject *)(op))->length)
(_PyASCIIObject_CAST(op)->length)
#define _PyUnicode_STATE(op) \
(((PyASCIIObject *)(op))->state)
(_PyASCIIObject_CAST(op)->state)
#define _PyUnicode_HASH(op) \
(((PyASCIIObject *)(op))->hash)
(_PyASCIIObject_CAST(op)->hash)
#define _PyUnicode_KIND(op) \
(assert(_PyUnicode_CHECK(op)), \
((PyASCIIObject *)(op))->state.kind)
_PyASCIIObject_CAST(op)->state.kind)
#define _PyUnicode_GET_LENGTH(op) \
(assert(_PyUnicode_CHECK(op)), \
((PyASCIIObject *)(op))->length)
_PyASCIIObject_CAST(op)->length)
#define _PyUnicode_DATA_ANY(op) \
(((PyUnicodeObject*)(op))->data.any)
(_PyUnicodeObject_CAST(op)->data.any)

#undef PyUnicode_READY
#define PyUnicode_READY(op) \
Expand Down Expand Up @@ -190,7 +190,7 @@ extern "C" {
buffer where the result characters are written to. */
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
do { \
to_type *_to = (to_type *)(to); \
to_type *_to = (to_type *)(to); \
const from_type *_iter = (const from_type *)(begin);\
const from_type *_end = (const from_type *)(end);\
Py_ssize_t n = (_end) - (_iter); \
Expand Down Expand Up @@ -509,21 +509,18 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
#define CHECK(expr) \
do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)

PyASCIIObject *ascii;
unsigned int kind;

assert(op != NULL);
CHECK(PyUnicode_Check(op));

ascii = (PyASCIIObject *)op;
kind = ascii->state.kind;
PyASCIIObject *ascii = _PyASCIIObject_CAST(op);
unsigned int kind = ascii->state.kind;

if (ascii->state.ascii == 1 && ascii->state.compact == 1) {
CHECK(kind == PyUnicode_1BYTE_KIND);
CHECK(ascii->state.ready == 1);
}
else {
PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
PyCompactUnicodeObject *compact = _PyCompactUnicodeObject_CAST(op);
void *data;

if (ascii->state.compact == 1) {
Expand All @@ -536,7 +533,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
CHECK(compact->utf8 != data);
}
else {
PyUnicodeObject *unicode = (PyUnicodeObject *)op;
PyUnicodeObject *unicode = _PyUnicodeObject_CAST(op);

data = unicode->data.any;
if (kind == PyUnicode_WCHAR_KIND) {
Expand Down Expand Up @@ -1330,18 +1327,18 @@ const void *_PyUnicode_data(void *unicode_raw) {
printf("obj %p\n", (void*)unicode);
printf("compact %d\n", PyUnicode_IS_COMPACT(unicode));
printf("compact ascii %d\n", PyUnicode_IS_COMPACT_ASCII(unicode));
printf("ascii op %p\n", ((void*)((PyASCIIObject*)(unicode) + 1)));
printf("compact op %p\n", ((void*)((PyCompactUnicodeObject*)(unicode) + 1)));
printf("ascii op %p\n", (void*)(_PyASCIIObject_CAST(unicode) + 1));
printf("compact op %p\n", (void*)(_PyCompactUnicodeObject_CAST(unicode) + 1));
printf("compact data %p\n", _PyUnicode_COMPACT_DATA(unicode));
return PyUnicode_DATA(unicode);
}

void
_PyUnicode_Dump(PyObject *op)
{
PyASCIIObject *ascii = (PyASCIIObject *)op;
PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
PyUnicodeObject *unicode = (PyUnicodeObject *)op;
PyASCIIObject *ascii = _PyASCIIObject_CAST(op);
PyCompactUnicodeObject *compact = _PyCompactUnicodeObject_CAST(op);
PyUnicodeObject *unicode = _PyUnicodeObject_CAST(op);
const void *data;

if (ascii->state.compact)
Expand Down Expand Up @@ -1976,7 +1973,7 @@ unicode_is_singleton(PyObject *unicode)
return 1;
}

PyASCIIObject *ascii = (PyASCIIObject *)unicode;
PyASCIIObject *ascii = _PyASCIIObject_CAST(unicode);
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) {
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
if (ch < 256 && LATIN1(ch) == unicode) {
Expand Down Expand Up @@ -16053,7 +16050,7 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp)

static void unicode_static_dealloc(PyObject *op)
{
PyASCIIObject* ascii = (PyASCIIObject*)op;
PyASCIIObject *ascii = _PyASCIIObject_CAST(op);

assert(ascii->state.compact);

Expand Down
Loading