From d20fbb8d16cba39e41b457b340f0ad2c2ffa1858 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 17:52:29 +0800 Subject: [PATCH 01/48] gh-107557: Tier 2 abstract interpreter barebones --- .gitattributes | 1 + Include/internal/pycore_optimizer.h | 16 + Makefile.pre.in | 9 +- PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 + PCbuild/pythoncore.vcxproj | 3 + PCbuild/pythoncore.vcxproj.filters | 9 + Python/abstract_interp_cases.c.h | 1085 +++++++++++++++++++++++ Python/optimizer.c | 8 +- Python/optimizer_analysis.c | 21 + Tools/cases_generator/generate_cases.py | 47 +- Tools/cases_generator/instructions.py | 31 + 12 files changed, 1229 insertions(+), 5 deletions(-) create mode 100644 Include/internal/pycore_optimizer.h create mode 100644 Python/abstract_interp_cases.c.h create mode 100644 Python/optimizer_analysis.c diff --git a/.gitattributes b/.gitattributes index 5d5558da711b17..e8b6b5bd7fa54f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -87,6 +87,7 @@ Programs/test_frozenmain.h generated Python/Python-ast.c generated Python/executor_cases.c.h generated Python/generated_cases.c.h generated +Python/abstract_interp_cases.c.h generated Python/opcode_targets.h generated Python/stdlib_module_names.h generated Tools/peg_generator/pegen/grammar_parser.py generated diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h new file mode 100644 index 00000000000000..3b5fa634e13972 --- /dev/null +++ b/Include/internal/pycore_optimizer.h @@ -0,0 +1,16 @@ +#ifndef Py_INTERNAL_OPTIMIZER_H +#define Py_INTERNAL_OPTIMIZER_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +extern int uop_analyze_and_optimize(struct _PyUOpInstruction *trace, int trace_len); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_OPTIMIZER_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in 
index 12409774746a30..a334aae9dec4fc 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -405,6 +405,7 @@ PYTHON_OBJS= \ Python/mysnprintf.o \ Python/mystrtoul.o \ Python/optimizer.o \ + Python/optimizer_analysis.o \ Python/pathconfig.o \ Python/preconfig.o \ Python/pyarena.o \ @@ -1562,6 +1563,7 @@ Python/ceval.o: \ Python/executor.o: \ $(srcdir)/Include/internal/pycore_opcode_metadata.h \ + $(srcdir)/Include/internal/pycore_optimizer.h \ $(srcdir)/Python/ceval_macros.h \ $(srcdir)/Python/executor_cases.c.h @@ -1570,7 +1572,12 @@ Python/flowgraph.o: \ Python/optimizer.o: \ $(srcdir)/Python/executor_cases.c.h \ - $(srcdir)/Include/internal/pycore_opcode_metadata.h + $(srcdir)/Include/internal/pycore_opcode_metadata.h \ + $(srcdir)/Include/internal/pycore_optimizer.h + +Python/optimizer_analysis.o: \ + $(srcdir)/Include/internal/pycore_opcode_metadata.h \ + $(srcdir)/Include/internal/pycore_optimizer.h Python/frozen.o: $(FROZEN_FILES_OUT) diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index e247637a0dfe5c..bdcf29ba44dab5 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -218,6 +218,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 2a0e009308022b..45333fa97f1c64 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -283,6 +283,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index bfe59acf12a69d..b0e62864421e17 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -248,6 +248,7 @@ + @@ -279,6 +280,7 @@ + @@ -548,6 +550,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 0a8b0c3faf51e1..d5f61e9c5d7c89 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -648,6 +648,9 @@ Include\internal + + Include\internal + Include\internal @@ 
-732,6 +735,9 @@ Include\internal + + Include\internal + Modules\zlib @@ -1223,6 +1229,9 @@ Python + + Python + Python diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h new file mode 100644 index 00000000000000..4b05477da45e3a --- /dev/null +++ b/Python/abstract_interp_cases.c.h @@ -0,0 +1,1085 @@ +// This file is generated by Tools/cases_generator/generate_cases.py +// from: +// Python\bytecodes.c +// Do not edit! + + case NOP: { + break; + } + + case RESUME: { + break; + } + + case INSTRUMENTED_RESUME: { + break; + } + + case LOAD_FAST_CHECK: { + STACK_GROW(1); + break; + } + + case LOAD_FAST: { + STACK_GROW(1); + break; + } + + case LOAD_FAST_AND_CLEAR: { + STACK_GROW(1); + break; + } + + case LOAD_FAST_LOAD_FAST: { + STACK_GROW(2); + break; + } + + case LOAD_CONST: { + STACK_GROW(1); + break; + } + + case STORE_FAST: { + STACK_SHRINK(1); + break; + } + + case STORE_FAST_LOAD_FAST: { + break; + } + + case STORE_FAST_STORE_FAST: { + STACK_SHRINK(2); + break; + } + + case POP_TOP: { + STACK_SHRINK(1); + break; + } + + case PUSH_NULL: { + STACK_GROW(1); + break; + } + + case INSTRUMENTED_END_FOR: { + STACK_SHRINK(2); + break; + } + + case END_SEND: { + STACK_SHRINK(1); + break; + } + + case INSTRUMENTED_END_SEND: { + STACK_SHRINK(1); + break; + } + + case UNARY_NEGATIVE: { + break; + } + + case UNARY_NOT: { + break; + } + + case TO_BOOL: { + break; + } + + case TO_BOOL_BOOL: { + break; + } + + case TO_BOOL_INT: { + break; + } + + case TO_BOOL_LIST: { + break; + } + + case TO_BOOL_NONE: { + break; + } + + case TO_BOOL_STR: { + break; + } + + case TO_BOOL_ALWAYS_TRUE: { + break; + } + + case UNARY_INVERT: { + break; + } + + case _GUARD_BOTH_INT: { + break; + } + + case _BINARY_OP_MULTIPLY_INT: { + STACK_SHRINK(1); + break; + } + + case _BINARY_OP_ADD_INT: { + STACK_SHRINK(1); + break; + } + + case _BINARY_OP_SUBTRACT_INT: { + STACK_SHRINK(1); + break; + } + + case _GUARD_BOTH_FLOAT: { + break; + } + + case _BINARY_OP_MULTIPLY_FLOAT: { + 
STACK_SHRINK(1); + break; + } + + case _BINARY_OP_ADD_FLOAT: { + STACK_SHRINK(1); + break; + } + + case _BINARY_OP_SUBTRACT_FLOAT: { + STACK_SHRINK(1); + break; + } + + case _GUARD_BOTH_UNICODE: { + break; + } + + case _BINARY_OP_ADD_UNICODE: { + STACK_SHRINK(1); + break; + } + + case _BINARY_OP_INPLACE_ADD_UNICODE: { + STACK_SHRINK(2); + break; + } + + case BINARY_SUBSCR: { + STACK_SHRINK(1); + break; + } + + case BINARY_SLICE: { + STACK_SHRINK(2); + break; + } + + case STORE_SLICE: { + STACK_SHRINK(4); + break; + } + + case BINARY_SUBSCR_LIST_INT: { + STACK_SHRINK(1); + break; + } + + case BINARY_SUBSCR_TUPLE_INT: { + STACK_SHRINK(1); + break; + } + + case BINARY_SUBSCR_DICT: { + STACK_SHRINK(1); + break; + } + + case BINARY_SUBSCR_GETITEM: { + STACK_SHRINK(1); + break; + } + + case LIST_APPEND: { + STACK_SHRINK(1); + break; + } + + case SET_ADD: { + STACK_SHRINK(1); + break; + } + + case STORE_SUBSCR: { + STACK_SHRINK(3); + break; + } + + case STORE_SUBSCR_LIST_INT: { + STACK_SHRINK(3); + break; + } + + case STORE_SUBSCR_DICT: { + STACK_SHRINK(3); + break; + } + + case DELETE_SUBSCR: { + STACK_SHRINK(2); + break; + } + + case CALL_INTRINSIC_1: { + break; + } + + case CALL_INTRINSIC_2: { + STACK_SHRINK(1); + break; + } + + case RAISE_VARARGS: { + STACK_SHRINK(oparg); + break; + } + + case INTERPRETER_EXIT: { + STACK_SHRINK(1); + break; + } + + case RETURN_VALUE: { + STACK_SHRINK(1); + break; + } + + case INSTRUMENTED_RETURN_VALUE: { + STACK_SHRINK(1); + break; + } + + case RETURN_CONST: { + break; + } + + case INSTRUMENTED_RETURN_CONST: { + break; + } + + case GET_AITER: { + break; + } + + case GET_ANEXT: { + STACK_GROW(1); + break; + } + + case GET_AWAITABLE: { + break; + } + + case SEND: { + break; + } + + case SEND_GEN: { + break; + } + + case INSTRUMENTED_YIELD_VALUE: { + break; + } + + case YIELD_VALUE: { + break; + } + + case POP_EXCEPT: { + STACK_SHRINK(1); + break; + } + + case RERAISE: { + STACK_SHRINK(1); + break; + } + + case END_ASYNC_FOR: { + 
STACK_SHRINK(2); + break; + } + + case CLEANUP_THROW: { + STACK_SHRINK(1); + break; + } + + case LOAD_ASSERTION_ERROR: { + STACK_GROW(1); + break; + } + + case LOAD_BUILD_CLASS: { + STACK_GROW(1); + break; + } + + case STORE_NAME: { + STACK_SHRINK(1); + break; + } + + case DELETE_NAME: { + break; + } + + case UNPACK_SEQUENCE: { + STACK_SHRINK(1); + STACK_GROW(oparg); + break; + } + + case UNPACK_SEQUENCE_TWO_TUPLE: { + STACK_SHRINK(1); + STACK_GROW(oparg); + break; + } + + case UNPACK_SEQUENCE_TUPLE: { + STACK_SHRINK(1); + STACK_GROW(oparg); + break; + } + + case UNPACK_SEQUENCE_LIST: { + STACK_SHRINK(1); + STACK_GROW(oparg); + break; + } + + case UNPACK_EX: { + STACK_GROW((oparg & 0xFF) + (oparg >> 8)); + break; + } + + case STORE_ATTR: { + STACK_SHRINK(2); + break; + } + + case DELETE_ATTR: { + STACK_SHRINK(1); + break; + } + + case STORE_GLOBAL: { + STACK_SHRINK(1); + break; + } + + case DELETE_GLOBAL: { + break; + } + + case _LOAD_LOCALS: { + STACK_GROW(1); + break; + } + + case _LOAD_FROM_DICT_OR_GLOBALS: { + break; + } + + case LOAD_GLOBAL: { + STACK_GROW(1); + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case _GUARD_GLOBALS_VERSION: { + break; + } + + case _GUARD_BUILTINS_VERSION: { + break; + } + + case _LOAD_GLOBAL_MODULE: { + STACK_GROW(1); + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + STACK_GROW(1); + STACK_GROW(((oparg & 1) ? 
1 : 0)); + break; + } + + case DELETE_FAST: { + break; + } + + case MAKE_CELL: { + break; + } + + case DELETE_DEREF: { + break; + } + + case LOAD_FROM_DICT_OR_DEREF: { + break; + } + + case LOAD_DEREF: { + STACK_GROW(1); + break; + } + + case STORE_DEREF: { + STACK_SHRINK(1); + break; + } + + case COPY_FREE_VARS: { + break; + } + + case BUILD_STRING: { + STACK_SHRINK(oparg); + STACK_GROW(1); + break; + } + + case BUILD_TUPLE: { + STACK_SHRINK(oparg); + STACK_GROW(1); + break; + } + + case BUILD_LIST: { + STACK_SHRINK(oparg); + STACK_GROW(1); + break; + } + + case LIST_EXTEND: { + STACK_SHRINK(1); + break; + } + + case SET_UPDATE: { + STACK_SHRINK(1); + break; + } + + case BUILD_SET: { + STACK_SHRINK(oparg); + STACK_GROW(1); + break; + } + + case BUILD_MAP: { + STACK_SHRINK(oparg*2); + STACK_GROW(1); + break; + } + + case SETUP_ANNOTATIONS: { + break; + } + + case BUILD_CONST_KEY_MAP: { + STACK_SHRINK(oparg); + break; + } + + case DICT_UPDATE: { + STACK_SHRINK(1); + break; + } + + case DICT_MERGE: { + STACK_SHRINK(1); + break; + } + + case MAP_ADD: { + STACK_SHRINK(2); + break; + } + + case INSTRUMENTED_LOAD_SUPER_ATTR: { + STACK_SHRINK(2); + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_SUPER_ATTR: { + STACK_SHRINK(2); + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_SUPER_ATTR_ATTR: { + STACK_SHRINK(2); + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_SUPER_ATTR_METHOD: { + STACK_SHRINK(1); + break; + } + + case LOAD_ATTR: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case _GUARD_TYPE_VERSION: { + break; + } + + case _CHECK_MANAGED_OBJECT_HAS_VALUES: { + break; + } + + case _LOAD_ATTR_INSTANCE_VALUE: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_ATTR_MODULE: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_ATTR_WITH_HINT: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_ATTR_SLOT: { + STACK_GROW(((oparg & 1) ? 
1 : 0)); + break; + } + + case LOAD_ATTR_CLASS: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_ATTR_PROPERTY: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case STORE_ATTR_INSTANCE_VALUE: { + STACK_SHRINK(2); + break; + } + + case STORE_ATTR_WITH_HINT: { + STACK_SHRINK(2); + break; + } + + case STORE_ATTR_SLOT: { + STACK_SHRINK(2); + break; + } + + case COMPARE_OP: { + STACK_SHRINK(1); + break; + } + + case COMPARE_OP_FLOAT: { + STACK_SHRINK(1); + break; + } + + case COMPARE_OP_INT: { + STACK_SHRINK(1); + break; + } + + case COMPARE_OP_STR: { + STACK_SHRINK(1); + break; + } + + case IS_OP: { + STACK_SHRINK(1); + break; + } + + case CONTAINS_OP: { + STACK_SHRINK(1); + break; + } + + case CHECK_EG_MATCH: { + break; + } + + case CHECK_EXC_MATCH: { + break; + } + + case IMPORT_NAME: { + STACK_SHRINK(1); + break; + } + + case IMPORT_FROM: { + STACK_GROW(1); + break; + } + + case JUMP_FORWARD: { + break; + } + + case JUMP_BACKWARD: { + break; + } + + case ENTER_EXECUTOR: { + break; + } + + case POP_JUMP_IF_FALSE: { + STACK_SHRINK(1); + break; + } + + case POP_JUMP_IF_TRUE: { + STACK_SHRINK(1); + break; + } + + case IS_NONE: { + break; + } + + case JUMP_BACKWARD_NO_INTERRUPT: { + break; + } + + case GET_LEN: { + STACK_GROW(1); + break; + } + + case MATCH_CLASS: { + STACK_SHRINK(2); + break; + } + + case MATCH_MAPPING: { + STACK_GROW(1); + break; + } + + case MATCH_SEQUENCE: { + STACK_GROW(1); + break; + } + + case MATCH_KEYS: { + STACK_GROW(1); + break; + } + + case GET_ITER: { + break; + } + + case GET_YIELD_FROM_ITER: { + break; + } + + case FOR_ITER: { + STACK_GROW(1); + break; + } + + case INSTRUMENTED_FOR_ITER: { + break; + } + + case _ITER_CHECK_LIST: { + break; + } + + case _ITER_JUMP_LIST: { + break; + } + + case _IS_ITER_EXHAUSTED_LIST: { + STACK_GROW(1); + break; + } + + case _ITER_NEXT_LIST: { + STACK_GROW(1); + break; + } + + case 
_ITER_CHECK_TUPLE: { + break; + } + + case _ITER_JUMP_TUPLE: { + break; + } + + case _IS_ITER_EXHAUSTED_TUPLE: { + STACK_GROW(1); + break; + } + + case _ITER_NEXT_TUPLE: { + STACK_GROW(1); + break; + } + + case _ITER_CHECK_RANGE: { + break; + } + + case _ITER_JUMP_RANGE: { + break; + } + + case _IS_ITER_EXHAUSTED_RANGE: { + STACK_GROW(1); + break; + } + + case _ITER_NEXT_RANGE: { + STACK_GROW(1); + break; + } + + case FOR_ITER_GEN: { + STACK_GROW(1); + break; + } + + case BEFORE_ASYNC_WITH: { + STACK_GROW(1); + break; + } + + case BEFORE_WITH: { + STACK_GROW(1); + break; + } + + case WITH_EXCEPT_START: { + STACK_GROW(1); + break; + } + + case PUSH_EXC_INFO: { + STACK_GROW(1); + break; + } + + case LOAD_ATTR_METHOD_WITH_VALUES: { + STACK_GROW(1); + break; + } + + case LOAD_ATTR_METHOD_NO_DICT: { + STACK_GROW(1); + break; + } + + case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + break; + } + + case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + break; + } + + case LOAD_ATTR_METHOD_LAZY_DICT: { + STACK_GROW(1); + break; + } + + case KW_NAMES: { + break; + } + + case INSTRUMENTED_CALL: { + break; + } + + case CALL: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_BOUND_METHOD_EXACT_ARGS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_PY_EXACT_ARGS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_PY_WITH_DEFAULTS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_NO_KW_TYPE_1: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_NO_KW_STR_1: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_TUPLE_1: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_ALLOC_AND_ENTER_INIT: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case EXIT_INIT_CHECK: { + STACK_SHRINK(1); + break; + } + + case CALL_BUILTIN_CLASS: { + STACK_SHRINK(oparg); + 
STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_BUILTIN_O: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_BUILTIN_FAST: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_BUILTIN_FAST_WITH_KEYWORDS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_LEN: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_NO_KW_ISINSTANCE: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_NO_KW_LIST_APPEND: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_NO_KW_METHOD_DESCRIPTOR_O: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case INSTRUMENTED_CALL_FUNCTION_EX: { + break; + } + + case CALL_FUNCTION_EX: { + STACK_SHRINK(((oparg & 1) ? 1 : 0)); + STACK_SHRINK(2); + CHECK_EVAL_BREAKER(); + break; + } + + case MAKE_FUNCTION: { + break; + } + + case SET_FUNCTION_ATTRIBUTE: { + STACK_SHRINK(1); + break; + } + + case RETURN_GENERATOR: { + break; + } + + case BUILD_SLICE: { + STACK_SHRINK(((oparg == 3) ? 
1 : 0)); + STACK_SHRINK(1); + break; + } + + case CONVERT_VALUE: { + break; + } + + case FORMAT_SIMPLE: { + break; + } + + case FORMAT_WITH_SPEC: { + STACK_SHRINK(1); + break; + } + + case COPY: { + STACK_GROW(1); + break; + } + + case BINARY_OP: { + STACK_SHRINK(1); + break; + } + + case SWAP: { + break; + } + + case INSTRUMENTED_INSTRUCTION: { + break; + } + + case INSTRUMENTED_JUMP_FORWARD: { + break; + } + + case INSTRUMENTED_JUMP_BACKWARD: { + break; + } + + case INSTRUMENTED_POP_JUMP_IF_TRUE: { + break; + } + + case INSTRUMENTED_POP_JUMP_IF_FALSE: { + break; + } + + case INSTRUMENTED_POP_JUMP_IF_NONE: { + break; + } + + case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: { + break; + } + + case EXTENDED_ARG: { + break; + } + + case CACHE: { + break; + } + + case RESERVED: { + break; + } + + case _POP_JUMP_IF_FALSE: { + STACK_SHRINK(1); + break; + } + + case _POP_JUMP_IF_TRUE: { + STACK_SHRINK(1); + break; + } + + case JUMP_TO_TOP: { + CHECK_EVAL_BREAKER(); + break; + } + + case SAVE_IP: { + break; + } + + case EXIT_TRACE: { + break; + } diff --git a/Python/optimizer.c b/Python/optimizer.c index 238ab02d09faa7..79280bb18448ff 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -4,6 +4,7 @@ #include "pycore_opcode.h" #include "pycore_opcode_metadata.h" #include "pycore_opcode_utils.h" +#include "pycore_optimizer.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uops.h" #include "cpython/optimizer.h" @@ -704,10 +705,11 @@ uop_optimize( return -1; } executor->base.execute = _PyUopExecute; + trace_length = uop_analyze_and_optimize(trace, trace_length); memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); - if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) { - executor->trace[trace_length].opcode = 0; // Sentinel - } + if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) { + executor->trace[trace_length].opcode = 0; // Sentinel + } *exec_ptr = (_PyExecutorObject *)executor; return 1; } diff --git a/Python/optimizer_analysis.c 
b/Python/optimizer_analysis.c new file mode 100644 index 00000000000000..d489fb802fca33 --- /dev/null +++ b/Python/optimizer_analysis.c @@ -0,0 +1,21 @@ +#include "Python.h" +#include "opcode.h" +#include "pycore_interp.h" +#include "pycore_opcode.h" +#include "pycore_opcode_metadata.h" +#include "pycore_opcode_utils.h" +#include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_uops.h" +#include "cpython/optimizer.h" +#include <stdbool.h> +#include <stdint.h> +#include <stddef.h> +#include "pycore_optimizer.h" + +int +uop_analyze_and_optimize( + _PyUOpInstruction *trace, + int trace_len) +{ + return trace_len; +} diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 967e1e2f5b63bb..4407410664c104 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -15,6 +15,7 @@ from flags import InstructionFlags, variable_used from instructions import ( AnyInstruction, + AbstractInstruction, Component, Instruction, MacroInstruction, @@ -43,6 +44,9 @@ DEFAULT_EXECUTOR_OUTPUT = os.path.relpath( os.path.join(ROOT, "Python/executor_cases.c.h") ) +DEFAULT_ABSTRACT_INTERPRETER_OUTPUT = os.path.relpath( + os.path.join(ROOT, "Python/abstract_interp_cases.c.h") +) # Constants used instead of size for macro expansions. # Note: 1, 2, 4 must match actual cache entry sizes. 
@@ -91,7 +95,13 @@ help="Write executor cases to this file", default=DEFAULT_EXECUTOR_OUTPUT, ) - +arg_parser.add_argument( + "-a", + "--abstract-interpreter-cases", + type=str, + help="Write abstract interpreter cases to this file", + default=DEFAULT_ABSTRACT_INTERPRETER_OUTPUT, +) class Generator(Analyzer): def get_stack_effect_info( @@ -620,6 +630,39 @@ def write_executor_instructions( file=sys.stderr, ) + def write_abstract_interpreter_instructions( + self, abstract_interpreter_filename: str, emit_line_directives: bool + ) -> None: + """Generate cases for the Tier 2 abstract interpreter/analyzer.""" + with open(abstract_interpreter_filename, "w") as f: + self.out = Formatter(f, 8, emit_line_directives) + self.write_provenance_header() + for thing in self.everything: + match thing: + case OverriddenInstructionPlaceHolder(): + # TODO: Is this helpful? + self.write_overridden_instr_place_holder(thing) + case parsing.InstDef(): + instr = AbstractInstruction(self.instrs[thing.name].inst) + self.out.emit("") + with self.out.block(f"case {thing.name}:"): + instr.write(self.out, tier=TIER_TWO) + if instr.check_eval_breaker: + self.out.emit("CHECK_EVAL_BREAKER();") + self.out.emit("break;") + # elif instr.kind != "op": + # print(f"NOTE: {thing.name} is not a viable uop") + case parsing.Macro(): + pass + case parsing.Pseudo(): + pass + case _: + typing.assert_never(thing) + print( + f"Wrote some stuff to {abstract_interpreter_filename}", + file=sys.stderr, + ) + def write_overridden_instr_place_holder( self, place_holder: OverriddenInstructionPlaceHolder ) -> None: @@ -724,6 +767,8 @@ def main(): a.write_instructions(args.output, args.emit_line_directives) a.write_metadata(args.metadata, args.pymetadata) a.write_executor_instructions(args.executor_cases, args.emit_line_directives) + a.write_abstract_interpreter_instructions(args.abstract_interpreter_cases, + args.emit_line_directives) if __name__ == "__main__": diff --git a/Tools/cases_generator/instructions.py 
b/Tools/cases_generator/instructions.py index 6f42699d900b46..6184bea9850dc7 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -310,6 +310,37 @@ def write_body( StackEffectMapping = list[tuple[StackEffect, StackEffect]] +# Instruction used for abstract interpretation. +class AbstractInstruction(Instruction): + def __init__(self, inst: parsing.InstDef): + super().__init__(inst) + + def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: + """Write one abstract instruction, sans prologue and epilogue.""" + # Write a static assertion that a family's cache size is correct + if family := self.family: + if self.name == family.name: + if cache_size := family.size: + out.emit( + f"static_assert({cache_size} == " + f'{self.cache_offset}, "incorrect cache size");' + ) + # Write net stack growth/shrinkage + out.stack_adjust( + [ieff for ieff in self.input_effects], + [oeff for oeff in self.output_effects], + ) + + def write_body( + self, + out: Formatter, + dedent: int, + active_caches: list[ActiveCacheEffect], + tier: Tiers = TIER_ONE, + ) -> None: + pass + + @dataclasses.dataclass class Component: instr: Instruction From 2aeea51c4e94e44767d87869723a05f1c5fd15fb Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 2 Aug 2023 09:55:27 +0000 Subject: [PATCH 02/48] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2023-08-02-09-55-21.gh-issue-107557.P1z-in.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-08-02-09-55-21.gh-issue-107557.P1z-in.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-08-02-09-55-21.gh-issue-107557.P1z-in.rst b/Misc/NEWS.d/next/Core and Builtins/2023-08-02-09-55-21.gh-issue-107557.P1z-in.rst new file mode 100644 index 00000000000000..392f59c79e8de9 --- /dev/null +++ 
b/Misc/NEWS.d/next/Core and Builtins/2023-08-02-09-55-21.gh-issue-107557.P1z-in.rst @@ -0,0 +1 @@ +Generate the cases needed for the barebones tier 2 abstract interpreter for optimization passes in CPython. From 1a728ab6ce121e30d8f475c9499b2a84eb134ebf Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 19:50:24 +0800 Subject: [PATCH 03/48] Copy Guido's input and output code, and fix build Co-Authored-By: Guido van Rossum --- Include/internal/pycore_optimizer.h | 4 +- Python/abstract_interp_cases.c.h | 412 ++++++++++++++++++++++++++ Tools/cases_generator/instructions.py | 38 +++ 3 files changed, 453 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 3b5fa634e13972..06d05cf00babd3 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -8,7 +8,9 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -extern int uop_analyze_and_optimize(struct _PyUOpInstruction *trace, int trace_len); +#include "pycore_uops.h" + +int uop_analyze_and_optimize(_PyUOpInstruction *trace, int trace_len); #ifdef __cplusplus } diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 4b05477da45e3a..2adf4deccea4bd 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -17,77 +17,104 @@ case LOAD_FAST_CHECK: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case LOAD_FAST: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case LOAD_FAST_AND_CLEAR: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case LOAD_FAST_LOAD_FAST: { STACK_GROW(2); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_CONST: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case STORE_FAST: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_FAST_LOAD_FAST: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; 
} case STORE_FAST_STORE_FAST: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case POP_TOP: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case PUSH_NULL: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case INSTRUMENTED_END_FOR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case END_SEND: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case INSTRUMENTED_END_SEND: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case UNARY_NEGATIVE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case UNARY_NOT: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case TO_BOOL: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } @@ -96,26 +123,38 @@ } case TO_BOOL_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case TO_BOOL_LIST: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case TO_BOOL_NONE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case TO_BOOL_STR: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case TO_BOOL_ALWAYS_TRUE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case UNARY_INVERT: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } @@ -124,17 +163,26 @@ } case _BINARY_OP_MULTIPLY_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case _BINARY_OP_ADD_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case _BINARY_OP_SUBTRACT_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } @@ -143,17 +191,26 @@ } case _BINARY_OP_MULTIPLY_FLOAT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; 
STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case _BINARY_OP_ADD_FLOAT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case _BINARY_OP_SUBTRACT_FLOAT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } @@ -162,105 +219,155 @@ } case _BINARY_OP_ADD_UNICODE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case _BINARY_OP_INPLACE_ADD_UNICODE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case BINARY_SUBSCR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case BINARY_SLICE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(2); + stack_pointer[-1] = NULL; break; } case STORE_SLICE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; + stack_pointer[-4] = NULL; STACK_SHRINK(4); break; } case BINARY_SUBSCR_LIST_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_TUPLE_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_DICT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_GETITEM: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case LIST_APPEND: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case SET_ADD: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_SUBSCR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case STORE_SUBSCR_LIST_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + 
stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case STORE_SUBSCR_DICT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case DELETE_SUBSCR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case CALL_INTRINSIC_1: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case CALL_INTRINSIC_2: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case RAISE_VARARGS: { + (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); break; } case INTERPRETER_EXIT: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case RETURN_VALUE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case INSTRUMENTED_RETURN_VALUE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -274,65 +381,90 @@ } case GET_AITER: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case GET_ANEXT: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case GET_AWAITABLE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case SEND: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case SEND_GEN: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case INSTRUMENTED_YIELD_VALUE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case YIELD_VALUE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case POP_EXCEPT: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case RERAISE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case END_ASYNC_FOR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case CLEANUP_THROW: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_ASSERTION_ERROR: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case 
LOAD_BUILD_CLASS: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case STORE_NAME: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -342,45 +474,61 @@ } case UNPACK_SEQUENCE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); + stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_TWO_TUPLE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); + stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_TUPLE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); + stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_LIST: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); + stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_EX: { + stack_pointer[-1] = NULL; STACK_GROW((oparg & 0xFF) + (oparg >> 8)); + stack_pointer - ((oparg >> 8)) = (PyObject **)NULL; + stack_pointer[-(1 + (oparg >> 8))] = NULL; + stack_pointer - (1 + (oparg >> 8) + (oparg & 0xFF)) = (PyObject **)NULL; break; } case STORE_ATTR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case DELETE_ATTR: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_GLOBAL: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -391,16 +539,21 @@ case _LOAD_LOCALS: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case _LOAD_FROM_DICT_OR_GLOBALS: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_GLOBAL: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } @@ -415,12 +568,16 @@ case _LOAD_GLOBAL_MODULE: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case _LOAD_GLOBAL_BUILTINS: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 
1 : 0))] = NULL; break; } @@ -437,15 +594,19 @@ } case LOAD_FROM_DICT_OR_DEREF: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_DEREF: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case STORE_DEREF: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -455,42 +616,54 @@ } case BUILD_STRING: { + (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case BUILD_TUPLE: { + (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case BUILD_LIST: { + (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case LIST_EXTEND: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case SET_UPDATE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case BUILD_SET: { + (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case BUILD_MAP: { + (stack_pointer - oparg*2) = (PyObject **)NULL; STACK_SHRINK(oparg*2); STACK_GROW(1); + stack_pointer[-1] = NULL; break; } @@ -499,21 +672,28 @@ } case BUILD_CONST_KEY_MAP: { + stack_pointer[-1] = NULL; + (stack_pointer - (1 + oparg)) = (PyObject **)NULL; STACK_SHRINK(oparg); + stack_pointer[-1] = NULL; break; } case DICT_UPDATE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case DICT_MERGE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case MAP_ADD: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } @@ -525,24 +705,42 @@ } case LOAD_SUPER_ATTR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 
1 : 0))] = NULL; break; } case LOAD_SUPER_ATTR_ATTR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_SUPER_ATTR_METHOD: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_ATTR: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } @@ -555,100 +753,155 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_MODULE: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_WITH_HINT: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_SLOT: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_CLASS: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_PROPERTY: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 
1 : 0))] = NULL; break; } case STORE_ATTR_INSTANCE_VALUE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case STORE_ATTR_WITH_HINT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case STORE_ATTR_SLOT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case COMPARE_OP: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case COMPARE_OP_FLOAT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case COMPARE_OP_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case COMPARE_OP_STR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case IS_OP: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CONTAINS_OP: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CHECK_EG_MATCH: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case CHECK_EXC_MATCH: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case IMPORT_NAME: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case IMPORT_FROM: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } @@ -665,16 +918,20 @@ } case POP_JUMP_IF_FALSE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case POP_JUMP_IF_TRUE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case IS_NONE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } @@ -684,39 +941,52 @@ case GET_LEN: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case MATCH_CLASS: { + stack_pointer[-1] = NULL; + stack_pointer[-2] 
= NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(2); + stack_pointer[-1] = NULL; break; } case MATCH_MAPPING: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case MATCH_SEQUENCE: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case MATCH_KEYS: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case GET_ITER: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case GET_YIELD_FROM_ITER: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case FOR_ITER: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } @@ -734,11 +1004,13 @@ case _IS_ITER_EXHAUSTED_LIST: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case _ITER_NEXT_LIST: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } @@ -752,11 +1024,13 @@ case _IS_ITER_EXHAUSTED_TUPLE: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case _ITER_NEXT_TUPLE: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } @@ -770,59 +1044,87 @@ case _IS_ITER_EXHAUSTED_RANGE: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case _ITER_NEXT_RANGE: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case FOR_ITER_GEN: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case BEFORE_ASYNC_WITH: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case BEFORE_WITH: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case WITH_EXCEPT_START: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case PUSH_EXC_INFO: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_ATTR_METHOD_WITH_VALUES: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_ATTR_METHOD_NO_DICT: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + stack_pointer[-1] 
= NULL; + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR_METHOD_LAZY_DICT: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } @@ -835,131 +1137,208 @@ } case CALL: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_BOUND_METHOD_EXACT_ARGS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_PY_EXACT_ARGS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_PY_WITH_DEFAULTS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_NO_KW_TYPE_1: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_NO_KW_STR_1: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_TUPLE_1: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; 
CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_ALLOC_AND_ENTER_INIT: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case EXIT_INIT_CHECK: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case CALL_BUILTIN_CLASS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_BUILTIN_O: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_BUILTIN_FAST: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_BUILTIN_FAST_WITH_KEYWORDS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_LEN: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_NO_KW_ISINSTANCE: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_NO_KW_LIST_APPEND: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + 
stack_pointer[-1] = NULL; break; } case CALL_NO_KW_METHOD_DESCRIPTOR_O: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } @@ -969,18 +1348,28 @@ } case CALL_FUNCTION_EX: { + (oparg & 1) ? stack_pointer[-(((oparg & 1) ? 1 : 0))] : NULL = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; + stack_pointer[-(2 + ((oparg & 1) ? 1 : 0))] = NULL; + stack_pointer[-(3 + ((oparg & 1) ? 1 : 0))] = NULL; STACK_SHRINK(((oparg & 1) ? 1 : 0)); STACK_SHRINK(2); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case MAKE_FUNCTION: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case SET_FUNCTION_ATTRIBUTE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } @@ -989,35 +1378,56 @@ } case BUILD_SLICE: { + (oparg == 3) ? stack_pointer[-(((oparg == 3) ? 1 : 0))] : NULL = NULL; + stack_pointer[-(1 + ((oparg == 3) ? 1 : 0))] = NULL; + stack_pointer[-(2 + ((oparg == 3) ? 1 : 0))] = NULL; STACK_SHRINK(((oparg == 3) ? 
1 : 0)); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CONVERT_VALUE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case FORMAT_SIMPLE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case FORMAT_WITH_SPEC: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case COPY: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case BINARY_OP: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case SWAP: { + stack_pointer[-1] = NULL; + (stack_pointer - (1 + (oparg-2))) = (PyObject **)NULL; + stack_pointer[-(2 + (oparg-2))] = NULL; + stack_pointer[-1] = NULL; + stack_pointer - (1 + (oparg-2)) = (PyObject **)NULL; + stack_pointer[-(2 + (oparg-2))] = NULL; break; } @@ -1062,11 +1472,13 @@ } case _POP_JUMP_IF_FALSE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case _POP_JUMP_IF_TRUE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 6184bea9850dc7..f3f3093219b65a 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -325,12 +325,50 @@ def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: f"static_assert({cache_size} == " f'{self.cache_offset}, "incorrect cache size");' ) + # NULL out inputs, unless it's the same as in the output, + # Write input stack effect variable declarations and initializations + ieffects = list(reversed(self.input_effects)) + for i, ieffect in enumerate(ieffects): + if ieffect.name in self.unmoved_names: + continue + isize = string_effect_size( + list_effect_size([ieff for ieff in ieffects[: i + 1]]) + ) + if ieffect.size: + src = StackEffect( + f"(stack_pointer - {maybe_parenthesize(isize)})", "PyObject **" + ) + elif ieffect.cond: + src = StackEffect( + f"({ieffect.cond}) ? 
stack_pointer[-{maybe_parenthesize(isize)}] : NULL", + "", + ) + else: + src = StackEffect(f"stack_pointer[-{maybe_parenthesize(isize)}]", "") + out.assign(src, parsing.StackEffect("NULL")) + # Write net stack growth/shrinkage out.stack_adjust( [ieff for ieff in self.input_effects], [oeff for oeff in self.output_effects], ) + # NULL out outputs, unless it's same as input. + oeffects = list(reversed(self.output_effects)) + for i, oeffect in enumerate(oeffects): + if oeffect.name in self.unmoved_names: + continue + osize = string_effect_size( + list_effect_size([oeff for oeff in oeffects[: i + 1]]) + ) + if oeffect.size: + dst = StackEffect( + f"stack_pointer - {maybe_parenthesize(osize)}", "PyObject **" + ) + else: + dst = StackEffect(f"stack_pointer[-{maybe_parenthesize(osize)}]", "") + out.assign(dst, parsing.StackEffect("NULL")) + def write_body( self, out: Formatter, From 17fccbca34ae7bc433e1ba1fff4b6e9d617c16d6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 19:54:50 +0800 Subject: [PATCH 04/48] fix separator --- Python/abstract_interp_cases.c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 2adf4deccea4bd..ac8c3a367da49f 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1,6 +1,6 @@ // This file is generated by Tools/cases_generator/generate_cases.py // from: -// Python\bytecodes.c +// Python/bytecodes.c // Do not edit! 
case NOP: { From a1da69db9529d988d6dc5ff6688105d9d235b6bd Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 19:59:17 +0800 Subject: [PATCH 05/48] credit Jules --- Tools/cases_generator/instructions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index f3f3093219b65a..197a79f539d5e7 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -326,7 +326,7 @@ def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: f'{self.cache_offset}, "incorrect cache size");' ) # NULL out inputs, unless it's the same as in the output, - # Write input stack effect variable declarations and initializations + # Write input stack effect variable declarations and initializations. ieffects = list(reversed(self.input_effects)) for i, ieffect in enumerate(ieffects): if ieffect.name in self.unmoved_names: From b458e171cbe24a71cb721da0606b07ff3e3f8ba1 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 20:00:18 +0800 Subject: [PATCH 06/48] add jules to co-authors Co-Authored-By: Jules <57632293+juliapoo@users.noreply.github.com> --- Python/optimizer_analysis.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index d489fb802fca33..13d2b8c48637b1 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -15,7 +15,8 @@ int uop_analyze_and_optimize( _PyUOpInstruction *trace, - int trace_len) + int trace_len +) { return trace_len; } From f81f8889d2cfa1160e106c3cdd0e592adb2d742f Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 23:46:58 +0800 Subject: [PATCH 07/48] add pycore_optimizer.h to headers in makefile --- Makefile.pre.in | 1 + 1 file changed, 1 insertion(+) diff --git 
a/Makefile.pre.in b/Makefile.pre.in index a334aae9dec4fc..ce6e38bacc81ae 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1787,6 +1787,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_obmalloc_init.h \ $(srcdir)/Include/internal/pycore_opcode.h \ $(srcdir)/Include/internal/pycore_opcode_utils.h \ + $(srcdir)/Include/internal/pycore_optimizer.h \ $(srcdir)/Include/internal/pycore_pathconfig.h \ $(srcdir)/Include/internal/pycore_pyarena.h \ $(srcdir)/Include/internal/pycore_pyerrors.h \ From 0020320d0f5aa58f6aa2a36b8f025396f5b7ba55 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 3 Aug 2023 00:03:59 +0800 Subject: [PATCH 08/48] fix: remove whitespace --- Makefile.pre.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index ce6e38bacc81ae..94beadabce18cc 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1787,7 +1787,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_obmalloc_init.h \ $(srcdir)/Include/internal/pycore_opcode.h \ $(srcdir)/Include/internal/pycore_opcode_utils.h \ - $(srcdir)/Include/internal/pycore_optimizer.h \ + $(srcdir)/Include/internal/pycore_optimizer.h \ $(srcdir)/Include/internal/pycore_pathconfig.h \ $(srcdir)/Include/internal/pycore_pyarena.h \ $(srcdir)/Include/internal/pycore_pyerrors.h \ From 1f93072c596e48f11ca715ac54091e885bebbbf9 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 3 Aug 2023 01:09:26 +0800 Subject: [PATCH 09/48] fix make smelly --- Include/internal/pycore_optimizer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 06d05cf00babd3..ccbe7e52af289a 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -10,7 +10,7 @@ extern "C" { #include "pycore_uops.h" -int uop_analyze_and_optimize(_PyUOpInstruction *trace, int 
trace_len); +int _Py_uop_analyze_and_optimize(_PyUOpInstruction *trace, int trace_len); #ifdef __cplusplus } From dac63e348441a99ba2844a22abdc214aad3402f1 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 3 Aug 2023 01:11:34 +0800 Subject: [PATCH 10/48] fix: build --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 13d2b8c48637b1..cb399b0beb9129 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -13,7 +13,7 @@ #include "pycore_optimizer.h" int -uop_analyze_and_optimize( +_Py_uop_analyze_and_optimize( _PyUOpInstruction *trace, int trace_len ) From e62e0153cd765bea9786407b32272dcb0e5dc234 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 3 Aug 2023 01:35:01 +0800 Subject: [PATCH 11/48] fix wrong symbol --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 79280bb18448ff..3f9a82cf1daa13 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -705,7 +705,7 @@ uop_optimize( return -1; } executor->base.execute = _PyUopExecute; - trace_length = uop_analyze_and_optimize(trace, trace_length); + trace_length = _Py_uop_analyze_and_optimize(trace, trace_length); memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) { executor->trace[trace_length].opcode = 0; // Sentinel From a7f654cafe24b9dc65188e8514be5fd1c472ab36 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 3 Aug 2023 02:10:11 +0800 Subject: [PATCH 12/48] ignore static globals check for abstract interpreter --- Tools/c-analyzer/cpython/_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 
9bc7285e18b2fb..90334d0e79da80 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -84,6 +84,7 @@ def clean_lines(text): Python/frozen_modules/*.h Python/generated_cases.c.h Python/executor_cases.c.h +Python/abstract_interp_cases.c.h # not actually source Python/bytecodes.c From ec58145acf047f75ce9b34418327a97a158c262e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:23:56 +0800 Subject: [PATCH 13/48] merge Guido's changes --- Python/abstract_interp_cases.c.h | 308 +++----------------------- Tools/cases_generator/instructions.py | 43 +--- Tools/cases_generator/stacking.py | 36 +++ 3 files changed, 65 insertions(+), 322 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index ac8c3a367da49f..8356ad15299f8e 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -35,8 +35,8 @@ case LOAD_FAST_LOAD_FAST: { STACK_GROW(2); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } @@ -47,26 +47,21 @@ } case STORE_FAST: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_FAST_LOAD_FAST: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case STORE_FAST_STORE_FAST: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case POP_TOP: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -78,42 +73,33 @@ } case INSTRUMENTED_END_FOR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case END_SEND: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case INSTRUMENTED_END_SEND: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case UNARY_NEGATIVE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case UNARY_NOT: { - stack_pointer[-1] = NULL; 
stack_pointer[-1] = NULL; break; } case TO_BOOL: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -123,37 +109,31 @@ } case TO_BOOL_INT: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case TO_BOOL_LIST: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case TO_BOOL_NONE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case TO_BOOL_STR: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case TO_BOOL_ALWAYS_TRUE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case UNARY_INVERT: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -163,24 +143,18 @@ } case _BINARY_OP_MULTIPLY_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case _BINARY_OP_ADD_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case _BINARY_OP_SUBTRACT_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; @@ -191,24 +165,18 @@ } case _BINARY_OP_MULTIPLY_FLOAT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case _BINARY_OP_ADD_FLOAT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case _BINARY_OP_SUBTRACT_FLOAT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; @@ -219,155 +187,114 @@ } case _BINARY_OP_ADD_UNICODE: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case _BINARY_OP_INPLACE_ADD_UNICODE: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case BINARY_SUBSCR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case BINARY_SLICE: { - stack_pointer[-1] = NULL; - 
stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(2); stack_pointer[-1] = NULL; break; } case STORE_SLICE: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; - stack_pointer[-4] = NULL; STACK_SHRINK(4); break; } case BINARY_SUBSCR_LIST_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_TUPLE_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_DICT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_GETITEM: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case LIST_APPEND: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case SET_ADD: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_SUBSCR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case STORE_SUBSCR_LIST_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case STORE_SUBSCR_DICT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case DELETE_SUBSCR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case CALL_INTRINSIC_1: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case CALL_INTRINSIC_2: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case RAISE_VARARGS: { - (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); break; } case INTERPRETER_EXIT: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case RETURN_VALUE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case INSTRUMENTED_RETURN_VALUE: { - stack_pointer[-1] 
= NULL; STACK_SHRINK(1); break; } @@ -381,7 +308,6 @@ } case GET_AITER: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -393,61 +319,49 @@ } case GET_AWAITABLE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case SEND: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case SEND_GEN: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case INSTRUMENTED_YIELD_VALUE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case YIELD_VALUE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case POP_EXCEPT: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case RERAISE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case END_ASYNC_FOR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case CLEANUP_THROW: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } @@ -464,7 +378,6 @@ } case STORE_NAME: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -474,61 +387,46 @@ } case UNPACK_SEQUENCE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); - stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_TWO_TUPLE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); - stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_TUPLE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); - stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_LIST: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); - stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_EX: { - stack_pointer[-1] = NULL; STACK_GROW((oparg & 0xFF) + (oparg >> 8)); - stack_pointer - ((oparg >> 8)) = (PyObject **)NULL; - stack_pointer[-(1 + (oparg >> 8))] = NULL; - stack_pointer - (1 + (oparg >> 8) + (oparg & 0xFF)) = 
(PyObject **)NULL; + stack_pointer[-1 - (oparg >> 8)] = NULL; break; } case STORE_ATTR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case DELETE_ATTR: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_GLOBAL: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -544,7 +442,6 @@ } case _LOAD_FROM_DICT_OR_GLOBALS: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -552,8 +449,8 @@ case LOAD_GLOBAL: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } @@ -568,16 +465,16 @@ case _LOAD_GLOBAL_MODULE: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case _LOAD_GLOBAL_BUILTINS: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 
1 : 0))] = NULL; break; } @@ -594,7 +491,6 @@ } case LOAD_FROM_DICT_OR_DEREF: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -606,7 +502,6 @@ } case STORE_DEREF: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -616,7 +511,6 @@ } case BUILD_STRING: { - (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = NULL; @@ -624,7 +518,6 @@ } case BUILD_TUPLE: { - (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = NULL; @@ -632,7 +525,6 @@ } case BUILD_LIST: { - (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = NULL; @@ -640,19 +532,16 @@ } case LIST_EXTEND: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case SET_UPDATE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case BUILD_SET: { - (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = NULL; @@ -660,7 +549,6 @@ } case BUILD_MAP: { - (stack_pointer - oparg*2) = (PyObject **)NULL; STACK_SHRINK(oparg*2); STACK_GROW(1); stack_pointer[-1] = NULL; @@ -672,28 +560,22 @@ } case BUILD_CONST_KEY_MAP: { - stack_pointer[-1] = NULL; - (stack_pointer - (1 + oparg)) = (PyObject **)NULL; STACK_SHRINK(oparg); stack_pointer[-1] = NULL; break; } case DICT_UPDATE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case DICT_MERGE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case MAP_ADD: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } @@ -701,46 +583,38 @@ case INSTRUMENTED_LOAD_SUPER_ATTR: { STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_SUPER_ATTR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 
1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_SUPER_ATTR_ATTR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_SUPER_ATTR_METHOD: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } @@ -753,147 +627,117 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_MODULE: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_WITH_HINT: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_SLOT: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_CLASS: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 
1 : 0))] = NULL; break; } case LOAD_ATTR_PROPERTY: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case STORE_ATTR_INSTANCE_VALUE: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case STORE_ATTR_WITH_HINT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case STORE_ATTR_SLOT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case COMPARE_OP: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case COMPARE_OP_FLOAT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case COMPARE_OP_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case COMPARE_OP_STR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case IS_OP: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case CONTAINS_OP: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case CHECK_EG_MATCH: { - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; break; } case CHECK_EXC_MATCH: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case IMPORT_NAME: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; @@ -918,19 +762,16 @@ } case 
POP_JUMP_IF_FALSE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case POP_JUMP_IF_TRUE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case IS_NONE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -946,9 +787,6 @@ } case MATCH_CLASS: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(2); stack_pointer[-1] = NULL; break; @@ -973,13 +811,11 @@ } case GET_ITER: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case GET_YIELD_FROM_ITER: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -1061,18 +897,16 @@ } case BEFORE_ASYNC_WITH: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } case BEFORE_WITH: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } @@ -1083,48 +917,42 @@ } case PUSH_EXC_INFO: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR_METHOD_WITH_VALUES: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR_METHOD_NO_DICT: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { - stack_pointer[-1] = NULL; - stack_pointer[-1] = NULL; + stack_pointer[-1 - (0 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; break; } case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { - stack_pointer[-1] = NULL; - stack_pointer[-1] = NULL; + stack_pointer[-1 - (0 ? 
1 : 0)] = NULL; stack_pointer[-1] = NULL; break; } case LOAD_ATTR_METHOD_LAZY_DICT: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } @@ -1137,9 +965,6 @@ } case CALL: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1148,9 +973,6 @@ } case CALL_BOUND_METHOD_EXACT_ARGS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1158,9 +980,6 @@ } case CALL_PY_EXACT_ARGS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1168,9 +987,6 @@ } case CALL_PY_WITH_DEFAULTS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1178,9 +994,6 @@ } case CALL_NO_KW_TYPE_1: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1188,9 +1001,6 @@ } case CALL_NO_KW_STR_1: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1199,9 +1009,6 @@ } case CALL_NO_KW_TUPLE_1: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1210,9 +1017,6 @@ } case CALL_NO_KW_ALLOC_AND_ENTER_INIT: { - (stack_pointer - oparg) = (PyObject **)NULL; - 
stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1220,15 +1024,11 @@ } case EXIT_INIT_CHECK: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case CALL_BUILTIN_CLASS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1237,9 +1037,6 @@ } case CALL_NO_KW_BUILTIN_O: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1248,9 +1045,6 @@ } case CALL_NO_KW_BUILTIN_FAST: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1259,9 +1053,6 @@ } case CALL_BUILTIN_FAST_WITH_KEYWORDS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1270,9 +1061,6 @@ } case CALL_NO_KW_LEN: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1280,9 +1068,6 @@ } case CALL_NO_KW_ISINSTANCE: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1290,9 +1075,6 @@ } case CALL_NO_KW_LIST_APPEND: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1300,9 +1082,6 @@ } case CALL_NO_KW_METHOD_DESCRIPTOR_O: { - (stack_pointer - oparg) = 
(PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1311,9 +1090,6 @@ } case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1322,9 +1098,6 @@ } case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1333,9 +1106,6 @@ } case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1348,10 +1118,6 @@ } case CALL_FUNCTION_EX: { - (oparg & 1) ? stack_pointer[-(((oparg & 1) ? 1 : 0))] : NULL = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; - stack_pointer[-(2 + ((oparg & 1) ? 1 : 0))] = NULL; - stack_pointer[-(3 + ((oparg & 1) ? 1 : 0))] = NULL; STACK_SHRINK(((oparg & 1) ? 1 : 0)); STACK_SHRINK(2); stack_pointer[-1] = NULL; @@ -1360,14 +1126,11 @@ } case MAKE_FUNCTION: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case SET_FUNCTION_ATTRIBUTE: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; @@ -1378,9 +1141,6 @@ } case BUILD_SLICE: { - (oparg == 3) ? stack_pointer[-(((oparg == 3) ? 1 : 0))] : NULL = NULL; - stack_pointer[-(1 + ((oparg == 3) ? 1 : 0))] = NULL; - stack_pointer[-(2 + ((oparg == 3) ? 1 : 0))] = NULL; STACK_SHRINK(((oparg == 3) ? 
1 : 0)); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1388,20 +1148,16 @@ } case CONVERT_VALUE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case FORMAT_SIMPLE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case FORMAT_WITH_SPEC: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; @@ -1414,20 +1170,14 @@ } case BINARY_OP: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case SWAP: { + stack_pointer[-2 - (oparg-2)] = NULL; stack_pointer[-1] = NULL; - (stack_pointer - (1 + (oparg-2))) = (PyObject **)NULL; - stack_pointer[-(2 + (oparg-2))] = NULL; - stack_pointer[-1] = NULL; - stack_pointer - (1 + (oparg-2)) = (PyObject **)NULL; - stack_pointer[-(2 + (oparg-2))] = NULL; break; } @@ -1472,13 +1222,11 @@ } case _POP_JUMP_IF_FALSE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case _POP_JUMP_IF_TRUE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index c10dfd241684f5..a10e8f41ab67db 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -263,49 +263,8 @@ def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: f"static_assert({cache_size} == " f'{self.cache_offset}, "incorrect cache size");' ) - # NULL out inputs, unless it's the same as in the output, - # Write input stack effect variable declarations and initializations. - ieffects = list(reversed(self.input_effects)) - for i, ieffect in enumerate(ieffects): - if ieffect.name in self.unmoved_names: - continue - isize = string_effect_size( - list_effect_size([ieff for ieff in ieffects[: i + 1]]) - ) - if ieffect.size: - src = StackEffect( - f"(stack_pointer - {maybe_parenthesize(isize)})", "PyObject **" - ) - elif ieffect.cond: - src = StackEffect( - f"({ieffect.cond}) ? 
stack_pointer[-{maybe_parenthesize(isize)}] : NULL", - "", - ) - else: - src = StackEffect(f"stack_pointer[-{maybe_parenthesize(isize)}]", "") - out.assign(src, parsing.StackEffect("NULL")) - # Write net stack growth/shrinkage - out.stack_adjust( - [ieff for ieff in self.input_effects], - [oeff for oeff in self.output_effects], - ) - - # NULL out outputs, unless it's same as input. - oeffects = list(reversed(self.output_effects)) - for i, oeffect in enumerate(oeffects): - if oeffect.name in self.unmoved_names: - continue - osize = string_effect_size( - list_effect_size([oeff for oeff in oeffects[: i + 1]]) - ) - if oeffect.size: - dst = StackEffect( - f"stack_pointer - {maybe_parenthesize(osize)}", "PyObject **" - ) - else: - dst = StackEffect(f"stack_pointer[-{maybe_parenthesize(osize)}]", "") - out.assign(dst, parsing.StackEffect("NULL")) + stacking.write_single_instr_for_abstract_interp(self, out) def write_body( self, diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 23eca3037f896d..8ae08f70904305 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -398,3 +398,39 @@ def write_components( ), poke.effect, ) + + +def write_single_instr_for_abstract_interp( + instr: Instruction, out: Formatter +): + try: + _write_components_for_abstract_interp( + [Component(instr, instr.active_caches)], + out, + ) + except AssertionError as err: + raise AssertionError(f"Error writing abstract instruction {instr.name}") from err + + +def _write_components_for_abstract_interp( + parts: list[Component], + out: Formatter, +): + managers = get_managers(parts) + for mgr in managers: + if mgr is managers[-1]: + out.stack_adjust(mgr.final_offset.deep, mgr.final_offset.high) + # Use clone() since adjust_inverse() mutates final_offset + mgr.adjust_inverse(mgr.final_offset.clone()) + # NULL out the output stack effects + for poke in mgr.pokes: + if not poke.effect.size and poke.effect.name not in 
mgr.instr.unmoved_names: + out.assign( + StackEffect( + poke.as_variable(), + poke.effect.type, + poke.effect.cond, + poke.effect.size, + ), + StackEffect("NULL"), + ) From 429276733ff98850eee575c7faf5bcee0b69ae2c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:31:21 +0800 Subject: [PATCH 14/48] remove unused stuff --- Python/abstract_interp_cases.c.h | 505 +++++++----------------- Tools/cases_generator/generate_cases.py | 14 +- 2 files changed, 150 insertions(+), 369 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 8356ad15299f8e..33e726cc78e17d 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -3,17 +3,13 @@ // Python/bytecodes.c // Do not edit! + case NOP: { break; } - case RESUME: { - break; - } - case INSTRUMENTED_RESUME: { - break; - } + case LOAD_FAST_CHECK: { STACK_GROW(1); @@ -21,24 +17,21 @@ break; } + case LOAD_FAST: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case LOAD_FAST_AND_CLEAR: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } - case LOAD_FAST_LOAD_FAST: { - STACK_GROW(2); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } + case LOAD_CONST: { STACK_GROW(1); @@ -46,36 +39,28 @@ break; } + case STORE_FAST: { STACK_SHRINK(1); break; } - case STORE_FAST_LOAD_FAST: { - stack_pointer[-1] = NULL; - break; - } - case STORE_FAST_STORE_FAST: { - STACK_SHRINK(2); - break; - } + case POP_TOP: { STACK_SHRINK(1); break; } + case PUSH_NULL: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } - case INSTRUMENTED_END_FOR: { - STACK_SHRINK(2); - break; - } + case END_SEND: { STACK_SHRINK(1); @@ -83,119 +68,131 @@ break; } - case INSTRUMENTED_END_SEND: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } + case UNARY_NEGATIVE: { stack_pointer[-1] = NULL; break; } + case UNARY_NOT: { stack_pointer[-1] = NULL; break; } + case TO_BOOL: { stack_pointer[-1] = NULL; break; } + 
case TO_BOOL_BOOL: { break; } + case TO_BOOL_INT: { stack_pointer[-1] = NULL; break; } + case TO_BOOL_LIST: { stack_pointer[-1] = NULL; break; } + case TO_BOOL_NONE: { stack_pointer[-1] = NULL; break; } + case TO_BOOL_STR: { stack_pointer[-1] = NULL; break; } + case TO_BOOL_ALWAYS_TRUE: { stack_pointer[-1] = NULL; break; } + case UNARY_INVERT: { stack_pointer[-1] = NULL; break; } + case _GUARD_BOTH_INT: { break; } + case _BINARY_OP_MULTIPLY_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _BINARY_OP_ADD_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _BINARY_OP_SUBTRACT_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _GUARD_BOTH_FLOAT: { break; } + case _BINARY_OP_MULTIPLY_FLOAT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _BINARY_OP_ADD_FLOAT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _BINARY_OP_SUBTRACT_FLOAT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _GUARD_BOTH_UNICODE: { break; } + case _BINARY_OP_ADD_UNICODE: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } - case _BINARY_OP_INPLACE_ADD_UNICODE: { - STACK_SHRINK(2); - break; - } + case BINARY_SUBSCR: { STACK_SHRINK(1); @@ -203,167 +200,128 @@ break; } + case BINARY_SLICE: { STACK_SHRINK(2); stack_pointer[-1] = NULL; break; } + case STORE_SLICE: { STACK_SHRINK(4); break; } + case BINARY_SUBSCR_LIST_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case BINARY_SUBSCR_TUPLE_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case BINARY_SUBSCR_DICT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } - case BINARY_SUBSCR_GETITEM: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } + case LIST_APPEND: { STACK_SHRINK(1); break; } + case SET_ADD: { STACK_SHRINK(1); break; } + case STORE_SUBSCR: { STACK_SHRINK(3); break; } + case STORE_SUBSCR_LIST_INT: { STACK_SHRINK(3); break; } + case STORE_SUBSCR_DICT: { STACK_SHRINK(3); break; } + case DELETE_SUBSCR: { STACK_SHRINK(2); 
break; } + case CALL_INTRINSIC_1: { stack_pointer[-1] = NULL; break; } + case CALL_INTRINSIC_2: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } - case RAISE_VARARGS: { - STACK_SHRINK(oparg); - break; - } - case INTERPRETER_EXIT: { - STACK_SHRINK(1); - break; - } - case RETURN_VALUE: { - STACK_SHRINK(1); - break; - } - case INSTRUMENTED_RETURN_VALUE: { - STACK_SHRINK(1); - break; - } - case RETURN_CONST: { - break; - } - case INSTRUMENTED_RETURN_CONST: { - break; - } + case GET_AITER: { stack_pointer[-1] = NULL; break; } + case GET_ANEXT: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case GET_AWAITABLE: { stack_pointer[-1] = NULL; break; } - case SEND: { - stack_pointer[-1] = NULL; - break; - } - case SEND_GEN: { - stack_pointer[-1] = NULL; - break; - } - case INSTRUMENTED_YIELD_VALUE: { - stack_pointer[-1] = NULL; - break; - } - case YIELD_VALUE: { - stack_pointer[-1] = NULL; - break; - } + case POP_EXCEPT: { STACK_SHRINK(1); break; } - case RERAISE: { - STACK_SHRINK(1); - break; - } - case END_ASYNC_FOR: { - STACK_SHRINK(2); - break; - } - case CLEANUP_THROW: { - STACK_SHRINK(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } + case LOAD_ASSERTION_ERROR: { STACK_GROW(1); @@ -371,81 +329,96 @@ break; } + case LOAD_BUILD_CLASS: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case STORE_NAME: { STACK_SHRINK(1); break; } + case DELETE_NAME: { break; } + case UNPACK_SEQUENCE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } + case UNPACK_SEQUENCE_TWO_TUPLE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } + case UNPACK_SEQUENCE_TUPLE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } + case UNPACK_SEQUENCE_LIST: { STACK_SHRINK(1); STACK_GROW(oparg); break; } + case UNPACK_EX: { STACK_GROW((oparg & 0xFF) + (oparg >> 8)); stack_pointer[-1 - (oparg >> 8)] = NULL; break; } + case STORE_ATTR: { STACK_SHRINK(2); break; } + case DELETE_ATTR: { STACK_SHRINK(1); break; } + case STORE_GLOBAL: { STACK_SHRINK(1); break; } + case DELETE_GLOBAL: { 
break; } + case _LOAD_LOCALS: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case _LOAD_FROM_DICT_OR_GLOBALS: { stack_pointer[-1] = NULL; break; } + case LOAD_GLOBAL: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); @@ -454,14 +427,17 @@ break; } + case _GUARD_GLOBALS_VERSION: { break; } + case _GUARD_BUILTINS_VERSION: { break; } + case _LOAD_GLOBAL_MODULE: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); @@ -470,6 +446,7 @@ break; } + case _LOAD_GLOBAL_BUILTINS: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); @@ -478,38 +455,42 @@ break; } + case DELETE_FAST: { break; } - case MAKE_CELL: { - break; - } + case DELETE_DEREF: { break; } + case LOAD_FROM_DICT_OR_DEREF: { stack_pointer[-1] = NULL; break; } + case LOAD_DEREF: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case STORE_DEREF: { STACK_SHRINK(1); break; } + case COPY_FREE_VARS: { break; } + case BUILD_STRING: { STACK_SHRINK(oparg); STACK_GROW(1); @@ -517,6 +498,7 @@ break; } + case BUILD_TUPLE: { STACK_SHRINK(oparg); STACK_GROW(1); @@ -524,6 +506,7 @@ break; } + case BUILD_LIST: { STACK_SHRINK(oparg); STACK_GROW(1); @@ -531,16 +514,19 @@ break; } + case LIST_EXTEND: { STACK_SHRINK(1); break; } + case SET_UPDATE: { STACK_SHRINK(1); break; } + case BUILD_SET: { STACK_SHRINK(oparg); STACK_GROW(1); @@ -548,6 +534,7 @@ break; } + case BUILD_MAP: { STACK_SHRINK(oparg*2); STACK_GROW(1); @@ -555,46 +542,38 @@ break; } + case SETUP_ANNOTATIONS: { break; } + case BUILD_CONST_KEY_MAP: { STACK_SHRINK(oparg); stack_pointer[-1] = NULL; break; } + case DICT_UPDATE: { STACK_SHRINK(1); break; } + case DICT_MERGE: { STACK_SHRINK(1); break; } + case MAP_ADD: { STACK_SHRINK(2); break; } - case INSTRUMENTED_LOAD_SUPER_ATTR: { - STACK_SHRINK(2); - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_SUPER_ATTR: { - STACK_SHRINK(2); - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 
1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } + case LOAD_SUPER_ATTR_ATTR: { STACK_SHRINK(2); @@ -604,6 +583,7 @@ break; } + case LOAD_SUPER_ATTR_METHOD: { STACK_SHRINK(1); stack_pointer[-2] = NULL; @@ -611,6 +591,7 @@ break; } + case LOAD_ATTR: { STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; @@ -618,14 +599,17 @@ break; } + case _GUARD_TYPE_VERSION: { break; } + case _CHECK_MANAGED_OBJECT_HAS_VALUES: { break; } + case _LOAD_ATTR_INSTANCE_VALUE: { STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; @@ -633,62 +617,15 @@ break; } - case LOAD_ATTR_MODULE: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_WITH_HINT: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_SLOT: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_CLASS: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_PROPERTY: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 
1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case STORE_ATTR_INSTANCE_VALUE: { - STACK_SHRINK(2); - break; - } - case STORE_ATTR_WITH_HINT: { - STACK_SHRINK(2); - break; - } - case STORE_ATTR_SLOT: { - STACK_SHRINK(2); - break; - } + case COMPARE_OP: { STACK_SHRINK(1); @@ -696,89 +633,68 @@ break; } + case COMPARE_OP_FLOAT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case COMPARE_OP_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case COMPARE_OP_STR: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case IS_OP: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case CONTAINS_OP: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case CHECK_EG_MATCH: { stack_pointer[-2] = NULL; stack_pointer[-1] = NULL; break; } + case CHECK_EXC_MATCH: { stack_pointer[-1] = NULL; break; } - case IMPORT_NAME: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - case IMPORT_FROM: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - case JUMP_FORWARD: { - break; - } - case JUMP_BACKWARD: { - break; - } - case ENTER_EXECUTOR: { - break; - } - case POP_JUMP_IF_FALSE: { - STACK_SHRINK(1); - break; - } - case POP_JUMP_IF_TRUE: { - STACK_SHRINK(1); - break; - } + case IS_NONE: { stack_pointer[-1] = NULL; break; } - case JUMP_BACKWARD_NO_INTERRUPT: { - break; - } + case GET_LEN: { STACK_GROW(1); @@ -786,57 +702,54 @@ break; } + case MATCH_CLASS: { STACK_SHRINK(2); stack_pointer[-1] = NULL; break; } + case MATCH_MAPPING: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case MATCH_SEQUENCE: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case MATCH_KEYS: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case GET_ITER: { stack_pointer[-1] = NULL; break; } + case GET_YIELD_FROM_ITER: { stack_pointer[-1] = NULL; break; } - case FOR_ITER: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - case INSTRUMENTED_FOR_ITER: { - break; - } + case _ITER_CHECK_LIST: { break; } - case _ITER_JUMP_LIST: { - 
break; - } + case _IS_ITER_EXHAUSTED_LIST: { STACK_GROW(1); @@ -844,19 +757,19 @@ break; } + case _ITER_NEXT_LIST: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case _ITER_CHECK_TUPLE: { break; } - case _ITER_JUMP_TUPLE: { - break; - } + case _IS_ITER_EXHAUSTED_TUPLE: { STACK_GROW(1); @@ -864,19 +777,19 @@ break; } + case _ITER_NEXT_TUPLE: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case _ITER_CHECK_RANGE: { break; } - case _ITER_JUMP_RANGE: { - break; - } + case _IS_ITER_EXHAUSTED_RANGE: { STACK_GROW(1); @@ -884,31 +797,16 @@ break; } + case _ITER_NEXT_RANGE: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } - case FOR_ITER_GEN: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - case BEFORE_ASYNC_WITH: { - STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } - case BEFORE_WITH: { - STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } + case WITH_EXCEPT_START: { STACK_GROW(1); @@ -916,6 +814,7 @@ break; } + case PUSH_EXC_INFO: { STACK_GROW(1); stack_pointer[-2] = NULL; @@ -923,75 +822,17 @@ break; } - case LOAD_ATTR_METHOD_WITH_VALUES: { - STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_METHOD_NO_DICT: { - STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { - stack_pointer[-1 - (0 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { - stack_pointer[-1 - (0 ? 
1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_METHOD_LAZY_DICT: { - STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } - case KW_NAMES: { - break; - } - case INSTRUMENTED_CALL: { - break; - } - case CALL: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); - break; - } - case CALL_BOUND_METHOD_EXACT_ARGS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - case CALL_PY_EXACT_ARGS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - case CALL_PY_WITH_DEFAULTS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } + case CALL_NO_KW_TYPE_1: { STACK_SHRINK(oparg); @@ -1000,65 +841,47 @@ break; } + case CALL_NO_KW_STR_1: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } + case CALL_NO_KW_TUPLE_1: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } - case CALL_NO_KW_ALLOC_AND_ENTER_INIT: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } + case EXIT_INIT_CHECK: { STACK_SHRINK(1); break; } - case CALL_BUILTIN_CLASS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); - break; - } + case CALL_NO_KW_BUILTIN_O: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } + case CALL_NO_KW_BUILTIN_FAST: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } - case CALL_BUILTIN_FAST_WITH_KEYWORDS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); - break; - } + case CALL_NO_KW_LEN: { STACK_SHRINK(oparg); @@ -1067,6 +890,7 @@ break; } + case CALL_NO_KW_ISINSTANCE: { STACK_SHRINK(oparg); STACK_SHRINK(1); @@ -1074,71 +898,48 @@ break; } - case CALL_NO_KW_LIST_APPEND: { - 
STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } + case CALL_NO_KW_METHOD_DESCRIPTOR_O: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } - case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); - break; - } + case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } + case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } - case INSTRUMENTED_CALL_FUNCTION_EX: { - break; - } - case CALL_FUNCTION_EX: { - STACK_SHRINK(((oparg & 1) ? 1 : 0)); - STACK_SHRINK(2); - stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); - break; - } + case MAKE_FUNCTION: { stack_pointer[-1] = NULL; break; } + case SET_FUNCTION_ATTRIBUTE: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } - case RETURN_GENERATOR: { - break; - } + case BUILD_SLICE: { STACK_SHRINK(((oparg == 3) ? 
1 : 0)); @@ -1147,99 +948,79 @@ break; } + case CONVERT_VALUE: { stack_pointer[-1] = NULL; break; } + case FORMAT_SIMPLE: { stack_pointer[-1] = NULL; break; } + case FORMAT_WITH_SPEC: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case COPY: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case BINARY_OP: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case SWAP: { stack_pointer[-2 - (oparg-2)] = NULL; stack_pointer[-1] = NULL; break; } - case INSTRUMENTED_INSTRUCTION: { - break; - } - case INSTRUMENTED_JUMP_FORWARD: { - break; - } - case INSTRUMENTED_JUMP_BACKWARD: { - break; - } - case INSTRUMENTED_POP_JUMP_IF_TRUE: { - break; - } - case INSTRUMENTED_POP_JUMP_IF_FALSE: { - break; - } - case INSTRUMENTED_POP_JUMP_IF_NONE: { - break; - } - case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: { - break; - } - case EXTENDED_ARG: { - break; - } - case CACHE: { - break; - } - case RESERVED: { - break; - } + case _POP_JUMP_IF_FALSE: { STACK_SHRINK(1); break; } + case _POP_JUMP_IF_TRUE: { STACK_SHRINK(1); break; } + case JUMP_TO_TOP: { - CHECK_EVAL_BREAKER(); break; } + case SAVE_IP: { break; } + case EXIT_TRACE: { break; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 832beedec05460..10351e7a768a56 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -629,13 +629,13 @@ def write_abstract_interpreter_instructions( case parsing.InstDef(): instr = AbstractInstruction(self.instrs[thing.name].inst) self.out.emit("") - with self.out.block(f"case {thing.name}:"): - instr.write(self.out, tier=TIER_TWO) - if instr.check_eval_breaker: - self.out.emit("CHECK_EVAL_BREAKER();") - self.out.emit("break;") - # elif instr.kind != "op": - # print(f"NOTE: {thing.name} is not a viable uop") + if instr.is_viable_uop(): + self.out.emit("") + with self.out.block(f"case {thing.name}:"): + instr.write(self.out, tier=TIER_TWO) + self.out.emit("break;") + # elif instr.kind != "op": + # 
print(f"NOTE: {thing.name} is not a viable uop") case parsing.Macro(): pass case parsing.Pseudo(): From fdcca9036ba949185c716a3dbfcf48c9fea8e533 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 5 Aug 2023 02:34:31 +0800 Subject: [PATCH 15/48] Turn on the abstract interpreter --- Include/cpython/optimizer.h | 2 +- Include/internal/pycore_optimizer.h | 4 +- Python/optimizer.c | 14 +- Python/optimizer_analysis.c | 174 +++++++++++++++++++++++- Tools/cases_generator/generate_cases.py | 14 +- Tools/cases_generator/stacking.py | 2 +- 6 files changed, 199 insertions(+), 11 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 2260501bfd608e..5ceb57eb6f34cd 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -22,7 +22,7 @@ typedef struct _PyExecutorObject { typedef struct _PyOptimizerObject _PyOptimizerObject; /* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. 
*/ -typedef int (*optimize_func)(_PyOptimizerObject* self, PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject **); +typedef int (*optimize_func)(_PyOptimizerObject* self, PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject **, int curr_stackentries); typedef struct _PyOptimizerObject { PyObject_HEAD diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index ccbe7e52af289a..2ae657c4e117ff 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -10,7 +10,9 @@ extern "C" { #include "pycore_uops.h" -int _Py_uop_analyze_and_optimize(_PyUOpInstruction *trace, int trace_len); +int _Py_uop_analyze_and_optimize(PyCodeObject *code, + _PyUOpInstruction *trace, int trace_len, int curr_stackentries); + #ifdef __cplusplus } diff --git a/Python/optimizer.c b/Python/optimizer.c index 3f9a82cf1daa13..4ca0959eb4a45d 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -104,7 +104,8 @@ error_optimize( _PyOptimizerObject* self, PyCodeObject *code, _Py_CODEUNIT *instr, - _PyExecutorObject **exec) + _PyExecutorObject **exec, + int Py_UNUSED(stack_entries)) { PyErr_Format(PyExc_SystemError, "Should never call error_optimize"); return -1; @@ -165,7 +166,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI } _PyOptimizerObject *opt = interp->optimizer; _PyExecutorObject *executor = NULL; - int err = opt->optimize(opt, code, dest, &executor); + int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame))); if (err <= 0) { assert(executor == NULL); if (err < 0) { @@ -255,7 +256,9 @@ counter_optimize( _PyOptimizerObject* self, PyCodeObject *code, _Py_CODEUNIT *instr, - _PyExecutorObject **exec_ptr) + _PyExecutorObject **exec_ptr, + int Py_UNUSED(curr_stackentries) +) { _PyCounterExecutorObject *executor = (_PyCounterExecutorObject *)_PyObject_New(&CounterExecutor_Type); if (executor == NULL) { @@ -691,7 +694,8 @@ uop_optimize( 
_PyOptimizerObject *self, PyCodeObject *code, _Py_CODEUNIT *instr, - _PyExecutorObject **exec_ptr) + _PyExecutorObject **exec_ptr, + int curr_stackentries) { _PyUOpInstruction trace[_Py_UOP_MAX_TRACE_LENGTH]; int trace_length = translate_bytecode_to_trace(code, instr, trace, _Py_UOP_MAX_TRACE_LENGTH); @@ -705,7 +709,7 @@ uop_optimize( return -1; } executor->base.execute = _PyUopExecute; - trace_length = _Py_uop_analyze_and_optimize(trace, trace_length); + trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) { executor->trace[trace_length].opcode = 0; // Sentinel diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index cb399b0beb9129..4bc1295a72a40f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -12,11 +12,183 @@ #include #include "pycore_optimizer.h" +// TYPENODE is a tagged pointer that uses the last 2 LSB as the tag +#define _Py_PARTITIONNODE_t uintptr_t + +// PARTITIONNODE Tags +typedef enum _Py_TypeNodeTags { + // Node is unused + TYPE_NULL = 0, + // TYPE_ROOT_POSITIVE can point to a root struct or be a NULL + TYPE_ROOT= 1, + // TYPE_REF points to a TYPE_ROOT or a TYPE_REF + TYPE_REF = 2, +} _Py_TypeNodeTags; + +typedef struct _Py_PartitionRootNode { + PyObject_HEAD + // For partial evaluation + uint8_t static_or_dyanmic; + // For types (TODO) +} _Py_PartitionRootNode; + +PyTypeObject _Py_PartitionRootNode_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + .tp_name = "uops abstract interpreter's root node", + .tp_basicsize = sizeof(_Py_PartitionRootNode), + .tp_dealloc = PyObject_Del, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION +}; + +static inline _Py_PARTITIONNODE_t +partitionnode_get_tag(_Py_PARTITIONNODE_t node) +{ + return node & 0b11; +} + +static inline _Py_PARTITIONNODE_t +partitionnode_clear_tag(_Py_PARTITIONNODE_t node) 
+{ + return node & (~(uintptr_t)(0b11)); +} + +static inline _Py_PARTITIONNODE_t +partitionnode_make_root(uint8_t static_or_dynamic) +{ + _Py_PartitionRootNode *root = PyObject_New(_Py_PartitionRootNode, &_Py_PartitionRootNode_Type); + if (root == NULL) { + return 0; + } + root->static_or_dyanmic = static_or_dynamic; + return (_Py_PARTITIONNODE_t)root; +} + +static inline _Py_PARTITIONNODE_t +partitionnode_make_ref(_Py_PARTITIONNODE_t node) +{ + return partitionnode_clear_tag(node) | TYPE_REF; +} + +static inline _Py_PARTITIONNODE_t +partitionnode_null() +{ + return 0; +} + + +// Tier 2 types meta interpreter +typedef struct _Py_UOpsAbstractInterpContext { + PyObject_HEAD + // points to one element after the abstract stack + _Py_PARTITIONNODE_t *stack_pointer; + int stack_len; + _Py_PARTITIONNODE_t *stack; + int locals_len; + _Py_PARTITIONNODE_t *locals; +} _Py_UOpsAbstractInterpContext; + +static void +abstractinterp_dealloc(PyObject *o) +{ + _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; + PyMem_Free(self->stack); + PyMem_Free(self->locals); + // TODO traverse the nodes and decref all roots too. 
+ Py_TYPE(self)->tp_free((PyObject *)self); +} + +PyTypeObject _Py_UOpsAbstractInterpContext_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + .tp_name = "uops abstract interpreter's context", + .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext), + .tp_dealloc = abstractinterp_dealloc, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION +}; + +_Py_UOpsAbstractInterpContext * +_Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stacklen) +{ + _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)PyType_GenericAlloc( + (PyTypeObject *)&_Py_UOpsAbstractInterpContext_Type, 0); + if (self == NULL) { + return NULL; + } + + // Setup + self->stack_len = stack_len; + self->locals_len = locals_len; + + _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, locals_len + stack_len); + if (locals_with_stack == NULL) { + Py_DECREF(self); + return NULL; + } + + + for (int i = 0; i < locals_len + stack_len; i++) { + locals_with_stack[i] = partitionnode_null(); + } + + self->locals = locals_with_stack; + self->stack = locals_with_stack + locals_len; + self->stack_pointer = self->stack + curr_stacklen; + + return self; +} + int _Py_uop_analyze_and_optimize( + PyCodeObject *co, _PyUOpInstruction *trace, - int trace_len + int trace_len, + int curr_stacklen ) { +#define STACK_LEVEL() ((int)(stack_pointer - ctx->stack)) +#define STACK_SIZE() (co->co_stacksize) +#define BASIC_STACKADJ(n) (stack_pointer += n) + +#ifdef Py_DEBUG +#define STACK_GROW(n) do { \ + assert(n >= 0); \ + BASIC_STACKADJ(n); \ + assert(STACK_LEVEL() <= STACK_SIZE()); \ + } while (0) +#define STACK_SHRINK(n) do { \ + assert(n >= 0); \ + assert(STACK_LEVEL() >= n); \ + BASIC_STACKADJ(-(n)); \ + } while (0) +#else +#define STACK_GROW(n) BASIC_STACKADJ(n) +#define STACK_SHRINK(n) BASIC_STACKADJ(-(n)) +#endif + _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len); + if (temp_writebuffer == NULL) { + return trace_len; + 
} + + _Py_UOpsAbstractInterpContext *ctx = _Py_UOpsAbstractInterpContext_New(co->co_stacksize, co->co_nlocals, curr_stacklen); + if (ctx == NULL) { + PyMem_Free(temp_writebuffer); + return trace_len; + } + + int oparg; + int opcode; + _Py_PARTITIONNODE_t *stack_pointer = ctx->stack_pointer; + for (int i = 0; i < trace_len; i++) { + oparg = trace[i].oparg; + opcode = trace[i].opcode; + switch (opcode) { +#include "abstract_interp_cases.c.h" + default: + fprintf(stderr, "Unknown opcode in abstract interpreter\n"); + Py_UNREACHABLE(); + } + ctx->stack_pointer = stack_pointer; + + } + assert(STACK_SIZE() >= 0); return trace_len; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 10351e7a768a56..636f8b0600d052 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -61,6 +61,17 @@ INSTR_FMT_PREFIX = "INSTR_FMT_" +# @TODO generate all these after updating the DSL +SPECIALLY_HANDLED_ABSTRACT_INSTR = { + # "LOAD_FAST", + # "LOAD_FAST_CHECK", + # "LOAD_FAST_AND_CLEAR", + # "LOAD_CONST", + # "STORE_FAST", + # "STORE_FAST_MAYBE_NULL", + # "COPY", +} + arg_parser = argparse.ArgumentParser( description="Generate the code for the interpreter switch.", formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -628,8 +639,7 @@ def write_abstract_interpreter_instructions( self.write_overridden_instr_place_holder(thing) case parsing.InstDef(): instr = AbstractInstruction(self.instrs[thing.name].inst) - self.out.emit("") - if instr.is_viable_uop(): + if instr.is_viable_uop() and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR: self.out.emit("") with self.out.block(f"case {thing.name}:"): instr.write(self.out, tier=TIER_TWO) diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 8ae08f70904305..8c8b5d09e4fcda 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -432,5 +432,5 @@ def 
_write_components_for_abstract_interp( poke.effect.cond, poke.effect.size, ), - StackEffect("NULL"), + StackEffect("partitionnode_null()"), ) From 7632ed1ffdfc0049ab48a4db444d6944fc98689f Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 6 Aug 2023 03:46:53 +0800 Subject: [PATCH 16/48] (leaky) data structures for constant propagation --- Python/optimizer_analysis.c | 316 +++++++++++++++++++++++- Tools/cases_generator/generate_cases.py | 14 +- Tools/cases_generator/stacking.py | 19 +- 3 files changed, 322 insertions(+), 27 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4bc1295a72a40f..19543df8bcf8f4 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -29,18 +29,28 @@ typedef struct _Py_PartitionRootNode { PyObject_HEAD // For partial evaluation uint8_t static_or_dyanmic; + PyObject *const_val; // For types (TODO) } _Py_PartitionRootNode; +static void +partitionnode_dealloc(PyObject *o) +{ + _Py_PartitionRootNode *self = (_Py_PartitionRootNode *)o; + Py_CLEAR(self->const_val); + Py_TYPE(self)->tp_free(o); +} + PyTypeObject _Py_PartitionRootNode_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract interpreter's root node", .tp_basicsize = sizeof(_Py_PartitionRootNode), - .tp_dealloc = PyObject_Del, + .tp_dealloc = partitionnode_dealloc, + .tp_free = PyObject_Free, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION }; -static inline _Py_PARTITIONNODE_t +static inline _Py_TypeNodeTags partitionnode_get_tag(_Py_PARTITIONNODE_t node) { return node & 0b11; @@ -52,14 +62,20 @@ partitionnode_clear_tag(_Py_PARTITIONNODE_t node) return node & (~(uintptr_t)(0b11)); } +// static_or_dynamic +// 0 - static +// 1 - dynamic +// If static, const_value must be set! 
static inline _Py_PARTITIONNODE_t -partitionnode_make_root(uint8_t static_or_dynamic) +partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) { _Py_PartitionRootNode *root = PyObject_New(_Py_PartitionRootNode, &_Py_PartitionRootNode_Type); if (root == NULL) { return 0; } root->static_or_dyanmic = static_or_dynamic; + root->const_val = Py_NewRef(const_val); + fprintf(stderr, "allocating ROOT\n"); return (_Py_PARTITIONNODE_t)root; } @@ -69,11 +85,8 @@ partitionnode_make_ref(_Py_PARTITIONNODE_t node) return partitionnode_clear_tag(node) | TYPE_REF; } -static inline _Py_PARTITIONNODE_t -partitionnode_null() -{ - return 0; -} + +static _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NULL | TYPE_ROOT; // Tier 2 types meta interpreter @@ -91,9 +104,20 @@ static void abstractinterp_dealloc(PyObject *o) { _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; - PyMem_Free(self->stack); + // Traverse all nodes and decref the root objects (if they are not NULL). + // Note: stack is after locals so this is safe + int total = self->locals_len + self->stack_len; + for (int i = 0; i < total; i++) { + _Py_PARTITIONNODE_t node = self->locals[i]; + if (partitionnode_get_tag(node) == TYPE_ROOT) { + if (node != PARTITIONNODE_NULLROOT) { + fprintf(stderr, "DEALLOCATING ROOT\n"); + } + Py_XDECREF(partitionnode_clear_tag(node)); + } + } PyMem_Free(self->locals); - // TODO traverse the nodes and decref all roots too. + // No need to free stack because it is allocated together with the locals. 
Py_TYPE(self)->tp_free((PyObject *)self); } @@ -102,6 +126,7 @@ PyTypeObject _Py_UOpsAbstractInterpContext_Type = { .tp_name = "uops abstract interpreter's context", .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext), .tp_dealloc = abstractinterp_dealloc, + .tp_free = PyObject_Free, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION }; @@ -126,7 +151,7 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl for (int i = 0; i < locals_len + stack_len; i++) { - locals_with_stack[i] = partitionnode_null(); + locals_with_stack[i] = PARTITIONNODE_NULLROOT; } self->locals = locals_with_stack; @@ -136,6 +161,226 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl return self; } +static inline _Py_PARTITIONNODE_t * +partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) +{ + _Py_TypeNodeTags tag = partitionnode_get_tag(*ref); + while (tag != TYPE_ROOT) { + ref = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*ref)); + tag = partitionnode_get_tag(*ref); + } + return ref; +} + +/** + * @brief Performs SET operation. dst tree becomes part of src tree + * + * If src_is_new is set, src is interpreted as a TYPE_ROOT + * not part of the type_context. Otherwise, it is interpreted as a pointer + * to a _Py_PARTITIONNODE_t. + * + * If src_is_new: + * Overwrites the root of the dst tree with the src node + * else: + * Makes the root of the dst tree a TYPE_REF to src + * +*/ +static void +partitionnode_set(_Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_is_new) +{ + { + +#ifdef Py_DEBUG + // If `src_is_new` is set: + // - `src` doesn't belong inside the type context yet. 
+ // - `src` has to be a TYPE_ROOT + // - `src` is to be interpreted as a _Py_TYPENODE_t + if (src_is_new) { + assert(partitionnode_get_tag(*src) == TYPE_ROOT); + } +#endif + + _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); + switch (tag) { + case TYPE_ROOT: { + _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*dst); + if (!src_is_new) { + // Make dst a reference to src + *dst = partitionnode_make_ref(*src); + Py_XDECREF(old_root); + break; + } + // Make dst the src + *dst = *src; + Py_XDECREF(old_root); + break; + } + case TYPE_REF: { + _Py_PARTITIONNODE_t *rootptr = partitionnode_get_rootptr(dst); + _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*rootptr); + if (!src_is_new) { + // Traverse up to the root of dst, make root a reference to src + *rootptr = partitionnode_make_ref(*src); + // Old root no longer used. + Py_XDECREF(old_root); + break; + } + // Make root of dst the src + *rootptr = *src; + // Old root no longer used. + Py_XDECREF(old_root); + break; + } + default: + Py_UNREACHABLE(); + } + } +} + + +/** + * @brief Performs OVERWRITE operation. dst node gets overwritten by src node + * + * If src_is_new is set, src is interpreted as a TYPE_ROOT + * not part of the ctx. Otherwise, it is interpreted as a pointer + * to a _Py_PARTITIONNODE_t. 
+ * + * If src_is_new: + * Removes dst node from its tree (+fixes all the references to dst) + * Overwrite the dst node with the src node + * else: + * Removes dst node from its tree (+fixes all the references to dst) + * Makes the root of the dst tree a TYPE_REF to src + * +*/ +static void +partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, + _Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_is_new) +{ +#ifdef Py_DEBUG + if (src_is_new) { + assert(partitionnode_get_tag(*src) == TYPE_ROOT); + } +#endif + _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); + switch (tag) { + case TYPE_ROOT: { + + _Py_PARTITIONNODE_t old_dst = *dst; + if (!src_is_new) { + // Make dst a reference to src + *dst = partitionnode_make_ref(*src); + } + else { + // Make dst the src + *dst = *src; + } + + // No longer need the old root. + Py_XDECREF(partitionnode_clear_tag(old_dst)); + + /* Pick one child of dst and make that the new root of the dst tree */ + + // Children of dst will have this form + _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( + partitionnode_clear_tag(*dst)); + // Will be initialised to the first child we find (ptr to the new root) + _Py_PARTITIONNODE_t *new_root_ptr = NULL; + + // Search locals for children + int nlocals = ctx->locals_len; + for (int i = 0; i < nlocals; i++) { + _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); + if (*node_ptr == child_test) { + if (new_root_ptr == NULL) { + // First child encountered! initialise root + new_root_ptr = node_ptr; + *node_ptr = *dst; + } + else { + // Not the first child encounted, point it to the new root + *node_ptr = partitionnode_make_ref(*new_root_ptr); + } + } + } + + // Search stack for children + int nstack = ctx->stack_len; + for (int i = 0; i < nstack; i++) { + _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); + if (*node_ptr == child_test) { + if (new_root_ptr == NULL) { + // First child encountered! 
initialise root + new_root_ptr = node_ptr; + *node_ptr = *dst; + } + else { + // Not the first child encounted, point it to the new root + *node_ptr = partitionnode_make_ref(*new_root_ptr); + } + } + } + + break; + } + case TYPE_REF: { + + _Py_PARTITIONNODE_t old_dst = *dst; + // Make dst a reference to src + if (!src_is_new) { + // Make dst a reference to src + *dst = partitionnode_make_ref(*src); + } + else { + // Make dst the src + *dst = *src; + } + + /* Make all child of src be a reference to the parent of dst */ + + // Children of dst will have this form + _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( + partitionnode_clear_tag(*dst)); + + // Search locals for children + int nlocals = ctx->locals_len; + for (int i = 0; i < nlocals; i++) { + _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); + if (*node_ptr == child_test) { + // Is a child of dst. Point it to the parent of dst + *node_ptr = old_dst; + } + } + + // Search stack for children + int nstack = ctx->stack_len; + for (int i = 0; i < nstack; i++) { + _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); + if (*node_ptr == child_test) { + // Is a child of dst. 
Point it to the parent of dst + *node_ptr = old_dst; + } + } + break; + } + default: + Py_UNREACHABLE(); + } +} + + +#ifndef Py_DEBUG +#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i)) +#else +static inline PyObject * +GETITEM(PyObject *v, Py_ssize_t i) { + assert(PyTuple_Check(v)); + assert(i >= 0); + assert(i < PyTuple_GET_SIZE(v)); + return PyTuple_GET_ITEM(v, i); +} +#endif + int _Py_uop_analyze_and_optimize( PyCodeObject *co, @@ -163,6 +408,12 @@ _Py_uop_analyze_and_optimize( #define STACK_GROW(n) BASIC_STACKADJ(n) #define STACK_SHRINK(n) BASIC_STACKADJ(-(n)) #endif +#define PEEK(idx) (&(stack_pointer[-(idx)])) +#define GETLOCAL(idx) (&(locals[idx])) + +#define PARTITIONNODE_SET(src, dst, flag) partitionnode_set((src), (dst), (flag)) +#define PARTITIONNODE_OVERWRITE(src, dst, flag) partitionnode_overwrite(ctx, (src), (dst), (flag)) +#define MAKE_STATIC_ROOT(val) partitionnode_make_root(0, (val)) _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len); if (temp_writebuffer == NULL) { return trace_len; @@ -177,11 +428,52 @@ _Py_uop_analyze_and_optimize( int oparg; int opcode; _Py_PARTITIONNODE_t *stack_pointer = ctx->stack_pointer; + _Py_PARTITIONNODE_t *locals = ctx->locals; for (int i = 0; i < trace_len; i++) { oparg = trace[i].oparg; opcode = trace[i].opcode; + /* + "LOAD_FAST", + "LOAD_FAST_CHECK", + "LOAD_FAST_AND_CLEAR", + "LOAD_CONST", + "STORE_FAST", + "STORE_FAST_MAYBE_NULL", + "COPY", + */ switch (opcode) { #include "abstract_interp_cases.c.h" + // @TODO convert these to autogenerated using DSL + case LOAD_FAST: + case LOAD_FAST_CHECK: + STACK_GROW(1); + PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); + break; + case LOAD_FAST_AND_CLEAR: { + STACK_GROW(1); + PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); + PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, GETLOCAL(oparg), false); + break; + } + case LOAD_CONST: { + _Py_PARTITIONNODE_t value = MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); + STACK_GROW(1); + 
PARTITIONNODE_OVERWRITE(&value, PEEK(1), false); + break; + } + case STORE_FAST: + case STORE_FAST_MAYBE_NULL: { + _Py_PARTITIONNODE_t *value = PEEK(1); + PARTITIONNODE_OVERWRITE(value, GETLOCAL(oparg), false); + STACK_SHRINK(1); + break; + } + case COPY: { + _Py_PARTITIONNODE_t *bottom = PEEK(1 + (oparg - 1)); + STACK_GROW(1); + PARTITIONNODE_SET(bottom, PEEK(1), false); + break; + } default: fprintf(stderr, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); @@ -190,5 +482,7 @@ _Py_uop_analyze_and_optimize( } assert(STACK_SIZE() >= 0); + Py_DECREF(ctx); + PyMem_Free(temp_writebuffer); return trace_len; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 636f8b0600d052..58da498f0d1d26 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -63,13 +63,13 @@ # @TODO generate all these after updating the DSL SPECIALLY_HANDLED_ABSTRACT_INSTR = { - # "LOAD_FAST", - # "LOAD_FAST_CHECK", - # "LOAD_FAST_AND_CLEAR", - # "LOAD_CONST", - # "STORE_FAST", - # "STORE_FAST_MAYBE_NULL", - # "COPY", + "LOAD_FAST", + "LOAD_FAST_CHECK", + "LOAD_FAST_AND_CLEAR", + "LOAD_CONST", + "STORE_FAST", + "STORE_FAST_MAYBE_NULL", + "COPY", } arg_parser = argparse.ArgumentParser( diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 8c8b5d09e4fcda..eb54dd65a394d9 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -425,12 +425,13 @@ def _write_components_for_abstract_interp( # NULL out the output stack effects for poke in mgr.pokes: if not poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names: - out.assign( - StackEffect( - poke.as_variable(), - poke.effect.type, - poke.effect.cond, - poke.effect.size, - ), - StackEffect("partitionnode_null()"), - ) + out.emit(f"PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);") + # out.assign( + # StackEffect( + # 
poke.as_variable(), + # poke.effect.type, + # poke.effect.cond, + # poke.effect.size, + # ), + # StackEffect("partitionnode_nullroot()"), + # ) From 0d0c4c45e5b4aa19da4bb06a59f7fe4d780c6e8a Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 6 Aug 2023 16:59:27 +0800 Subject: [PATCH 17/48] (with cycles) try to fix the type prop Co-Authored-By: Jules <57632293+juliapoo@users.noreply.github.com> --- Python/optimizer_analysis.c | 182 +++++++++++++++++++++++------- Tools/cases_generator/stacking.py | 2 +- 2 files changed, 144 insertions(+), 40 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 19543df8bcf8f4..378c321767dbdd 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -12,6 +12,8 @@ #include #include "pycore_optimizer.h" +#define PARTITION_DEBUG 1 + // TYPENODE is a tagged pointer that uses the last 2 LSB as the tag #define _Py_PARTITIONNODE_t uintptr_t @@ -66,7 +68,7 @@ partitionnode_clear_tag(_Py_PARTITIONNODE_t node) // 0 - static // 1 - dynamic // If static, const_value must be set! 
-static inline _Py_PARTITIONNODE_t +static _Py_PARTITIONNODE_t partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) { _Py_PartitionRootNode *root = PyObject_New(_Py_PartitionRootNode, &_Py_PartitionRootNode_Type); @@ -75,20 +77,18 @@ partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) } root->static_or_dyanmic = static_or_dynamic; root->const_val = Py_NewRef(const_val); - fprintf(stderr, "allocating ROOT\n"); return (_Py_PARTITIONNODE_t)root; } static inline _Py_PARTITIONNODE_t -partitionnode_make_ref(_Py_PARTITIONNODE_t node) +partitionnode_make_ref(_Py_PARTITIONNODE_t *node) { - return partitionnode_clear_tag(node) | TYPE_REF; + return partitionnode_clear_tag((_Py_PARTITIONNODE_t)node) | TYPE_REF; } static _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NULL | TYPE_ROOT; - // Tier 2 types meta interpreter typedef struct _Py_UOpsAbstractInterpContext { PyObject_HEAD @@ -110,9 +110,6 @@ abstractinterp_dealloc(PyObject *o) for (int i = 0; i < total; i++) { _Py_PARTITIONNODE_t node = self->locals[i]; if (partitionnode_get_tag(node) == TYPE_ROOT) { - if (node != PARTITIONNODE_NULLROOT) { - fprintf(stderr, "DEALLOCATING ROOT\n"); - } Py_XDECREF(partitionnode_clear_tag(node)); } } @@ -161,7 +158,11 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl return self; } -static inline _Py_PARTITIONNODE_t * +#if PARTITION_DEBUG +static void print_ctx(_Py_UOpsAbstractInterpContext *ctx); +#endif + +static _Py_PARTITIONNODE_t * partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) { _Py_TypeNodeTags tag = partitionnode_get_tag(*ref); @@ -204,31 +205,27 @@ partitionnode_set(_Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_i switch (tag) { case TYPE_ROOT: { _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*dst); + Py_XDECREF(old_root); if (!src_is_new) { // Make dst a reference to src - *dst = partitionnode_make_ref(*src); - Py_XDECREF(old_root); + *dst = 
partitionnode_make_ref(src); break; } // Make dst the src *dst = *src; - Py_XDECREF(old_root); break; } case TYPE_REF: { _Py_PARTITIONNODE_t *rootptr = partitionnode_get_rootptr(dst); _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*rootptr); + Py_XDECREF(old_root); if (!src_is_new) { // Traverse up to the root of dst, make root a reference to src - *rootptr = partitionnode_make_ref(*src); - // Old root no longer used. - Py_XDECREF(old_root); + *rootptr = partitionnode_make_ref(src); break; } // Make root of dst the src *rootptr = *src; - // Old root no longer used. - Py_XDECREF(old_root); break; } default: @@ -259,7 +256,7 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, { #ifdef Py_DEBUG if (src_is_new) { - assert(partitionnode_get_tag(*src) == TYPE_ROOT); + assert(partitionnode_get_tag((_Py_PARTITIONNODE_t)src) == TYPE_ROOT); } #endif _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); @@ -269,37 +266,40 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, _Py_PARTITIONNODE_t old_dst = *dst; if (!src_is_new) { // Make dst a reference to src - *dst = partitionnode_make_ref(*src); + *dst = partitionnode_make_ref(src); + assert(partitionnode_get_tag(*dst) == TYPE_REF); + assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); + fprintf(stderr, "START\n"); + print_ctx(ctx); } else { // Make dst the src - *dst = *src; + *dst = (_Py_PARTITIONNODE_t)src; } - // No longer need the old root. 
- Py_XDECREF(partitionnode_clear_tag(old_dst)); /* Pick one child of dst and make that the new root of the dst tree */ // Children of dst will have this form _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( - partitionnode_clear_tag(*dst)); - // Will be initialised to the first child we find (ptr to the new root) - _Py_PARTITIONNODE_t *new_root_ptr = NULL; + (_Py_PARTITIONNODE_t *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)dst)); + // Will be initialised to the first child we find + _Py_PARTITIONNODE_t *new_root = (_Py_PARTITIONNODE_t *)NULL; // Search locals for children int nlocals = ctx->locals_len; for (int i = 0; i < nlocals; i++) { _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); if (*node_ptr == child_test) { - if (new_root_ptr == NULL) { + if (new_root == (_Py_PARTITIONNODE_t)NULL) { // First child encountered! initialise root - new_root_ptr = node_ptr; - *node_ptr = *dst; + new_root = node_ptr; + *node_ptr = old_dst; + Py_XINCREF(partitionnode_clear_tag(old_dst)); } else { // Not the first child encounted, point it to the new root - *node_ptr = partitionnode_make_ref(*new_root_ptr); + *node_ptr = partitionnode_make_ref(new_root); } } } @@ -309,18 +309,23 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, for (int i = 0; i < nstack; i++) { _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); if (*node_ptr == child_test) { - if (new_root_ptr == NULL) { + if (new_root == (_Py_PARTITIONNODE_t)NULL) { // First child encountered! initialise root - new_root_ptr = node_ptr; - *node_ptr = *dst; + new_root = node_ptr; + *node_ptr = old_dst; + Py_XINCREF(partitionnode_clear_tag(old_dst)); } else { // Not the first child encounted, point it to the new root - *node_ptr = partitionnode_make_ref(*new_root_ptr); + *node_ptr = partitionnode_make_ref(new_root); } } } + // This ndoe is no longer referencing the old root. 
+ Py_XDECREF(partitionnode_clear_tag(old_dst)); + fprintf(stderr, "END\n"); + print_ctx(ctx); break; } case TYPE_REF: { @@ -329,18 +334,20 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, // Make dst a reference to src if (!src_is_new) { // Make dst a reference to src - *dst = partitionnode_make_ref(*src); + *dst = partitionnode_make_ref(src); + assert(partitionnode_get_tag(*dst) == TYPE_REF); + assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); } else { // Make dst the src - *dst = *src; + *dst = (_Py_PARTITIONNODE_t)src; } /* Make all child of src be a reference to the parent of dst */ // Children of dst will have this form _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( - partitionnode_clear_tag(*dst)); + (_Py_PARTITIONNODE_t *)partitionnode_clear_tag(*dst)); // Search locals for children int nlocals = ctx->locals_len; @@ -368,6 +375,100 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, } } +#if PARTITION_DEBUG + +/** + * @brief Print the entries in the abstract interpreter context (along with locals). +*/ +static void +print_ctx(_Py_UOpsAbstractInterpContext *ctx) +{ + _Py_PARTITIONNODE_t *locals = ctx->locals; + _Py_PARTITIONNODE_t *stackptr = ctx->stack_pointer; + + int nstack_use = (int)(stackptr - ctx->stack); + int nstack = ctx->stack_len; + int nlocals = ctx->locals_len; + + bool is_local = false; + bool is_stack = false; + + int locals_offset = -1; + int stack_offset = -1; + int parent_idx = -1; + + fprintf(stderr, " Stack: %p: [", ctx->stack); + for (int i = 0; i < nstack; i++) { + _Py_PARTITIONNODE_t *node = &ctx->stack[i]; + _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); + + _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); + + fprintf(stderr, "%s", i == nstack_use ? "." 
: " "); + + if (tag == TYPE_REF) { + _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); + is_local = parent >= ctx->locals && parent < ctx->stack; + is_stack = parent >= ctx->stack && parent < (ctx->stack + nstack); + parent_idx = is_local + ? (int)(parent - ctx->locals) + : is_stack + ? (int)(parent - ctx->locals) + : -1; + } + + + _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); + fprintf(stderr, "%s", + ptr == NULL ? "?" : (ptr->static_or_dyanmic ? "dynamic" : "static")); + + if (tag == TYPE_REF) { + const char *wher = is_local + ? "locals" + : is_stack + ? "stack" + : "const"; + fprintf(stderr, "->%s[%d]", + wher, parent_idx); + } + } + fprintf(stderr, "]\n"); + + fprintf(stderr, " Locals %p: [", locals); + for (int i = 0; i < nlocals; i++) { + _Py_PARTITIONNODE_t *node = &ctx->locals[i]; + _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); + + _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); + + if (tag == TYPE_REF) { + _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); + is_local = parent >= ctx->locals && parent < ctx->stack; + is_stack = parent >= ctx->stack && parent < (ctx->stack + nstack); + parent_idx = is_local + ? (int)(parent - ctx->locals) + : is_stack + ? (int)(parent - ctx->locals) + : -1; + } + + _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); + fprintf(stderr, "%s", + ptr == NULL ? "?" : (ptr->static_or_dyanmic ? "dynamic" : "static")); + + if (tag == TYPE_REF) { + const char *wher = is_local + ? "locals" + : is_stack + ? 
"stack" + : "const"; + fprintf(stderr, "->%s[%d]", + wher, parent_idx); + } + } + fprintf(stderr, "]\n"); +} +#endif #ifndef Py_DEBUG #define GETITEM(v, i) PyTuple_GET_ITEM((v), (i)) @@ -398,6 +499,7 @@ _Py_uop_analyze_and_optimize( assert(n >= 0); \ BASIC_STACKADJ(n); \ assert(STACK_LEVEL() <= STACK_SIZE()); \ + ctx->stack_pointer = stack_pointer; \ } while (0) #define STACK_SHRINK(n) do { \ assert(n >= 0); \ @@ -458,7 +560,10 @@ _Py_uop_analyze_and_optimize( case LOAD_CONST: { _Py_PARTITIONNODE_t value = MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); STACK_GROW(1); - PARTITIONNODE_OVERWRITE(&value, PEEK(1), false); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)value, PEEK(1), false); +#if PARTITION_DEBUG + print_ctx(ctx); +#endif break; } case STORE_FAST: @@ -478,8 +583,7 @@ _Py_uop_analyze_and_optimize( fprintf(stderr, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); } - ctx->stack_pointer = stack_pointer; - + //print_ctx(ctx); } assert(STACK_SIZE() >= 0); Py_DECREF(ctx); diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index eb54dd65a394d9..d2b178ea908757 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -425,7 +425,7 @@ def _write_components_for_abstract_interp( # NULL out the output stack effects for poke in mgr.pokes: if not poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names: - out.emit(f"PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);") + out.emit(f"PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);") # out.assign( # StackEffect( # poke.as_variable(), From 4c8953e437646efbaf59b5590e9489b83e3fb466 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 6 Aug 2023 17:19:35 +0800 Subject: [PATCH 18/48] fix: cycles Co-Authored-By: Jules <57632293+juliapoo@users.noreply.github.com> --- 
Python/optimizer_analysis.c | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 378c321767dbdd..01723afe7f55af 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -173,6 +173,35 @@ partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) return ref; } +/** + * @brief Checks if two nodes are in the same partition. +*/ +static bool +partitionnode_is_same_partition(_Py_PARTITIONNODE_t *x, _Py_PARTITIONNODE_t *y) +{ + _Py_PARTITIONNODE_t *x_rootref = x; + _Py_PARTITIONNODE_t *y_rootref = y; + uintptr_t x_tag = partitionnode_get_tag(*x); + uintptr_t y_tag = partitionnode_get_tag(*y); + switch (y_tag) { + case TYPE_REF: + y_rootref = partitionnode_get_rootptr(y); + case TYPE_ROOT: + break; + default: + Py_UNREACHABLE(); + } + switch (x_tag) { + case TYPE_REF: + x_rootref = partitionnode_get_rootptr(x); + case TYPE_ROOT: + break; + default: + Py_UNREACHABLE(); + } + return x_rootref == y_rootref; +} + /** * @brief Performs SET operation. 
dst tree becomes part of src tree * @@ -201,6 +230,11 @@ partitionnode_set(_Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_i } #endif + // This prevents cycles from forming + if (!src_is_new && partitionnode_is_same_partition(src, dst)) { + return; + } + _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); switch (tag) { case TYPE_ROOT: { @@ -259,6 +293,12 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, assert(partitionnode_get_tag((_Py_PARTITIONNODE_t)src) == TYPE_ROOT); } #endif + + // This prevents cycles from forming + if (!src_is_new && partitionnode_is_same_partition(src, dst)) { + return; + } + _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); switch (tag) { case TYPE_ROOT: { From 3bd36fa49ea5f3493e589a56af850a3c1aa4115e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 6 Aug 2023 17:21:46 +0800 Subject: [PATCH 19/48] cleanup Co-Authored-By: Jules <57632293+juliapoo@users.noreply.github.com> --- Python/optimizer_analysis.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 01723afe7f55af..461f4f010d9b48 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -309,8 +309,6 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, *dst = partitionnode_make_ref(src); assert(partitionnode_get_tag(*dst) == TYPE_REF); assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); - fprintf(stderr, "START\n"); - print_ctx(ctx); } else { // Make dst the src @@ -364,8 +362,6 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, // This ndoe is no longer referencing the old root. 
Py_XDECREF(partitionnode_clear_tag(old_dst)); - fprintf(stderr, "END\n"); - print_ctx(ctx); break; } case TYPE_REF: { @@ -575,13 +571,14 @@ _Py_uop_analyze_and_optimize( oparg = trace[i].oparg; opcode = trace[i].opcode; /* + * The following are special cased: "LOAD_FAST", - "LOAD_FAST_CHECK", - "LOAD_FAST_AND_CLEAR", - "LOAD_CONST", - "STORE_FAST", - "STORE_FAST_MAYBE_NULL", - "COPY", + "LOAD_FAST_CHECK", + "LOAD_FAST_AND_CLEAR", + "LOAD_CONST", + "STORE_FAST", + "STORE_FAST_MAYBE_NULL", + "COPY", */ switch (opcode) { #include "abstract_interp_cases.c.h" @@ -623,7 +620,9 @@ _Py_uop_analyze_and_optimize( fprintf(stderr, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); } - //print_ctx(ctx); +#if PARTITION_DEBUG + print_ctx(ctx); +#endif } assert(STACK_SIZE() >= 0); Py_DECREF(ctx); From 229097fa7b427799aec8623eb3a3f5658bf090ef Mon Sep 17 00:00:00 2001 From: Jules <57632293+JuliaPoo@users.noreply.github.com> Date: Mon, 7 Aug 2023 23:34:21 +0800 Subject: [PATCH 20/48] Fix+Refactor: Handling of root nodes in special-cased type prop (#40) * Fix+Refactor: Handling of root nodes in special-cased type prop * Style: Removed trailing space --- Python/optimizer_analysis.c | 62 ++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 461f4f010d9b48..1f3425f15fe009 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -77,7 +77,7 @@ partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) } root->static_or_dyanmic = static_or_dynamic; root->const_val = Py_NewRef(const_val); - return (_Py_PARTITIONNODE_t)root; + return (_Py_PARTITIONNODE_t)root | TYPE_ROOT; } static inline _Py_PARTITIONNODE_t @@ -179,27 +179,7 @@ partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) static bool partitionnode_is_same_partition(_Py_PARTITIONNODE_t *x, _Py_PARTITIONNODE_t *y) { - _Py_PARTITIONNODE_t *x_rootref = x; - _Py_PARTITIONNODE_t 
*y_rootref = y; - uintptr_t x_tag = partitionnode_get_tag(*x); - uintptr_t y_tag = partitionnode_get_tag(*y); - switch (y_tag) { - case TYPE_REF: - y_rootref = partitionnode_get_rootptr(y); - case TYPE_ROOT: - break; - default: - Py_UNREACHABLE(); - } - switch (x_tag) { - case TYPE_REF: - x_rootref = partitionnode_get_rootptr(x); - case TYPE_ROOT: - break; - default: - Py_UNREACHABLE(); - } - return x_rootref == y_rootref; + return partitionnode_get_rootptr(x) == partitionnode_get_rootptr(y); } /** @@ -307,7 +287,6 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, if (!src_is_new) { // Make dst a reference to src *dst = partitionnode_make_ref(src); - assert(partitionnode_get_tag(*dst) == TYPE_REF); assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); } else { @@ -444,19 +423,21 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) if (tag == TYPE_REF) { _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); - is_local = parent >= ctx->locals && parent < ctx->stack; - is_stack = parent >= ctx->stack && parent < (ctx->stack + nstack); + int local_index = (int)(parent - ctx->locals); + int stack_index = (int)(parent - ctx->stack); + is_local = local_index >= 0 && local_index < ctx->locals_len; + is_stack = stack_index >= 0 && stack_index < nstack; parent_idx = is_local - ? (int)(parent - ctx->locals) + ? local_index : is_stack - ? (int)(parent - ctx->locals) + ? stack_index : -1; } _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); fprintf(stderr, "%s", - ptr == NULL ? "?" : (ptr->static_or_dyanmic ? "dynamic" : "static")); + ptr == NULL ? "?" : (ptr->static_or_dyanmic == 0 ? 
"static" : "dynamic")); if (tag == TYPE_REF) { const char *wher = is_local @@ -479,18 +460,20 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) if (tag == TYPE_REF) { _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); - is_local = parent >= ctx->locals && parent < ctx->stack; - is_stack = parent >= ctx->stack && parent < (ctx->stack + nstack); + int local_index = (int)(parent - ctx->locals); + int stack_index = (int)(parent - ctx->stack); + is_local = local_index >= 0 && local_index < ctx->locals_len; + is_stack = stack_index >= 0 && stack_index < nstack; parent_idx = is_local - ? (int)(parent - ctx->locals) + ? local_index : is_stack - ? (int)(parent - ctx->locals) + ? stack_index : -1; } _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); fprintf(stderr, "%s", - ptr == NULL ? "?" : (ptr->static_or_dyanmic ? "dynamic" : "static")); + ptr == NULL ? "?" : (ptr->static_or_dyanmic == 0 ? "static" : "dynamic")); if (tag == TYPE_REF) { const char *wher = is_local @@ -570,6 +553,13 @@ _Py_uop_analyze_and_optimize( for (int i = 0; i < trace_len; i++) { oparg = trace[i].oparg; opcode = trace[i].opcode; +#ifdef PARTITION_DEBUG +#ifdef Py_DEBUG + fprintf(stderr, " [-] Type propagating across: %s{%d} : %d\n", + (opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[opcode], + opcode, oparg); +#endif +#endif /* * The following are special cased: "LOAD_FAST", @@ -591,13 +581,13 @@ _Py_uop_analyze_and_optimize( case LOAD_FAST_AND_CLEAR: { STACK_GROW(1); PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); - PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, GETLOCAL(oparg), false); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, GETLOCAL(oparg), true); break; } case LOAD_CONST: { - _Py_PARTITIONNODE_t value = MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); + _Py_PARTITIONNODE_t* value = (_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); STACK_GROW(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)value, PEEK(1), false); + PARTITIONNODE_OVERWRITE(value, PEEK(1), true); #if PARTITION_DEBUG print_ctx(ctx); #endif From ca0fab79b2cb84b09fc7f4e9bf9aaca6516c6423 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 8 Aug 2023 17:45:57 +0800 Subject: [PATCH 21/48] partially partially evaluate --- Python/abstract_interp_cases.c.h | 473 +++++------------------- Python/bytecodes.c | 9 + Python/executor_cases.c.h | 10 + Python/optimizer_analysis.c | 327 ++++++++++++++-- Tools/cases_generator/generate_cases.py | 8 +- 5 files changed, 433 insertions(+), 394 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 33e726cc78e17d..6e8448d3dc4412 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -3,1024 +3,753 @@ // Python/bytecodes.c // Do not edit! 
- case NOP: { break; } - - - - case LOAD_FAST_CHECK: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - - - case LOAD_FAST: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - - - case LOAD_FAST_AND_CLEAR: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - - - - case LOAD_CONST: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - - - case STORE_FAST: { - STACK_SHRINK(1); - break; - } - - - - case POP_TOP: { STACK_SHRINK(1); break; } - case PUSH_NULL: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case END_SEND: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case UNARY_NEGATIVE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case UNARY_NOT: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL_BOOL: { break; } - case TO_BOOL_INT: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL_LIST: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL_NONE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL_STR: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL_ALWAYS_TRUE: { - stack_pointer[-1] = NULL; + 
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case UNARY_INVERT: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _GUARD_BOTH_INT: { break; } - - case _BINARY_OP_MULTIPLY_INT: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - - - case _BINARY_OP_ADD_INT: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - - - case _BINARY_OP_SUBTRACT_INT: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - - case _GUARD_BOTH_FLOAT: { break; } - case _BINARY_OP_MULTIPLY_FLOAT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _BINARY_OP_ADD_FLOAT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _BINARY_OP_SUBTRACT_FLOAT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _GUARD_BOTH_UNICODE: { break; } - case _BINARY_OP_ADD_UNICODE: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case BINARY_SUBSCR: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BINARY_SLICE: { STACK_SHRINK(2); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case STORE_SLICE: { STACK_SHRINK(4); break; } - case BINARY_SUBSCR_LIST_INT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BINARY_SUBSCR_TUPLE_INT: { STACK_SHRINK(1); 
- stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BINARY_SUBSCR_DICT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case LIST_APPEND: { STACK_SHRINK(1); break; } - case SET_ADD: { STACK_SHRINK(1); break; } - case STORE_SUBSCR: { STACK_SHRINK(3); break; } - case STORE_SUBSCR_LIST_INT: { STACK_SHRINK(3); break; } - case STORE_SUBSCR_DICT: { STACK_SHRINK(3); break; } - case DELETE_SUBSCR: { STACK_SHRINK(2); break; } - case CALL_INTRINSIC_1: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_INTRINSIC_2: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - - - case GET_AITER: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case GET_ANEXT: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case GET_AWAITABLE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - case POP_EXCEPT: { STACK_SHRINK(1); break; } - - - - case LOAD_ASSERTION_ERROR: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LOAD_BUILD_CLASS: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case STORE_NAME: { STACK_SHRINK(1); break; } - case DELETE_NAME: { break; } - case UNPACK_SEQUENCE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } - case 
UNPACK_SEQUENCE_TWO_TUPLE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } - case UNPACK_SEQUENCE_TUPLE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } - case UNPACK_SEQUENCE_LIST: { STACK_SHRINK(1); STACK_GROW(oparg); break; } - case UNPACK_EX: { STACK_GROW((oparg & 0xFF) + (oparg >> 8)); - stack_pointer[-1 - (oparg >> 8)] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg >> 8))), true); break; } - case STORE_ATTR: { STACK_SHRINK(2); break; } - case DELETE_ATTR: { STACK_SHRINK(1); break; } - case STORE_GLOBAL: { STACK_SHRINK(1); break; } - case DELETE_GLOBAL: { break; } - case _LOAD_LOCALS: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _LOAD_FROM_DICT_OR_GLOBALS: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LOAD_GLOBAL: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _GUARD_GLOBALS_VERSION: { break; } - case _GUARD_BUILTINS_VERSION: { break; } - case _LOAD_GLOBAL_MODULE: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _LOAD_GLOBAL_BUILTINS: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 
1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case DELETE_FAST: { break; } - - case DELETE_DEREF: { break; } - case LOAD_FROM_DICT_OR_DEREF: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LOAD_DEREF: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case STORE_DEREF: { STACK_SHRINK(1); break; } - case COPY_FREE_VARS: { break; } - case BUILD_STRING: { STACK_SHRINK(oparg); STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BUILD_TUPLE: { STACK_SHRINK(oparg); STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BUILD_LIST: { STACK_SHRINK(oparg); STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LIST_EXTEND: { STACK_SHRINK(1); break; } - case SET_UPDATE: { STACK_SHRINK(1); break; } - case BUILD_SET: { STACK_SHRINK(oparg); STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BUILD_MAP: { STACK_SHRINK(oparg*2); STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case SETUP_ANNOTATIONS: { break; } - case BUILD_CONST_KEY_MAP: { STACK_SHRINK(oparg); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case 
DICT_UPDATE: { STACK_SHRINK(1); break; } - case DICT_MERGE: { STACK_SHRINK(1); break; } - case MAP_ADD: { STACK_SHRINK(2); break; } - - - case LOAD_SUPER_ATTR_ATTR: { STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LOAD_SUPER_ATTR_METHOD: { STACK_SHRINK(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-2)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LOAD_ATTR: { STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _GUARD_TYPE_VERSION: { break; } - case _CHECK_MANAGED_OBJECT_HAS_VALUES: { break; } - case _LOAD_ATTR_INSTANCE_VALUE: { STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 
1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - - - - - - case COMPARE_OP: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case COMPARE_OP_FLOAT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case COMPARE_OP_INT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case COMPARE_OP_STR: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case IS_OP: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CONTAINS_OP: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CHECK_EG_MATCH: { - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-2)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CHECK_EXC_MATCH: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - - - - case IS_NONE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case GET_LEN: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case MATCH_CLASS: { STACK_SHRINK(2); - stack_pointer[-1] = NULL; + 
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case MATCH_MAPPING: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case MATCH_SEQUENCE: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case MATCH_KEYS: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case GET_ITER: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case GET_YIELD_FROM_ITER: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - case _ITER_CHECK_LIST: { break; } - - case _IS_ITER_EXHAUSTED_LIST: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _ITER_NEXT_LIST: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _ITER_CHECK_TUPLE: { break; } - - case _IS_ITER_EXHAUSTED_TUPLE: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _ITER_NEXT_TUPLE: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _ITER_CHECK_RANGE: { break; } - - case _IS_ITER_EXHAUSTED_RANGE: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _ITER_NEXT_RANGE: { STACK_GROW(1); - stack_pointer[-1] = NULL; + 
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - case WITH_EXCEPT_START: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case PUSH_EXC_INFO: { STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-2)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - - - - - - - - case CALL_NO_KW_TYPE_1: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_NO_KW_STR_1: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_NO_KW_TUPLE_1: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case EXIT_INIT_CHECK: { STACK_SHRINK(1); break; } - - case CALL_NO_KW_BUILTIN_O: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_NO_KW_BUILTIN_FAST: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case CALL_NO_KW_LEN: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_NO_KW_ISINSTANCE: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t 
*)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case CALL_NO_KW_METHOD_DESCRIPTOR_O: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - case MAKE_FUNCTION: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case SET_FUNCTION_ATTRIBUTE: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case BUILD_SLICE: { STACK_SHRINK(((oparg == 3) ? 
1 : 0)); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CONVERT_VALUE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case FORMAT_SIMPLE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case FORMAT_WITH_SPEC: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - - - case COPY: { - STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BINARY_OP: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case SWAP: { - stack_pointer[-2 - (oparg-2)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-2 - (oparg-2))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - - - - - - - case _POP_JUMP_IF_FALSE: { STACK_SHRINK(1); break; } - case _POP_JUMP_IF_TRUE: { STACK_SHRINK(1); break; } - case JUMP_TO_TOP: { break; } - case SAVE_IP: { break; } - case EXIT_TRACE: { break; } + + case INSERT: { + break; + } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 90e26d3c86b380..1d9f36248fa65e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3755,6 +3755,15 @@ dummy_func( return frame; } + op(INSERT, (--)) { + // Inserts TOS at position specified by oparg + PyObject *tos = TOP(); + for (int i = 1; i < oparg + 1; i++) { + stack_pointer[i] = stack_pointer[i - 1]; + } + POKE(oparg, tos); + } + // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9363b4955087db..a66a3a1d715648 100644 --- a/Python/executor_cases.c.h +++ 
b/Python/executor_cases.c.h @@ -2717,3 +2717,13 @@ return frame; break; } + + case INSERT: { + // Inserts TOS at position specified by oparg + PyObject *tos = TOP(); + for (int i = 1; i < oparg + 1; i++) { + stack_pointer[i] = stack_pointer[i - 1]; + } + POKE(oparg, tos); + break; + } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 1f3425f15fe009..5134d5a0baaf3f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -6,6 +6,7 @@ #include "pycore_opcode_utils.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uops.h" +#include "pycore_long.h" #include "cpython/optimizer.h" #include #include @@ -30,7 +31,9 @@ typedef enum _Py_TypeNodeTags { typedef struct _Py_PartitionRootNode { PyObject_HEAD // For partial evaluation - uint8_t static_or_dyanmic; + // 0 - static + // 1 - dynamic + uint8_t static_or_dynamic; PyObject *const_val; // For types (TODO) } _Py_PartitionRootNode; @@ -68,14 +71,14 @@ partitionnode_clear_tag(_Py_PARTITIONNODE_t node) // 0 - static // 1 - dynamic // If static, const_value must be set! -static _Py_PARTITIONNODE_t +static inline _Py_PARTITIONNODE_t partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) { _Py_PartitionRootNode *root = PyObject_New(_Py_PartitionRootNode, &_Py_PartitionRootNode_Type); if (root == NULL) { return 0; } - root->static_or_dyanmic = static_or_dynamic; + root->static_or_dynamic = static_or_dynamic; root->const_val = Py_NewRef(const_val); return (_Py_PARTITIONNODE_t)root | TYPE_ROOT; } @@ -92,12 +95,18 @@ static _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NUL // Tier 2 types meta interpreter typedef struct _Py_UOpsAbstractInterpContext { PyObject_HEAD + // The following are abstract stack and locals. 
// points to one element after the abstract stack _Py_PARTITIONNODE_t *stack_pointer; int stack_len; _Py_PARTITIONNODE_t *stack; int locals_len; _Py_PARTITIONNODE_t *locals; + // The following represent the real (emitted instructions) stack and locals. + // points to one element after the abstract stack + _Py_PARTITIONNODE_t *real_stack_pointer; + _Py_PARTITIONNODE_t *real_stack; + _Py_PARTITIONNODE_t *real_locals; } _Py_UOpsAbstractInterpContext; static void @@ -140,14 +149,15 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl self->stack_len = stack_len; self->locals_len = locals_len; - _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, locals_len + stack_len); + // Double the size needed because we also need a representation for the real stack and locals. + _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, (locals_len + stack_len) * 2); if (locals_with_stack == NULL) { Py_DECREF(self); return NULL; } - for (int i = 0; i < locals_len + stack_len; i++) { + for (int i = 0; i < (locals_len + stack_len) * 2; i++) { locals_with_stack[i] = PARTITIONNODE_NULLROOT; } @@ -155,6 +165,9 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl self->stack = locals_with_stack + locals_len; self->stack_pointer = self->stack + curr_stacklen; + self->real_locals = self->locals + locals_len + stack_len; + self->real_stack = self->stack + locals_len + stack_len; + self->real_stack_pointer = self->stack_pointer + locals_len + stack_len; return self; } @@ -162,7 +175,7 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl static void print_ctx(_Py_UOpsAbstractInterpContext *ctx); #endif -static _Py_PARTITIONNODE_t * +static inline _Py_PARTITIONNODE_t * partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) { _Py_TypeNodeTags tag = partitionnode_get_tag(*ref); @@ -489,15 +502,53 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) } #endif +static bool 
+partitionnode_is_static(_Py_PARTITIONNODE_t *node) +{ + _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); + _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)root); + if (root_obj == _Py_NULL) { + return false; + } + return !root_obj->static_or_dynamic; +} + +// MUST BE GUARDED BY partitionnode_is_static BEFORE CALLING THIS +static inline PyObject * +get_const(_Py_PARTITIONNODE_t *node) +{ + assert(partitionnode_is_static(node)); + _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); + _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode * )partitionnode_clear_tag((_Py_PARTITIONNODE_t)root); + return root_obj->const_val; +} + +// Hardcoded for now, @TODO autogenerate these from the DSL. +static inline bool +op_is_pure(int opcode) +{ + switch (opcode) { + case LOAD_CONST: + case _BINARY_OP_MULTIPLY_INT: + case _BINARY_OP_ADD_INT: + case _BINARY_OP_SUBTRACT_INT: + case SAVE_IP: + return true; + default: + return false; + } +} + + #ifndef Py_DEBUG -#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i)) +#define GETITEM(v, i) PyList_GET_ITEM((v), (i)) #else static inline PyObject * GETITEM(PyObject *v, Py_ssize_t i) { - assert(PyTuple_Check(v)); + assert(PyList_CheckExact(v)); assert(i >= 0); - assert(i < PyTuple_GET_SIZE(v)); - return PyTuple_GET_ITEM(v, i); + assert(i < PyList_GET_SIZE(v)); + return PyList_GET_ITEM(v, i); } #endif @@ -509,7 +560,7 @@ _Py_uop_analyze_and_optimize( int curr_stacklen ) { -#define STACK_LEVEL() ((int)(stack_pointer - ctx->stack)) +#define STACK_LEVEL() ((int)(stack_pointer - stack)) #define STACK_SIZE() (co->co_stacksize) #define BASIC_STACKADJ(n) (stack_pointer += n) @@ -518,7 +569,6 @@ _Py_uop_analyze_and_optimize( assert(n >= 0); \ BASIC_STACKADJ(n); \ assert(STACK_LEVEL() <= STACK_SIZE()); \ - ctx->stack_pointer = stack_pointer; \ } while (0) #define STACK_SHRINK(n) do { \ assert(n >= 0); \ @@ -540,17 +590,33 @@ _Py_uop_analyze_and_optimize( return trace_len; } + 
int buffer_trace_len = 0; + _Py_UOpsAbstractInterpContext *ctx = _Py_UOpsAbstractInterpContext_New(co->co_stacksize, co->co_nlocals, curr_stacklen); if (ctx == NULL) { PyMem_Free(temp_writebuffer); return trace_len; } + PyObject *co_const_copy = PyList_New(PyTuple_Size(co->co_consts)); + if (co_const_copy == NULL) { + goto abstract_error; + } + // Copy over the co_const tuple + for (int x = 0; x < PyTuple_GET_SIZE(co->co_consts); x++) { + PyList_SET_ITEM(co_const_copy, x, Py_NewRef(PyTuple_GET_ITEM(co->co_consts, x))); + } + int oparg; int opcode; - _Py_PARTITIONNODE_t *stack_pointer = ctx->stack_pointer; - _Py_PARTITIONNODE_t *locals = ctx->locals; + _Py_PARTITIONNODE_t *stack_pointer; + _Py_PARTITIONNODE_t *locals; + _Py_PARTITIONNODE_t *stack; + for (int i = 0; i < trace_len; i++) { + stack_pointer = ctx->stack_pointer; + stack = ctx->stack; + locals = ctx->locals; oparg = trace[i].oparg; opcode = trace[i].opcode; #ifdef PARTITION_DEBUG @@ -560,15 +626,69 @@ _Py_uop_analyze_and_optimize( opcode, oparg); #endif #endif + + // Partial evaluation - the partition nodes already gave us the static-dynamic variable split. + // For partial evaluation, we simply need to follow these rules: + // 1. Operations on dynamic variables need to be emitted. + // If an operand was previously partially evaluated and not yet emitted, then emit the residual with a LOAD_CONST. + // 2. Operations on static variables are a no-op as the abstract interpreter already analyzed their results. + + bool should_emit = false; + // For all stack inputs, are their variables static? + int num_inputs = _PyOpcode_num_popped(opcode, oparg, false); + int num_dynamic_operands = 0; + assert(num_inputs >= 0); + for (int x = num_inputs + 1; x > 0; x--) { + if (!partitionnode_is_static(PEEK(x))) { + should_emit = true; + num_dynamic_operands++; + } + } + int num_static_operands = num_inputs - num_dynamic_operands; + + // We need to also check if this operation is "pure". 
That it can accept + // constant nodes, output constant nodes, and does not cause any side effects. + should_emit = should_emit || !op_is_pure(opcode); + + + if (should_emit) { + if (num_static_operands > 0) { + for (int x = num_inputs + 1; x > 0; x--) { + // Re-materialise all virtual (partially-evaluated) constants + if (partitionnode_is_static(PEEK(x))) { + PyObject *const_val = get_const(PEEK(x)); + _PyUOpInstruction load_const; + load_const.opcode = LOAD_CONST; + load_const.oparg = (int)PyList_GET_SIZE(co_const_copy); + if (PyList_Append(co_const_copy, const_val) < 0) { + goto abstract_error; + } + + temp_writebuffer[buffer_trace_len] = load_const; + buffer_trace_len++; + + // INSERT to the correct position in the stack + int offset_from_target = num_dynamic_operands - x - 1; + assert(offset_from_target >= 0); + if (offset_from_target) { + _PyUOpInstruction insert; + insert.opcode = INSERT; + insert.oparg = offset_from_target; + + temp_writebuffer[buffer_trace_len] = insert; + buffer_trace_len++; + } + num_dynamic_operands++; + } + + } + } + temp_writebuffer[buffer_trace_len] = trace[i]; + buffer_trace_len++; + } /* * The following are special cased: - "LOAD_FAST", - "LOAD_FAST_CHECK", - "LOAD_FAST_AND_CLEAR", - "LOAD_CONST", - "STORE_FAST", - "STORE_FAST_MAYBE_NULL", - "COPY", + * @TODO: shift these to the DSL */ switch (opcode) { #include "abstract_interp_cases.c.h" @@ -606,6 +726,72 @@ _Py_uop_analyze_and_optimize( PARTITIONNODE_SET(bottom, PEEK(1), false); break; } + + // Arithmetic operations + + case _BINARY_OP_MULTIPLY_INT: { + if (!should_emit) { + PyObject *right; + PyObject *left; + PyObject *res; + right = get_const(&stack_pointer[-1]); + left = get_const(&stack_pointer[-2]); + STAT_INC(BINARY_OP, hit); + res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); + if (res == NULL) goto abstract_error; + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); + break; + } + else { + 
STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + + } + + case _BINARY_OP_ADD_INT: { + if (!should_emit) { + PyObject *right; + PyObject *left; + PyObject *res; + right = get_const(&stack_pointer[-1]); + left = get_const(&stack_pointer[-2]); + STAT_INC(BINARY_OP, hit); + res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); + if (res == NULL) goto abstract_error; + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); + break; + } + else { + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + } + + case _BINARY_OP_SUBTRACT_INT: { + if (!should_emit) { + PyObject *right; + PyObject *left; + PyObject *res; + right = get_const(&stack_pointer[-1]); + left = get_const(&stack_pointer[-2]); + STAT_INC(BINARY_OP, hit); + res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); + if (res == NULL) goto abstract_error; + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); + break; + } + else { + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + } default: fprintf(stderr, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); @@ -613,9 +799,108 @@ _Py_uop_analyze_and_optimize( #if PARTITION_DEBUG print_ctx(ctx); #endif + ctx->stack_pointer = stack_pointer; + if (opcode == EXIT_TRACE) { + break; + } +// if (should_emit) { +// +// // Emit instruction +// temp_writebuffer[buffer_trace_len] = trace[i]; +// buffer_trace_len++; +// +// // Update the real abstract interpreter +// stack_pointer = ctx->real_stack_pointer; +// locals = ctx->real_locals; +// stack = ctx->real_stack; +// +// /* +// * The following are special cased: +// * @TODO: shift these to the DSL +// */ +// switch (opcode) { +//#include 
"abstract_interp_cases.c.h" +// // @TODO convert these to autogenerated using DSL +// case LOAD_FAST: +// case LOAD_FAST_CHECK: +// STACK_GROW(1); +// PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); +// break; +// case LOAD_FAST_AND_CLEAR: { +// STACK_GROW(1); +// PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); +// PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, GETLOCAL(oparg), false); +// break; +// } +// case LOAD_CONST: { +// _Py_PARTITIONNODE_t value = MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); +// STACK_GROW(1); +// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)value, PEEK(1), false); +// break; +// } +// case STORE_FAST: +// case STORE_FAST_MAYBE_NULL: { +// _Py_PARTITIONNODE_t *value = PEEK(1); +// PARTITIONNODE_OVERWRITE(value, GETLOCAL(oparg), false); +// STACK_SHRINK(1); +// break; +// } +// case COPY: { +// _Py_PARTITIONNODE_t *bottom = PEEK(1 + (oparg - 1)); +// STACK_GROW(1); +// PARTITIONNODE_SET(bottom, PEEK(1), false); +// break; +// } +// +// case _BINARY_OP_MULTIPLY_INT: { +// STACK_SHRINK(1); +// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); +// break; +// +// } +// +// case _BINARY_OP_ADD_INT: { +// STACK_SHRINK(1); +// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); +// break; +// } +// +// case _BINARY_OP_SUBTRACT_INT: { +// STACK_SHRINK(1); +// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); +// break; +// } +// default: +// fprintf(stderr, "Unknown opcode in abstract interpreter\n"); +// Py_UNREACHABLE(); +// } +// +// ctx->real_stack_pointer = stack_pointer; +// } } assert(STACK_SIZE() >= 0); + assert(buffer_trace_len <= trace_len); Py_DECREF(ctx); + + PyObject *co_const_final = PyTuple_New(PyList_Size(co_const_copy)); + if (co_const_final == NULL) { + goto abstract_error; + } + // Copy over the co_const tuple + for (int x = 0; x < PyList_GET_SIZE(co_const_copy); x++) { + 
PyTuple_SET_ITEM(co_const_final, x, Py_NewRef(PyList_GET_ITEM(co_const_copy, x))); + } + + Py_SETREF(co->co_consts, co_const_final); + Py_XDECREF(co_const_copy); + memcpy(trace, temp_writebuffer, buffer_trace_len * sizeof(_PyUOpInstruction)); PyMem_Free(temp_writebuffer); + return buffer_trace_len; + +abstract_error: + Py_XDECREF(co_const_copy); + Py_DECREF(ctx); + assert(PyErr_Occurred()); + PyErr_Clear(); return trace_len; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 58da498f0d1d26..ca7f67ca1f6627 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -70,6 +70,12 @@ "STORE_FAST", "STORE_FAST_MAYBE_NULL", "COPY", + + # Arithmetic + "_BINARY_OP_MULTIPLY_INT", + "_BINARY_OP_ADD_INT", + "_BINARY_OP_SUBTRACT_INT", + } arg_parser = argparse.ArgumentParser( @@ -129,7 +135,7 @@ def effect_str(effects: list[StackEffect]) -> str: pushed: str | None match thing: case parsing.InstDef(): - if thing.kind != "op": + if thing.kind != "op" or (thing.kind != "inst" and self.instrs[thing.name].is_viable_uop()): instr = self.instrs[thing.name] popped = effect_str(instr.input_effects) pushed = effect_str(instr.output_effects) From 68c684febb7ce7b09e57145767025580bcdb9f49 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 8 Aug 2023 18:01:58 +0800 Subject: [PATCH 22/48] rename vars --- Python/optimizer_analysis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 5134d5a0baaf3f..724f1de5e49491 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -450,7 +450,7 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); fprintf(stderr, "%s", - ptr == NULL ? "?" : (ptr->static_or_dyanmic == 0 ? "static" : "dynamic")); + ptr == NULL ? "?" 
: (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); if (tag == TYPE_REF) { const char *wher = is_local @@ -486,7 +486,7 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); fprintf(stderr, "%s", - ptr == NULL ? "?" : (ptr->static_or_dyanmic == 0 ? "static" : "dynamic")); + ptr == NULL ? "?" : (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); if (tag == TYPE_REF) { const char *wher = is_local From 46c577755be2835c1b3ab22273636c72d0380458 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 8 Aug 2023 18:42:10 +0800 Subject: [PATCH 23/48] fixx off by one --- Python/optimizer_analysis.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 724f1de5e49491..7a469e10bc0917 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -532,13 +532,26 @@ op_is_pure(int opcode) case _BINARY_OP_MULTIPLY_INT: case _BINARY_OP_ADD_INT: case _BINARY_OP_SUBTRACT_INT: - case SAVE_IP: - return true; default: return false; } } +static int +remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) +{ + return trace_len; +} + +/** + * Fixes all side exits due to jumps. This MUST be called as the last + * pass over the trace. Otherwise jumps will point to invalid ends. 
+*/ +static int +fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len) +{ + return trace_len; +} #ifndef Py_DEBUG #define GETITEM(v, i) PyList_GET_ITEM((v), (i)) @@ -638,7 +651,7 @@ _Py_uop_analyze_and_optimize( int num_inputs = _PyOpcode_num_popped(opcode, oparg, false); int num_dynamic_operands = 0; assert(num_inputs >= 0); - for (int x = num_inputs + 1; x > 0; x--) { + for (int x = num_inputs; x > 0; x--) { if (!partitionnode_is_static(PEEK(x))) { should_emit = true; num_dynamic_operands++; @@ -646,6 +659,7 @@ _Py_uop_analyze_and_optimize( } int num_static_operands = num_inputs - num_dynamic_operands; + assert(num_static_operands >= 0); // We need to also check if this operation is "pure". That it can accept // constant nodes, output constant nodes, and does not cause any side effects. should_emit = should_emit || !op_is_pure(opcode); @@ -653,7 +667,7 @@ _Py_uop_analyze_and_optimize( if (should_emit) { if (num_static_operands > 0) { - for (int x = num_inputs + 1; x > 0; x--) { + for (int x = num_inputs; x > 0; x--) { // Re-materialise all virtual (partially-evaluated) constants if (partitionnode_is_static(PEEK(x))) { PyObject *const_val = get_const(PEEK(x)); @@ -705,7 +719,7 @@ _Py_uop_analyze_and_optimize( break; } case LOAD_CONST: { - _Py_PARTITIONNODE_t* value = (_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); + _Py_PARTITIONNODE_t* value = (_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(GETITEM(co_const_copy, oparg)); STACK_GROW(1); PARTITIONNODE_OVERWRITE(value, PEEK(1), true); #if PARTITION_DEBUG @@ -800,9 +814,6 @@ _Py_uop_analyze_and_optimize( print_ctx(ctx); #endif ctx->stack_pointer = stack_pointer; - if (opcode == EXIT_TRACE) { - break; - } // if (should_emit) { // // // Emit instruction @@ -880,6 +891,15 @@ _Py_uop_analyze_and_optimize( } assert(STACK_SIZE() >= 0); assert(buffer_trace_len <= trace_len); + + buffer_trace_len = remove_duplicate_save_ips(temp_writebuffer, buffer_trace_len); + buffer_trace_len = 
fix_jump_side_exits(temp_writebuffer, buffer_trace_len); + +#if PARTITION_DEBUG + if (buffer_trace_len < trace_len) { + fprintf(stderr, "Shortened trace by %d instructions\n", trace_len - buffer_trace_len); + } +#endif Py_DECREF(ctx); PyObject *co_const_final = PyTuple_New(PyList_Size(co_const_copy)); @@ -891,6 +911,7 @@ _Py_uop_analyze_and_optimize( PyTuple_SET_ITEM(co_const_final, x, Py_NewRef(PyList_GET_ITEM(co_const_copy, x))); } + Py_SETREF(co->co_consts, co_const_final); Py_XDECREF(co_const_copy); memcpy(trace, temp_writebuffer, buffer_trace_len * sizeof(_PyUOpInstruction)); From b839ee4e610f44c2d650360b85c1e7d1fc3da6d8 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 9 Aug 2023 23:31:33 +0800 Subject: [PATCH 24/48] partial eval working for real this time --- Include/internal/pycore_opcode_metadata.h | 142 ++++++++++++++++++++++ Include/internal/pycore_uops.h | 2 +- Lib/test/test_capi/test_misc.py | 25 ++++ Python/optimizer_analysis.c | 108 +++++++++++++--- 4 files changed, 258 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 1cab6c984f3ace..b0285751a7ad80 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -55,6 +55,7 @@ #define _POP_JUMP_IF_FALSE 331 #define _POP_JUMP_IF_TRUE 332 #define JUMP_TO_TOP 333 +#define INSERT 334 #ifndef NEED_OPCODE_METADATA extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); @@ -120,18 +121,38 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case UNARY_INVERT: return 1; + case _GUARD_BOTH_INT: + return 2; + case _BINARY_OP_MULTIPLY_INT: + return 2; + case _BINARY_OP_ADD_INT: + return 2; + case _BINARY_OP_SUBTRACT_INT: + return 2; case BINARY_OP_MULTIPLY_INT: return 2; case BINARY_OP_ADD_INT: return 2; case BINARY_OP_SUBTRACT_INT: return 2; + case _GUARD_BOTH_FLOAT: + return 2; + case 
_BINARY_OP_MULTIPLY_FLOAT: + return 2; + case _BINARY_OP_ADD_FLOAT: + return 2; + case _BINARY_OP_SUBTRACT_FLOAT: + return 2; case BINARY_OP_MULTIPLY_FLOAT: return 2; case BINARY_OP_ADD_FLOAT: return 2; case BINARY_OP_SUBTRACT_FLOAT: return 2; + case _GUARD_BOTH_UNICODE: + return 2; + case _BINARY_OP_ADD_UNICODE: + return 2; case BINARY_OP_ADD_UNICODE: return 2; case BINARY_OP_INPLACE_ADD_UNICODE: @@ -226,14 +247,26 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case DELETE_GLOBAL: return 0; + case _LOAD_LOCALS: + return 0; case LOAD_LOCALS: return 0; + case _LOAD_FROM_DICT_OR_GLOBALS: + return 1; case LOAD_NAME: return 0; case LOAD_FROM_DICT_OR_GLOBALS: return 1; case LOAD_GLOBAL: return 0; + case _GUARD_GLOBALS_VERSION: + return 0; + case _GUARD_BUILTINS_VERSION: + return 0; + case _LOAD_GLOBAL_MODULE: + return 0; + case _LOAD_GLOBAL_BUILTINS: + return 0; case LOAD_GLOBAL_MODULE: return 0; case LOAD_GLOBAL_BUILTIN: @@ -294,6 +327,12 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case LOAD_METHOD: return 1; + case _GUARD_TYPE_VERSION: + return 1; + case _CHECK_MANAGED_OBJECT_HAS_VALUES: + return 1; + case _LOAD_ATTR_INSTANCE_VALUE: + return 1; case LOAD_ATTR_INSTANCE_VALUE: return 1; case LOAD_ATTR_MODULE: @@ -348,6 +387,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case POP_JUMP_IF_TRUE: return 1; + case IS_NONE: + return 1; case POP_JUMP_IF_NONE: return 1; case POP_JUMP_IF_NOT_NONE: @@ -372,10 +413,28 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case INSTRUMENTED_FOR_ITER: return 0; + case _ITER_CHECK_LIST: + return 1; + case _IS_ITER_EXHAUSTED_LIST: + return 1; + case _ITER_NEXT_LIST: + return 1; case FOR_ITER_LIST: return 1; + case _ITER_CHECK_TUPLE: + return 1; + case _IS_ITER_EXHAUSTED_TUPLE: + return 1; + case _ITER_NEXT_TUPLE: + return 1; case FOR_ITER_TUPLE: return 1; + case _ITER_CHECK_RANGE: + return 1; + case _IS_ITER_EXHAUSTED_RANGE: + return 1; + case 
_ITER_NEXT_RANGE: + return 1; case FOR_ITER_RANGE: return 1; case FOR_ITER_GEN: @@ -494,6 +553,18 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case RESERVED: return 0; + case _POP_JUMP_IF_FALSE: + return 1; + case _POP_JUMP_IF_TRUE: + return 1; + case JUMP_TO_TOP: + return 0; + case SAVE_IP: + return 0; + case EXIT_TRACE: + return 0; + case INSERT: + return 0; default: return -1; } @@ -564,18 +635,38 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case UNARY_INVERT: return 1; + case _GUARD_BOTH_INT: + return 2; + case _BINARY_OP_MULTIPLY_INT: + return 1; + case _BINARY_OP_ADD_INT: + return 1; + case _BINARY_OP_SUBTRACT_INT: + return 1; case BINARY_OP_MULTIPLY_INT: return 1; case BINARY_OP_ADD_INT: return 1; case BINARY_OP_SUBTRACT_INT: return 1; + case _GUARD_BOTH_FLOAT: + return 2; + case _BINARY_OP_MULTIPLY_FLOAT: + return 1; + case _BINARY_OP_ADD_FLOAT: + return 1; + case _BINARY_OP_SUBTRACT_FLOAT: + return 1; case BINARY_OP_MULTIPLY_FLOAT: return 1; case BINARY_OP_ADD_FLOAT: return 1; case BINARY_OP_SUBTRACT_FLOAT: return 1; + case _GUARD_BOTH_UNICODE: + return 2; + case _BINARY_OP_ADD_UNICODE: + return 1; case BINARY_OP_ADD_UNICODE: return 1; case BINARY_OP_INPLACE_ADD_UNICODE: @@ -670,14 +761,26 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case DELETE_GLOBAL: return 0; + case _LOAD_LOCALS: + return 1; case LOAD_LOCALS: return 1; + case _LOAD_FROM_DICT_OR_GLOBALS: + return 1; case LOAD_NAME: return 1; case LOAD_FROM_DICT_OR_GLOBALS: return 1; case LOAD_GLOBAL: return ((oparg & 1) ? 1 : 0) + 1; + case _GUARD_GLOBALS_VERSION: + return 0; + case _GUARD_BUILTINS_VERSION: + return 0; + case _LOAD_GLOBAL_MODULE: + return ((oparg & 1) ? 1 : 0) + 1; + case _LOAD_GLOBAL_BUILTINS: + return ((oparg & 1) ? 1 : 0) + 1; case LOAD_GLOBAL_MODULE: return (oparg & 1 ? 1 : 0) + 1; case LOAD_GLOBAL_BUILTIN: @@ -738,6 +841,12 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return ((oparg & 1) ? 
1 : 0) + 1; case LOAD_METHOD: return ((oparg & 1) ? 1 : 0) + 1; + case _GUARD_TYPE_VERSION: + return 1; + case _CHECK_MANAGED_OBJECT_HAS_VALUES: + return 1; + case _LOAD_ATTR_INSTANCE_VALUE: + return ((oparg & 1) ? 1 : 0) + 1; case LOAD_ATTR_INSTANCE_VALUE: return (oparg & 1 ? 1 : 0) + 1; case LOAD_ATTR_MODULE: @@ -792,6 +901,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case POP_JUMP_IF_TRUE: return 0; + case IS_NONE: + return 1; case POP_JUMP_IF_NONE: return 0; case POP_JUMP_IF_NOT_NONE: @@ -816,10 +927,28 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 2; case INSTRUMENTED_FOR_ITER: return 0; + case _ITER_CHECK_LIST: + return 1; + case _IS_ITER_EXHAUSTED_LIST: + return 2; + case _ITER_NEXT_LIST: + return 2; case FOR_ITER_LIST: return 2; + case _ITER_CHECK_TUPLE: + return 1; + case _IS_ITER_EXHAUSTED_TUPLE: + return 2; + case _ITER_NEXT_TUPLE: + return 2; case FOR_ITER_TUPLE: return 2; + case _ITER_CHECK_RANGE: + return 1; + case _IS_ITER_EXHAUSTED_RANGE: + return 2; + case _ITER_NEXT_RANGE: + return 2; case FOR_ITER_RANGE: return 2; case FOR_ITER_GEN: @@ -938,6 +1067,18 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case RESERVED: return 0; + case _POP_JUMP_IF_FALSE: + return 0; + case _POP_JUMP_IF_TRUE: + return 0; + case JUMP_TO_TOP: + return 0; + case SAVE_IP: + return 0; + case EXIT_TRACE: + return 0; + case INSERT: + return 0; default: return -1; } @@ -1386,5 +1527,6 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE", [_POP_JUMP_IF_TRUE] = "_POP_JUMP_IF_TRUE", [JUMP_TO_TOP] = "JUMP_TO_TOP", + [INSERT] = "INSERT", }; #endif // NEED_OPCODE_METADATA diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 57a5970353b360..59451bc7826698 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -8,7 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif 
-#define _Py_UOP_MAX_TRACE_LENGTH 32 +#define _Py_UOP_MAX_TRACE_LENGTH 256 typedef struct { uint32_t opcode; diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index e7cdd4be002a14..26b0501d067a20 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2707,5 +2707,30 @@ def testfunc(it): with self.assertRaises(StopIteration): next(it) + +class TestUopsOptimization(unittest.TestCase): + + def test_int_constant_propagation(self): + def testfunc(loops): + num = 0 + while num < loops: + x = 0 + y = 1 + z = 2 + a = x + y + z + x + y + z + x + y + z + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(3) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 9) + binop_count = [opname == "_BINARY_OP_ADD_INT" for opname, _, _ in ex] + self.assertEqual(binop_count, 1) + if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7a469e10bc0917..a189555fbd7d21 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -102,6 +102,10 @@ typedef struct _Py_UOpsAbstractInterpContext { _Py_PARTITIONNODE_t *stack; int locals_len; _Py_PARTITIONNODE_t *locals; + + // Indicates whether the stack entry is real or virtualised. + // true - virtual false - real + bool *stack_virtual_or_real; // The following represent the real (emitted instructions) stack and locals. 
// points to one element after the abstract stack _Py_PARTITIONNODE_t *real_stack_pointer; @@ -156,6 +160,12 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl return NULL; } + bool *virtual_or_real = PyMem_New(bool, stack_len); + if (virtual_or_real == NULL) { + Py_DECREF(self); + PyMem_Free(locals_with_stack); + return NULL; + } for (int i = 0; i < (locals_len + stack_len) * 2; i++) { locals_with_stack[i] = PARTITIONNODE_NULLROOT; @@ -165,6 +175,8 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl self->stack = locals_with_stack + locals_len; self->stack_pointer = self->stack + curr_stacklen; + self->stack_virtual_or_real = virtual_or_real; + self->real_locals = self->locals + locals_len + stack_len; self->real_stack = self->stack + locals_len + stack_len; self->real_stack_pointer = self->stack_pointer + locals_len + stack_len; @@ -175,7 +187,7 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl static void print_ctx(_Py_UOpsAbstractInterpContext *ctx); #endif -static inline _Py_PARTITIONNODE_t * +static _Py_PARTITIONNODE_t * partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) { _Py_TypeNodeTags tag = partitionnode_get_tag(*ref); @@ -461,6 +473,7 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) fprintf(stderr, "->%s[%d]", wher, parent_idx); } + fprintf(stderr, " | "); } fprintf(stderr, "]\n"); @@ -497,6 +510,7 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) fprintf(stderr, "->%s[%d]", wher, parent_idx); } + fprintf(stderr, " | "); } fprintf(stderr, "]\n"); } @@ -506,11 +520,11 @@ static bool partitionnode_is_static(_Py_PARTITIONNODE_t *node) { _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)root); + _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); if (root_obj == _Py_NULL) { return false; } - return 
!root_obj->static_or_dynamic; + return root_obj->static_or_dynamic == 0; } // MUST BE GUARDED BY partitionnode_is_static BEFORE CALLING THIS @@ -519,28 +533,54 @@ get_const(_Py_PARTITIONNODE_t *node) { assert(partitionnode_is_static(node)); _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode * )partitionnode_clear_tag((_Py_PARTITIONNODE_t)root); + _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode * )partitionnode_clear_tag(*root); return root_obj->const_val; } // Hardcoded for now, @TODO autogenerate these from the DSL. static inline bool -op_is_pure(int opcode) +op_is_pure(int opcode, int oparg, _Py_PARTITIONNODE_t *locals) { switch (opcode) { case LOAD_CONST: case _BINARY_OP_MULTIPLY_INT: case _BINARY_OP_ADD_INT: case _BINARY_OP_SUBTRACT_INT: + case _GUARD_BOTH_INT: + return true; + case LOAD_FAST: + return partitionnode_is_static(&locals[oparg]) && get_const(&locals[oparg]) != _Py_NULL; default: return false; } } +// Remove contiguous SAVE_IPs, leaving only the last one before a non-SAVE_IP instruction. 
static int remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) { - return trace_len; + _PyUOpInstruction *temp_trace = PyMem_New(_PyUOpInstruction, trace_len); + if (temp_trace == NULL) { + return trace_len; + } + int temp_trace_len = 0; + + _PyUOpInstruction curr; + for (int i = 0; i < trace_len; i++) { + curr = trace[i]; + if (i < trace_len && curr.opcode == SAVE_IP && trace[i+1].opcode == SAVE_IP) { + continue; + } + temp_trace[temp_trace_len] = curr; + temp_trace_len++; + } + memcpy(trace, temp_trace, temp_trace_len * sizeof(_PyUOpInstruction)); + PyMem_Free(temp_trace); + +#if PARTITION_DEBUG + fprintf(stderr, "Removed %d SAVE_IPs\n", trace_len - temp_trace_len); +#endif + return temp_trace_len; } /** @@ -550,6 +590,15 @@ remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) static int fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len) { + for (int i = 0; i < trace_len; i++) { + int oparg = trace[i].oparg; + int opcode = trace[i].opcode; + switch (opcode) { + case _POP_JUMP_IF_TRUE: + case _POP_JUMP_IF_FALSE: + trace[i].oparg = trace_len - 2; + } + } return trace_len; } @@ -625,6 +674,7 @@ _Py_uop_analyze_and_optimize( _Py_PARTITIONNODE_t *stack_pointer; _Py_PARTITIONNODE_t *locals; _Py_PARTITIONNODE_t *stack; + bool *stack_virtual_or_real = ctx->stack_virtual_or_real; for (int i = 0; i < trace_len; i++) { stack_pointer = ctx->stack_pointer; @@ -646,30 +696,40 @@ _Py_uop_analyze_and_optimize( // If an operand was previously partially evaluated and not yet emitted, then emit the residual with a LOAD_CONST. // 2. Operations on static variables are a no-op as the abstract interpreter already analyzed their results. - bool should_emit = false; // For all stack inputs, are their variables static? int num_inputs = _PyOpcode_num_popped(opcode, oparg, false); int num_dynamic_operands = 0; + + // We need to also check if this operation is "pure". 
That it can accept + // constant nodes, output constant nodes, and does not cause any side effects. + bool should_emit = !op_is_pure(opcode, oparg, locals); + + int virtual_objects = 0; assert(num_inputs >= 0); for (int x = num_inputs; x > 0; x--) { if (!partitionnode_is_static(PEEK(x))) { should_emit = true; num_dynamic_operands++; } + if (stack_virtual_or_real[STACK_LEVEL() - num_inputs]) { + virtual_objects++; + } } + int num_static_operands = num_inputs - num_dynamic_operands; assert(num_static_operands >= 0); - // We need to also check if this operation is "pure". That it can accept - // constant nodes, output constant nodes, and does not cause any side effects. - should_emit = should_emit || !op_is_pure(opcode); if (should_emit) { if (num_static_operands > 0) { + int real_stack_size = num_dynamic_operands; + int virtual_stack_size = (int)(ctx->stack_pointer - ctx->stack); + assert(virtual_stack_size >= real_stack_size); for (int x = num_inputs; x > 0; x--) { // Re-materialise all virtual (partially-evaluated) constants - if (partitionnode_is_static(PEEK(x))) { + if (partitionnode_is_static(PEEK(x)) && stack_virtual_or_real[STACK_LEVEL() - x]) { + stack_virtual_or_real[STACK_LEVEL() - x] = false; PyObject *const_val = get_const(PEEK(x)); _PyUOpInstruction load_const; load_const.opcode = LOAD_CONST; @@ -678,17 +738,23 @@ _Py_uop_analyze_and_optimize( goto abstract_error; } +#if PARTITION_DEBUG + fprintf(stderr, "Emitting LOAD_CONST\n"); +#endif temp_writebuffer[buffer_trace_len] = load_const; buffer_trace_len++; + // INSERT to the correct position in the stack - int offset_from_target = num_dynamic_operands - x - 1; - assert(offset_from_target >= 0); - if (offset_from_target) { + int offset_from_target = x - num_dynamic_operands - 1; + if (offset_from_target > 0) { _PyUOpInstruction insert; insert.opcode = INSERT; - insert.oparg = offset_from_target; + insert.oparg = -offset_from_target; +#if PARTITION_DEBUG + fprintf(stderr, "Emitting INSERT %d\n", 
offset_from_target); +#endif temp_writebuffer[buffer_trace_len] = insert; buffer_trace_len++; } @@ -697,6 +763,9 @@ _Py_uop_analyze_and_optimize( } } +#if PARTITION_DEBUG + fprintf(stderr, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); +#endif temp_writebuffer[buffer_trace_len] = trace[i]; buffer_trace_len++; } @@ -722,9 +791,6 @@ _Py_uop_analyze_and_optimize( _Py_PARTITIONNODE_t* value = (_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(GETITEM(co_const_copy, oparg)); STACK_GROW(1); PARTITIONNODE_OVERWRITE(value, PEEK(1), true); -#if PARTITION_DEBUG - print_ctx(ctx); -#endif break; } case STORE_FAST: @@ -814,6 +880,12 @@ _Py_uop_analyze_and_optimize( print_ctx(ctx); #endif ctx->stack_pointer = stack_pointer; + + // Mark all stack outputs as virtual or real + int stack_outputs = _PyOpcode_num_pushed(opcode, oparg, false); + for (int y = stack_outputs; y > 0; y--) { + stack_virtual_or_real[STACK_LEVEL() - y] = !should_emit; + } // if (should_emit) { // // // Emit instruction From 6ecf3d253de44bb70833c2b274aac0968d36909f Mon Sep 17 00:00:00 2001 From: Jules <57632293+JuliaPoo@users.noreply.github.com> Date: Thu, 10 Aug 2023 02:16:31 +0800 Subject: [PATCH 25/48] Fix: Inconsistent `AbstractInterpContext` used in `PARTITIONNODE_OVERWRITE` and mis-port of `PARTITIONNODE_OVERWRITE` (#41) * Fix: Inconsistent AbstractInterpContext used in PARTITIONNODE_OVERWRITE and typo in PARTITIONNODE_OVERWRITE * Style: Removed whitespace --- Python/optimizer_analysis.c | 60 ++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a189555fbd7d21..f810b160527f3f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -387,7 +387,7 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, // Children of dst will have this form _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( - (_Py_PARTITIONNODE_t 
*)partitionnode_clear_tag(*dst)); + (_Py_PARTITIONNODE_t *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)dst)); // Search locals for children int nlocals = ctx->locals_len; @@ -461,8 +461,11 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - fprintf(stderr, "%s", + fprintf(stderr, "%s:", ptr == NULL ? "?" : (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); + if (ptr != NULL && ptr->static_or_dynamic == 0) { + PyObject_Print(ptr->const_val, stderr, 0); + } if (tag == TYPE_REF) { const char *wher = is_local @@ -498,8 +501,11 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) } _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - fprintf(stderr, "%s", + fprintf(stderr, "%s:", ptr == NULL ? "?" : (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); + if (ptr != NULL && ptr->static_or_dynamic == 0) { + PyObject_Print(ptr->const_val, stderr, 0); + } if (tag == TYPE_REF) { const char *wher = is_local @@ -622,9 +628,9 @@ _Py_uop_analyze_and_optimize( int curr_stacklen ) { -#define STACK_LEVEL() ((int)(stack_pointer - stack)) +#define STACK_LEVEL() ((int)(*stack_pointer - stack)) #define STACK_SIZE() (co->co_stacksize) -#define BASIC_STACKADJ(n) (stack_pointer += n) +#define BASIC_STACKADJ(n) (*stack_pointer += n) #ifdef Py_DEBUG #define STACK_GROW(n) do { \ @@ -641,7 +647,7 @@ _Py_uop_analyze_and_optimize( #define STACK_GROW(n) BASIC_STACKADJ(n) #define STACK_SHRINK(n) BASIC_STACKADJ(-(n)) #endif -#define PEEK(idx) (&(stack_pointer[-(idx)])) +#define PEEK(idx) (&((*stack_pointer)[-(idx)])) #define GETLOCAL(idx) (&(locals[idx])) #define PARTITIONNODE_SET(src, dst, flag) partitionnode_set((src), (dst), (flag)) @@ -671,24 +677,14 @@ _Py_uop_analyze_and_optimize( int oparg; int opcode; - _Py_PARTITIONNODE_t *stack_pointer; - _Py_PARTITIONNODE_t *locals; - _Py_PARTITIONNODE_t *stack; bool *stack_virtual_or_real = ctx->stack_virtual_or_real; + 
_Py_PARTITIONNODE_t **stack_pointer = &ctx->stack_pointer; + _Py_PARTITIONNODE_t *stack = ctx->stack; + _Py_PARTITIONNODE_t *locals = ctx->locals; for (int i = 0; i < trace_len; i++) { - stack_pointer = ctx->stack_pointer; - stack = ctx->stack; - locals = ctx->locals; oparg = trace[i].oparg; opcode = trace[i].opcode; -#ifdef PARTITION_DEBUG -#ifdef Py_DEBUG - fprintf(stderr, " [-] Type propagating across: %s{%d} : %d\n", - (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode], - opcode, oparg); -#endif -#endif // Partial evaluation - the partition nodes already gave us the static-dynamic variable split. // For partial evaluation, we simply need to follow these rules: @@ -773,6 +769,14 @@ _Py_uop_analyze_and_optimize( * The following are special cased: * @TODO: shift these to the DSL */ + +#ifdef PARTITION_DEBUG +#ifdef Py_DEBUG + fprintf(stderr, " [-] Type propagating across: %s{%d} : %d\n", + (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode], + opcode, oparg); +#endif +#endif switch (opcode) { #include "abstract_interp_cases.c.h" // @TODO convert these to autogenerated using DSL @@ -803,7 +807,7 @@ _Py_uop_analyze_and_optimize( case COPY: { _Py_PARTITIONNODE_t *bottom = PEEK(1 + (oparg - 1)); STACK_GROW(1); - PARTITIONNODE_SET(bottom, PEEK(1), false); + PARTITIONNODE_OVERWRITE(bottom, PEEK(1), false); break; } @@ -814,8 +818,8 @@ _Py_uop_analyze_and_optimize( PyObject *right; PyObject *left; PyObject *res; - right = get_const(&stack_pointer[-1]); - left = get_const(&stack_pointer[-2]); + right = get_const(PEEK(1)); + left = get_const(PEEK(2)); STAT_INC(BINARY_OP, hit); res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto abstract_error; @@ -836,8 +840,8 @@ _Py_uop_analyze_and_optimize( PyObject *right; PyObject *left; PyObject *res; - right = get_const(&stack_pointer[-1]); - left = get_const(&stack_pointer[-2]); + right = get_const(PEEK(1)); + left = get_const(PEEK(2)); STAT_INC(BINARY_OP, hit); res = 
_PyLong_Add((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto abstract_error; @@ -857,8 +861,8 @@ _Py_uop_analyze_and_optimize( PyObject *right; PyObject *left; PyObject *res; - right = get_const(&stack_pointer[-1]); - left = get_const(&stack_pointer[-2]); + right = get_const(PEEK(1)); + left = get_const(PEEK(2)); STAT_INC(BINARY_OP, hit); res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto abstract_error; @@ -876,10 +880,10 @@ _Py_uop_analyze_and_optimize( fprintf(stderr, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); } + #if PARTITION_DEBUG print_ctx(ctx); #endif - ctx->stack_pointer = stack_pointer; // Mark all stack outputs as virtual or real int stack_outputs = _PyOpcode_num_pushed(opcode, oparg, false); @@ -983,7 +987,7 @@ _Py_uop_analyze_and_optimize( PyTuple_SET_ITEM(co_const_final, x, Py_NewRef(PyList_GET_ITEM(co_const_copy, x))); } - + Py_SETREF(co->co_consts, co_const_final); Py_XDECREF(co_const_copy); memcpy(trace, temp_writebuffer, buffer_trace_len * sizeof(_PyUOpInstruction)); From b6eeb25d11717c46c87516b58a10538a3af4bc95 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 10 Aug 2023 02:52:28 +0800 Subject: [PATCH 26/48] fix test, refactor, bugfix --- Lib/test/test_capi/test_misc.py | 4 +- Python/optimizer_analysis.c | 220 ++++++++++---------------------- 2 files changed, 69 insertions(+), 155 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 26b0501d067a20..e4ecb8128eea1b 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2729,8 +2729,8 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) self.assertEqual(res, 9) - binop_count = [opname == "_BINARY_OP_ADD_INT" for opname, _, _ in ex] - self.assertEqual(binop_count, 1) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + 
self.assertEqual(len(binop_count), 1) if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index f810b160527f3f..0baf527f767dd9 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -15,6 +15,9 @@ #define PARTITION_DEBUG 1 +#define STATIC 0 +#define DYNAMIC 1 + // TYPENODE is a tagged pointer that uses the last 2 LSB as the tag #define _Py_PARTITIONNODE_t uintptr_t @@ -417,6 +420,57 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, #if PARTITION_DEBUG +void +print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack, int nstack_use, int nstack) +{ + bool is_local = false; + bool is_stack = false; + + int locals_offset = -1; + int stack_offset = -1; + int parent_idx = -1; + + _Py_PARTITIONNODE_t *node = is_printing_stack ? &ctx->stack[i] : &ctx->locals[i]; + _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); + + _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); + + if (is_printing_stack) { + fprintf(stderr, "%s", i == nstack_use - 1 ? "." : " "); + } + + if (tag == TYPE_REF) { + _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); + int local_index = (int)(parent - ctx->locals); + int stack_index = (int)(parent - ctx->stack); + is_local = local_index >= 0 && local_index < ctx->locals_len; + is_stack = stack_index >= 0 && stack_index < nstack; + parent_idx = is_local + ? local_index + : is_stack + ? stack_index + : -1; + } + + + _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); + fprintf(stderr, "%s:", + ptr == NULL ? "?" : (ptr->static_or_dynamic == STATIC ? "static" : "dynamic")); + if (ptr != NULL && ptr->static_or_dynamic == STATIC) { + PyObject_Print(ptr->const_val, stderr, 0); + } + + if (tag == TYPE_REF) { + const char *wher = is_local + ? "locals" + : is_stack + ? 
"stack" + : "const"; + fprintf(stderr, "->%s[%d]", + wher, parent_idx); + } +} + /** * @brief Print the entries in the abstract interpreter context (along with locals). */ @@ -430,92 +484,16 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) int nstack = ctx->stack_len; int nlocals = ctx->locals_len; - bool is_local = false; - bool is_stack = false; - - int locals_offset = -1; - int stack_offset = -1; - int parent_idx = -1; - fprintf(stderr, " Stack: %p: [", ctx->stack); for (int i = 0; i < nstack; i++) { - _Py_PARTITIONNODE_t *node = &ctx->stack[i]; - _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); - - _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - - fprintf(stderr, "%s", i == nstack_use ? "." : " "); - - if (tag == TYPE_REF) { - _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); - int local_index = (int)(parent - ctx->locals); - int stack_index = (int)(parent - ctx->stack); - is_local = local_index >= 0 && local_index < ctx->locals_len; - is_stack = stack_index >= 0 && stack_index < nstack; - parent_idx = is_local - ? local_index - : is_stack - ? stack_index - : -1; - } - - - _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - fprintf(stderr, "%s:", - ptr == NULL ? "?" : (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); - if (ptr != NULL && ptr->static_or_dynamic == 0) { - PyObject_Print(ptr->const_val, stderr, 0); - } - - if (tag == TYPE_REF) { - const char *wher = is_local - ? "locals" - : is_stack - ? 
"stack" - : "const"; - fprintf(stderr, "->%s[%d]", - wher, parent_idx); - } + print_ctx_node(ctx, i, true, nstack_use, nstack); fprintf(stderr, " | "); } fprintf(stderr, "]\n"); fprintf(stderr, " Locals %p: [", locals); for (int i = 0; i < nlocals; i++) { - _Py_PARTITIONNODE_t *node = &ctx->locals[i]; - _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); - - _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - - if (tag == TYPE_REF) { - _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); - int local_index = (int)(parent - ctx->locals); - int stack_index = (int)(parent - ctx->stack); - is_local = local_index >= 0 && local_index < ctx->locals_len; - is_stack = stack_index >= 0 && stack_index < nstack; - parent_idx = is_local - ? local_index - : is_stack - ? stack_index - : -1; - } - - _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - fprintf(stderr, "%s:", - ptr == NULL ? "?" : (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); - if (ptr != NULL && ptr->static_or_dynamic == 0) { - PyObject_Print(ptr->const_val, stderr, 0); - } - - if (tag == TYPE_REF) { - const char *wher = is_local - ? "locals" - : is_stack - ? 
"stack" - : "const"; - fprintf(stderr, "->%s[%d]", - wher, parent_idx); - } + print_ctx_node(ctx, i, false, nstack_use, nstack); fprintf(stderr, " | "); } fprintf(stderr, "]\n"); @@ -530,7 +508,7 @@ partitionnode_is_static(_Py_PARTITIONNODE_t *node) if (root_obj == _Py_NULL) { return false; } - return root_obj->static_or_dynamic == 0; + return root_obj->static_or_dynamic == STATIC; } // MUST BE GUARDED BY partitionnode_is_static BEFORE CALLING THIS @@ -754,6 +732,16 @@ _Py_uop_analyze_and_optimize( temp_writebuffer[buffer_trace_len] = insert; buffer_trace_len++; } +#if PARTITION_DEBUG + fprintf(stderr, "Emitting SAVE_IP\n"); +#endif + // Use the next SAVE_IP + int temp = i; + for (; trace[temp].opcode != SAVE_IP && temp < trace_len; temp++) { + } + assert(trace[temp].opcode == SAVE_IP); + temp_writebuffer[buffer_trace_len] = trace[temp]; + buffer_trace_len++; num_dynamic_operands++; } @@ -890,80 +878,6 @@ _Py_uop_analyze_and_optimize( for (int y = stack_outputs; y > 0; y--) { stack_virtual_or_real[STACK_LEVEL() - y] = !should_emit; } -// if (should_emit) { -// -// // Emit instruction -// temp_writebuffer[buffer_trace_len] = trace[i]; -// buffer_trace_len++; -// -// // Update the real abstract interpreter -// stack_pointer = ctx->real_stack_pointer; -// locals = ctx->real_locals; -// stack = ctx->real_stack; -// -// /* -// * The following are special cased: -// * @TODO: shift these to the DSL -// */ -// switch (opcode) { -//#include "abstract_interp_cases.c.h" -// // @TODO convert these to autogenerated using DSL -// case LOAD_FAST: -// case LOAD_FAST_CHECK: -// STACK_GROW(1); -// PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); -// break; -// case LOAD_FAST_AND_CLEAR: { -// STACK_GROW(1); -// PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); -// PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, GETLOCAL(oparg), false); -// break; -// } -// case LOAD_CONST: { -// _Py_PARTITIONNODE_t value = MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); -// 
STACK_GROW(1); -// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)value, PEEK(1), false); -// break; -// } -// case STORE_FAST: -// case STORE_FAST_MAYBE_NULL: { -// _Py_PARTITIONNODE_t *value = PEEK(1); -// PARTITIONNODE_OVERWRITE(value, GETLOCAL(oparg), false); -// STACK_SHRINK(1); -// break; -// } -// case COPY: { -// _Py_PARTITIONNODE_t *bottom = PEEK(1 + (oparg - 1)); -// STACK_GROW(1); -// PARTITIONNODE_SET(bottom, PEEK(1), false); -// break; -// } -// -// case _BINARY_OP_MULTIPLY_INT: { -// STACK_SHRINK(1); -// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); -// break; -// -// } -// -// case _BINARY_OP_ADD_INT: { -// STACK_SHRINK(1); -// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); -// break; -// } -// -// case _BINARY_OP_SUBTRACT_INT: { -// STACK_SHRINK(1); -// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); -// break; -// } -// default: -// fprintf(stderr, "Unknown opcode in abstract interpreter\n"); -// Py_UNREACHABLE(); -// } -// -// ctx->real_stack_pointer = stack_pointer; -// } } assert(STACK_SIZE() >= 0); assert(buffer_trace_len <= trace_len); From d5cceb999a88f0695c54d35b533fc7de7f84b1a8 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 10 Aug 2023 21:20:34 +0800 Subject: [PATCH 27/48] re-compute jump offsets and targets --- Include/internal/pycore_uops.h | 4 +- Python/optimizer_analysis.c | 149 ++++++++++++++++++++++++++++++--- 2 files changed, 141 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 59451bc7826698..d351f3353a5c6f 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -11,8 +11,8 @@ extern "C" { #define _Py_UOP_MAX_TRACE_LENGTH 256 typedef struct { - uint32_t opcode; - uint32_t oparg; + int32_t opcode; + int32_t oparg; uint64_t operand; // A cache entry } 
_PyUOpInstruction; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 0baf527f767dd9..2c16b12f62b80f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -539,6 +539,72 @@ op_is_pure(int opcode, int oparg, _Py_PARTITIONNODE_t *locals) } } +static inline bool +op_is_jump(int opcode) +{ + return (opcode == _POP_JUMP_IF_FALSE || opcode == _POP_JUMP_IF_TRUE); +} + + +// Number the jump targets and the jump instructions with a unique (negative) ID. +// This replaces the instruction's opcode in the trace with their negative IDs. +// Aids relocation later when we need to recompute jumps after optimization passes. +static _PyUOpInstruction * +number_jumps_and_targets(_PyUOpInstruction *trace, int trace_len, int *max_id) +{ + int jump_and_target_count = 0; + int jump_and_target_id = -1; + for (int i = 0; i < trace_len; i++) { + if (op_is_jump(trace[i].opcode)) { + // 1 for the jump, 1 for its target + jump_and_target_count += 2; + } + } + + // +1 because 1-based indexing not zero based + _PyUOpInstruction *jump_id_to_instruction = PyMem_New(_PyUOpInstruction, jump_and_target_count + 1); + if (jump_id_to_instruction == NULL) { + return NULL; + } + + + for (int i = 0; i < trace_len; i++) { + if (op_is_jump(trace[i].opcode)) { + int target = trace[i].oparg; + int target_id = jump_and_target_id; + + // 1 for the jump target + assert(jump_and_target_id < 0); + // Negative opcode! + assert(trace[target].opcode > 0); + // Already assigned a jump ID + if (trace[target].opcode < 0) { + target_id = trace[target].opcode; + } + else { + // Else, assign a new jump ID. 
+ jump_id_to_instruction[-target_id] = trace[target]; + trace[target].opcode = target_id; + jump_and_target_id--; + fprintf(stderr, "op %d oparg %d\n", jump_id_to_instruction[-target_id].opcode, jump_id_to_instruction[-target_id].oparg); + } + + // 1 for the jump + assert(jump_and_target_id < 0); + jump_id_to_instruction[-jump_and_target_id] = trace[i]; + // Negative opcode! + assert(trace[i].opcode >= 0); + trace[i].opcode = jump_and_target_id; + jump_and_target_id--; + // Point the jump to the target ID. + trace[i].oparg = target_id; + + } + } + *max_id = jump_and_target_id; + return jump_id_to_instruction; +} + // Remove contiguous SAVE_IPs, leaving only the last one before a non-SAVE_IP instruction. static int remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) @@ -570,20 +636,51 @@ remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) /** * Fixes all side exits due to jumps. This MUST be called as the last * pass over the trace. Otherwise jumps will point to invalid ends. + * + * Runtime complexity of O(n*k), where n is trace length and k is number of jump + * instructions. Since k is usually quite low, this is nearly linear. */ -static int -fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len) +static void +fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len, + _PyUOpInstruction *jump_id_to_instruction, int max_jump_id) { for (int i = 0; i < trace_len; i++) { int oparg = trace[i].oparg; int opcode = trace[i].opcode; - switch (opcode) { - case _POP_JUMP_IF_TRUE: - case _POP_JUMP_IF_FALSE: - trace[i].oparg = trace_len - 2; + // Indicates it's a jump target or jump instruction + if (opcode < 0 && opcode > max_jump_id) { + opcode = -opcode; + int real_oparg = jump_id_to_instruction[opcode].oparg; + int real_opcode = jump_id_to_instruction[opcode].opcode; + if (op_is_jump(real_opcode)) { + trace[i].opcode = real_opcode; + + // Search for our target ID. 
+ int target_id = oparg; + for (int x = 0; x < trace_len; x++) { + if (trace[x].opcode == target_id) { + trace[i].oparg = x; + break; + } + } + + assert(trace[i].oparg >= 0); + } + } + } + + // Final pass to swap out all the jump target IDs with their actual targets. + for (int i = 0; i < trace_len; i++) { + int oparg = trace[i].oparg; + int opcode = trace[i].opcode; + // Indicates it's a jump target or jump instruction + if (opcode < 0 && opcode > max_jump_id) { + int real_oparg = jump_id_to_instruction[-opcode].oparg; + int real_opcode = jump_id_to_instruction[-opcode].opcode; + trace[i].oparg = real_oparg; + trace[i].opcode = real_opcode; } } - return trace_len; } #ifndef Py_DEBUG @@ -631,6 +728,9 @@ _Py_uop_analyze_and_optimize( #define PARTITIONNODE_SET(src, dst, flag) partitionnode_set((src), (dst), (flag)) #define PARTITIONNODE_OVERWRITE(src, dst, flag) partitionnode_overwrite(ctx, (src), (dst), (flag)) #define MAKE_STATIC_ROOT(val) partitionnode_make_root(0, (val)) + PyObject *co_const_copy = NULL; + _PyUOpInstruction *jump_id_to_instruction = NULL; + _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len); if (temp_writebuffer == NULL) { return trace_len; @@ -644,7 +744,14 @@ _Py_uop_analyze_and_optimize( return trace_len; } - PyObject *co_const_copy = PyList_New(PyTuple_Size(co->co_consts)); + int max_jump_id = 0; + jump_id_to_instruction = number_jumps_and_targets(trace, trace_len, &max_jump_id); + if (jump_id_to_instruction == NULL) { + goto abstract_error; + } + + + co_const_copy = PyList_New(PyTuple_Size(co->co_consts)); if (co_const_copy == NULL) { goto abstract_error; } @@ -664,6 +771,15 @@ _Py_uop_analyze_and_optimize( oparg = trace[i].oparg; opcode = trace[i].opcode; + // Is a special jump/target ID, decode that + if (opcode < 0 && opcode > max_jump_id) { +#if PARTITION_DEBUG + fprintf(stderr, "Special jump target/ID %d\n", opcode); +#endif + oparg = jump_id_to_instruction[-opcode].oparg; + opcode = 
jump_id_to_instruction[-opcode].opcode; + } + // Partial evaluation - the partition nodes already gave us the static-dynamic variable split. // For partial evaluation, we simply need to follow these rules: // 1. Operations on dynamic variables need to be emitted. @@ -878,12 +994,22 @@ _Py_uop_analyze_and_optimize( for (int y = stack_outputs; y > 0; y--) { stack_virtual_or_real[STACK_LEVEL() - y] = !should_emit; } + + if (opcode == EXIT_TRACE) { + // Copy the rest of the stubs over, then end. + for (; i < trace_len; i++) { + temp_writebuffer[buffer_trace_len] = trace[i]; + buffer_trace_len++; + } + break; + } } assert(STACK_SIZE() >= 0); - assert(buffer_trace_len <= trace_len); buffer_trace_len = remove_duplicate_save_ips(temp_writebuffer, buffer_trace_len); - buffer_trace_len = fix_jump_side_exits(temp_writebuffer, buffer_trace_len); + fix_jump_side_exits(temp_writebuffer, buffer_trace_len, jump_id_to_instruction, max_jump_id); + + assert(buffer_trace_len <= trace_len); #if PARTITION_DEBUG if (buffer_trace_len < trace_len) { @@ -906,11 +1032,14 @@ _Py_uop_analyze_and_optimize( Py_XDECREF(co_const_copy); memcpy(trace, temp_writebuffer, buffer_trace_len * sizeof(_PyUOpInstruction)); PyMem_Free(temp_writebuffer); + PyMem_Free(jump_id_to_instruction); return buffer_trace_len; abstract_error: Py_XDECREF(co_const_copy); Py_DECREF(ctx); + PyMem_Free(temp_writebuffer); + PyMem_Free(jump_id_to_instruction); assert(PyErr_Occurred()); PyErr_Clear(); return trace_len; From 8c0d65fab8e3c8f79e11e8853d3e997f17f3ac54 Mon Sep 17 00:00:00 2001 From: Jules <57632293+JuliaPoo@users.noreply.github.com> Date: Fri, 11 Aug 2023 00:28:36 +0800 Subject: [PATCH 28/48] Fix+Refactor: Extra EXIT_TRACE emitted (#42) * Fix+Refactor: Extra EXIT_TRACE emitted * Style: Removed whitespace --- Python/optimizer_analysis.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 
2c16b12f62b80f..7a66300a384d32 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -848,14 +848,15 @@ _Py_uop_analyze_and_optimize( temp_writebuffer[buffer_trace_len] = insert; buffer_trace_len++; } -#if PARTITION_DEBUG - fprintf(stderr, "Emitting SAVE_IP\n"); -#endif + // Use the next SAVE_IP int temp = i; - for (; trace[temp].opcode != SAVE_IP && temp < trace_len; temp++) { - } + for (; trace[temp].opcode != SAVE_IP && temp < trace_len; temp++); assert(trace[temp].opcode == SAVE_IP); + +#if PARTITION_DEBUG + fprintf(stderr, "Emitting SAVE_IP\n"); +#endif temp_writebuffer[buffer_trace_len] = trace[temp]; buffer_trace_len++; num_dynamic_operands++; @@ -876,9 +877,10 @@ _Py_uop_analyze_and_optimize( #ifdef PARTITION_DEBUG #ifdef Py_DEBUG - fprintf(stderr, " [-] Type propagating across: %s{%d} : %d\n", + fprintf(stderr, " [-] Type propagating across: %s{%d} : %d. {reader: %d, writer: %d}\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode], - opcode, oparg); + opcode, oparg, + i, buffer_trace_len); #endif #endif switch (opcode) { @@ -997,7 +999,14 @@ _Py_uop_analyze_and_optimize( if (opcode == EXIT_TRACE) { // Copy the rest of the stubs over, then end. +#if PARTITION_DEBUG + fprintf(stderr, "Exit trace encountered, emitting the rest of the stubs\n"); +#endif + i++; // We've already emitted an EXIT_TRACE for (; i < trace_len; i++) { +#if PARTITION_DEBUG + fprintf(stderr, "Emitting %s\n", (opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); +#endif temp_writebuffer[buffer_trace_len] = trace[i]; buffer_trace_len++; } @@ -1005,10 +1014,8 @@ _Py_uop_analyze_and_optimize( } } assert(STACK_SIZE() >= 0); - buffer_trace_len = remove_duplicate_save_ips(temp_writebuffer, buffer_trace_len); fix_jump_side_exits(temp_writebuffer, buffer_trace_len, jump_id_to_instruction, max_jump_id); - assert(buffer_trace_len <= trace_len); #if PARTITION_DEBUG From 95db909ae151949710bbe5b06ddc7c0a51491c4a Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 11 Aug 2023 01:28:22 +0800 Subject: [PATCH 29/48] fix: overallocate buffer and virtual/real stack offset calculation --- Python/optimizer_analysis.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7a66300a384d32..09b05fc3591ddf 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -18,6 +18,8 @@ #define STATIC 0 #define DYNAMIC 1 +#define OVERALLOCATE_FACTOR 2 + // TYPENODE is a tagged pointer that uses the last 2 LSB as the tag #define _Py_PARTITIONNODE_t uintptr_t @@ -731,7 +733,7 @@ _Py_uop_analyze_and_optimize( PyObject *co_const_copy = NULL; _PyUOpInstruction *jump_id_to_instruction = NULL; - _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len); + _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len * OVERALLOCATE_FACTOR); if (temp_writebuffer == NULL) { return trace_len; } @@ -836,11 +838,13 @@ _Py_uop_analyze_and_optimize( // INSERT to the correct position in the stack - int offset_from_target = x - num_dynamic_operands - 1; + int target_entry = virtual_stack_size - x; + + int offset_from_target = real_stack_size - target_entry; if (offset_from_target > 0) { _PyUOpInstruction insert; insert.opcode = INSERT; - insert.oparg = -offset_from_target; + insert.oparg = offset_from_target; #if PARTITION_DEBUG 
fprintf(stderr, "Emitting INSERT %d\n", offset_from_target); From 1e05ef8aaed758e6913a23dddd6bb8420f6191b3 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 11 Aug 2023 01:29:25 +0800 Subject: [PATCH 30/48] more bugfix --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 4e3ec2953cb17b..ff306b6e5b2482 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -698,12 +698,12 @@ uop_optimize( return trace_length; } OBJECT_STAT_INC(optimization_traces_created); + trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); _PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, &UOpExecutor_Type, trace_length); if (executor == NULL) { return -1; } executor->base.execute = _PyUopExecute; - trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); *exec_ptr = (_PyExecutorObject *)executor; return 1; From 4d7abc7422a6a214ff8386bd7015e4d804be9d08 Mon Sep 17 00:00:00 2001 From: Jules <57632293+JuliaPoo@users.noreply.github.com> Date: Fri, 11 Aug 2023 15:11:27 +0800 Subject: [PATCH 31/48] Perf+Cleanup: Removed temporary allocation in `remove_duplicate_save_ips` (#43) * Cleanup: Removed warnings from ubuntu build * Perf: Removed temporary allocation in `remove_duplicate_save_ips` --- Python/optimizer_analysis.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 09b05fc3591ddf..44d8b603991dc2 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -338,7 +338,7 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, for (int i = 0; i < nlocals; i++) { _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); if (*node_ptr == child_test) { - if (new_root == 
(_Py_PARTITIONNODE_t)NULL) { + if (new_root == NULL) { // First child encountered! initialise root new_root = node_ptr; *node_ptr = old_dst; @@ -356,7 +356,7 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, for (int i = 0; i < nstack; i++) { _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); if (*node_ptr == child_test) { - if (new_root == (_Py_PARTITIONNODE_t)NULL) { + if (new_root == NULL) { // First child encountered! initialise root new_root = node_ptr; *node_ptr = old_dst; @@ -428,8 +428,6 @@ print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack bool is_local = false; bool is_stack = false; - int locals_offset = -1; - int stack_offset = -1; int parent_idx = -1; _Py_PARTITIONNODE_t *node = is_printing_stack ? &ctx->stack[i] : &ctx->locals[i]; @@ -611,11 +609,9 @@ number_jumps_and_targets(_PyUOpInstruction *trace, int trace_len, int *max_id) static int remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) { - _PyUOpInstruction *temp_trace = PyMem_New(_PyUOpInstruction, trace_len); - if (temp_trace == NULL) { - return trace_len; - } - int temp_trace_len = 0; + // Don't have to allocate a temporary trace array + // because the writer is guaranteed to be behind the reader. 
+ int new_temp_len = 0; _PyUOpInstruction curr; for (int i = 0; i < trace_len; i++) { @@ -623,16 +619,14 @@ remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) if (i < trace_len && curr.opcode == SAVE_IP && trace[i+1].opcode == SAVE_IP) { continue; } - temp_trace[temp_trace_len] = curr; - temp_trace_len++; + trace[new_temp_len] = curr; + new_temp_len++; } - memcpy(trace, temp_trace, temp_trace_len * sizeof(_PyUOpInstruction)); - PyMem_Free(temp_trace); #if PARTITION_DEBUG - fprintf(stderr, "Removed %d SAVE_IPs\n", trace_len - temp_trace_len); + fprintf(stderr, "Removed %d SAVE_IPs\n", trace_len - new_temp_len); #endif - return temp_trace_len; + return new_temp_len; } /** @@ -652,7 +646,6 @@ fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len, // Indicates it's a jump target or jump instruction if (opcode < 0 && opcode > max_jump_id) { opcode = -opcode; - int real_oparg = jump_id_to_instruction[opcode].oparg; int real_opcode = jump_id_to_instruction[opcode].opcode; if (op_is_jump(real_opcode)) { trace[i].opcode = real_opcode; @@ -673,7 +666,6 @@ fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len, // Final pass to swap out all the jump target IDs with their actual targets. 
for (int i = 0; i < trace_len; i++) { - int oparg = trace[i].oparg; int opcode = trace[i].opcode; // Indicates it's a jump target or jump instruction if (opcode < 0 && opcode > max_jump_id) { From 3d76f9a66666a956073e2c109ca9f7db2a6b46a6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 11 Aug 2023 23:47:34 +0800 Subject: [PATCH 32/48] clean up code --- Python/abstract_interp_cases.c.h | 21 +++-- Python/optimizer_analysis.c | 122 ++++++++++++++++++------------ Tools/cases_generator/stacking.py | 12 +-- 3 files changed, 89 insertions(+), 66 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 6e8448d3dc4412..3aa267d37f28a2 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -132,6 +132,12 @@ break; } + case BINARY_SUBSCR_STR_INT: { + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + case BINARY_SUBSCR_TUPLE_INT: { STACK_SHRINK(1); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); @@ -291,7 +297,7 @@ STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true); break; } @@ -307,7 +313,7 @@ STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true); break; } @@ -315,7 +321,7 @@ STACK_GROW(1); STACK_GROW(((oparg & 1) ? 
1 : 0)); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true); break; } @@ -419,9 +425,8 @@ case LOAD_SUPER_ATTR_ATTR: { STACK_SHRINK(2); - STACK_GROW(((oparg & 1) ? 1 : 0)); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (0 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(0 ? 1 : 0))), true); break; } @@ -435,7 +440,7 @@ case LOAD_ATTR: { STACK_GROW(((oparg & 1) ? 1 : 0)); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true); break; } @@ -450,7 +455,7 @@ case _LOAD_ATTR_INSTANCE_VALUE: { STACK_GROW(((oparg & 1) ? 1 : 0)); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true); break; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 09b05fc3591ddf..6c2421efbc9764 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -20,6 +20,13 @@ #define OVERALLOCATE_FACTOR 2 +#ifdef Py_DEBUG +#define DPRINTF(level, ...) 
\ + if (lltrace >= (level)) { printf(__VA_ARGS__); } +#else +#define DPRINTF(level, ...) +#endif + // TYPENODE is a tagged pointer that uses the last 2 LSB as the tag #define _Py_PARTITIONNODE_t uintptr_t @@ -420,11 +427,17 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, } } -#if PARTITION_DEBUG +#ifdef Py_DEBUG void print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack, int nstack_use, int nstack) { + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + bool is_local = false; bool is_stack = false; @@ -438,7 +451,7 @@ print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); if (is_printing_stack) { - fprintf(stderr, "%s", i == nstack_use - 1 ? "." : " "); + DPRINTF(3, "%s", i == nstack_use - 1 ? "." : " "); } if (tag == TYPE_REF) { @@ -456,10 +469,10 @@ print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - fprintf(stderr, "%s:", + DPRINTF(3, "%s:", ptr == NULL ? "?" : (ptr->static_or_dynamic == STATIC ? "static" : "dynamic")); - if (ptr != NULL && ptr->static_or_dynamic == STATIC) { - PyObject_Print(ptr->const_val, stderr, 0); + if (lltrace >= 4 && ptr != NULL && ptr->static_or_dynamic == STATIC) { + PyObject_Print(ptr->const_val, stdout, 0); } if (tag == TYPE_REF) { @@ -468,8 +481,7 @@ print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack : is_stack ? 
"stack" : "const"; - fprintf(stderr, "->%s[%d]", - wher, parent_idx); + DPRINTF(3, "->%s[%d]", wher, parent_idx); } } @@ -479,6 +491,12 @@ print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack static void print_ctx(_Py_UOpsAbstractInterpContext *ctx) { + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + _Py_PARTITIONNODE_t *locals = ctx->locals; _Py_PARTITIONNODE_t *stackptr = ctx->stack_pointer; @@ -486,19 +504,19 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) int nstack = ctx->stack_len; int nlocals = ctx->locals_len; - fprintf(stderr, " Stack: %p: [", ctx->stack); + DPRINTF(3, " Stack: %p: [", ctx->stack); for (int i = 0; i < nstack; i++) { print_ctx_node(ctx, i, true, nstack_use, nstack); - fprintf(stderr, " | "); + DPRINTF(3, " | "); } - fprintf(stderr, "]\n"); + DPRINTF(3, "]\n"); - fprintf(stderr, " Locals %p: [", locals); + DPRINTF(3, " Locals %p: [", locals); for (int i = 0; i < nlocals; i++) { print_ctx_node(ctx, i, false, nstack_use, nstack); - fprintf(stderr, " | "); + DPRINTF(3, " | "); } - fprintf(stderr, "]\n"); + DPRINTF(3, "]\n"); } #endif @@ -588,7 +606,6 @@ number_jumps_and_targets(_PyUOpInstruction *trace, int trace_len, int *max_id) jump_id_to_instruction[-target_id] = trace[target]; trace[target].opcode = target_id; jump_and_target_id--; - fprintf(stderr, "op %d oparg %d\n", jump_id_to_instruction[-target_id].opcode, jump_id_to_instruction[-target_id].oparg); } // 1 for the jump @@ -611,6 +628,14 @@ number_jumps_and_targets(_PyUOpInstruction *trace, int trace_len, int *max_id) static int remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) { +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } +#endif + _PyUOpInstruction 
*temp_trace = PyMem_New(_PyUOpInstruction, trace_len); if (temp_trace == NULL) { return trace_len; @@ -629,9 +654,7 @@ remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) memcpy(trace, temp_trace, temp_trace_len * sizeof(_PyUOpInstruction)); PyMem_Free(temp_trace); -#if PARTITION_DEBUG - fprintf(stderr, "Removed %d SAVE_IPs\n", trace_len - temp_trace_len); -#endif + DPRINTF(3, "Removed %d SAVE_IPs\n", trace_len - temp_trace_len); return temp_trace_len; } @@ -730,6 +753,14 @@ _Py_uop_analyze_and_optimize( #define PARTITIONNODE_SET(src, dst, flag) partitionnode_set((src), (dst), (flag)) #define PARTITIONNODE_OVERWRITE(src, dst, flag) partitionnode_overwrite(ctx, (src), (dst), (flag)) #define MAKE_STATIC_ROOT(val) partitionnode_make_root(0, (val)) +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } +#endif + PyObject *co_const_copy = NULL; _PyUOpInstruction *jump_id_to_instruction = NULL; @@ -740,7 +771,8 @@ _Py_uop_analyze_and_optimize( int buffer_trace_len = 0; - _Py_UOpsAbstractInterpContext *ctx = _Py_UOpsAbstractInterpContext_New(co->co_stacksize, co->co_nlocals, curr_stacklen); + _Py_UOpsAbstractInterpContext *ctx = _Py_UOpsAbstractInterpContext_New( + co->co_stacksize, co->co_nlocals, curr_stacklen); if (ctx == NULL) { PyMem_Free(temp_writebuffer); return trace_len; @@ -775,9 +807,7 @@ _Py_uop_analyze_and_optimize( // Is a special jump/target ID, decode that if (opcode < 0 && opcode > max_jump_id) { -#if PARTITION_DEBUG - fprintf(stderr, "Special jump target/ID %d\n", opcode); -#endif + DPRINTF(2, "Special jump target/ID %d\n", opcode); oparg = jump_id_to_instruction[-opcode].oparg; opcode = jump_id_to_instruction[-opcode].opcode; } @@ -830,9 +860,8 @@ _Py_uop_analyze_and_optimize( goto abstract_error; } -#if PARTITION_DEBUG - fprintf(stderr, "Emitting LOAD_CONST\n"); -#endif + DPRINTF(2, "Emitting 
LOAD_CONST\n"); + temp_writebuffer[buffer_trace_len] = load_const; buffer_trace_len++; @@ -846,9 +875,8 @@ _Py_uop_analyze_and_optimize( insert.opcode = INSERT; insert.oparg = offset_from_target; -#if PARTITION_DEBUG - fprintf(stderr, "Emitting INSERT %d\n", offset_from_target); -#endif + DPRINTF(2, "Emitting INSERT %d\n", offset_from_target); + temp_writebuffer[buffer_trace_len] = insert; buffer_trace_len++; } @@ -858,9 +886,8 @@ _Py_uop_analyze_and_optimize( for (; trace[temp].opcode != SAVE_IP && temp < trace_len; temp++); assert(trace[temp].opcode == SAVE_IP); -#if PARTITION_DEBUG - fprintf(stderr, "Emitting SAVE_IP\n"); -#endif + DPRINTF(2, "Emitting SAVE_IP\n"); + temp_writebuffer[buffer_trace_len] = trace[temp]; buffer_trace_len++; num_dynamic_operands++; @@ -868,9 +895,9 @@ _Py_uop_analyze_and_optimize( } } -#if PARTITION_DEBUG - fprintf(stderr, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); -#endif + + DPRINTF(2, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); + temp_writebuffer[buffer_trace_len] = trace[i]; buffer_trace_len++; } @@ -879,14 +906,12 @@ _Py_uop_analyze_and_optimize( * @TODO: shift these to the DSL */ -#ifdef PARTITION_DEBUG -#ifdef Py_DEBUG - fprintf(stderr, " [-] Type propagating across: %s{%d} : %d. {reader: %d, writer: %d}\n", + + DPRINTF(2, " [-] Type propagating across: %s{%d} : %d. {reader: %d, writer: %d}\n", (opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[opcode], opcode, oparg, i, buffer_trace_len); -#endif -#endif + switch (opcode) { #include "abstract_interp_cases.c.h" // @TODO convert these to autogenerated using DSL @@ -987,11 +1012,11 @@ _Py_uop_analyze_and_optimize( } } default: - fprintf(stderr, "Unknown opcode in abstract interpreter\n"); + DPRINTF(1, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); } -#if PARTITION_DEBUG +#ifdef Py_DEBUG print_ctx(ctx); #endif @@ -1003,14 +1028,14 @@ _Py_uop_analyze_and_optimize( if (opcode == EXIT_TRACE) { // Copy the rest of the stubs over, then end. -#if PARTITION_DEBUG - fprintf(stderr, "Exit trace encountered, emitting the rest of the stubs\n"); -#endif + + DPRINTF(2, "Exit trace encountered, emitting the rest of the stubs\n"); + i++; // We've already emitted an EXIT_TRACE for (; i < trace_len; i++) { -#if PARTITION_DEBUG - fprintf(stderr, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); -#endif + + DPRINTF(2, "Emitting %s\n", (opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); + temp_writebuffer[buffer_trace_len] = trace[i]; buffer_trace_len++; } @@ -1022,11 +1047,12 @@ _Py_uop_analyze_and_optimize( fix_jump_side_exits(temp_writebuffer, buffer_trace_len, jump_id_to_instruction, max_jump_id); assert(buffer_trace_len <= trace_len); -#if PARTITION_DEBUG +#ifdef Py_DEBUG if (buffer_trace_len < trace_len) { - fprintf(stderr, "Shortened trace by %d instructions\n", trace_len - buffer_trace_len); + DPRINTF(2, "Shortened trace by %d instructions\n", trace_len - buffer_trace_len); } #endif + Py_DECREF(ctx); PyObject *co_const_final = PyTuple_New(PyList_Size(co_const_copy)); diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 50294039af8f48..5277d72b53ee36 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -418,13 +418,5 @@ def _write_components_for_abstract_interp( # NULL out the output stack effects for poke in mgr.pokes: if not poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names: - out.emit(f"PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);") - # out.assign( - # StackEffect( - # poke.as_variable(), - # poke.effect.type, - # poke.effect.cond, - # poke.effect.size, - # ), - # StackEffect("partitionnode_nullroot()"), - # ) + out.emit(f"PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)" + f"PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);") From 9a5a3f7a968b34fd3168d944a427de38e41cf78c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 12 Aug 2023 00:40:59 +0800 Subject: [PATCH 33/48] make static --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index db53cb3930344d..7c6fac7e17d975 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -429,7 +429,7 @@ 
partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, #ifdef Py_DEBUG -void +static void print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack, int nstack_use, int nstack) { char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); From df490d0c0e9cc33e43ea215f024a283069c209ca Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 12 Aug 2023 02:27:39 +0800 Subject: [PATCH 34/48] make types static --- Python/optimizer_analysis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7c6fac7e17d975..3081fce43907f4 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -58,7 +58,7 @@ partitionnode_dealloc(PyObject *o) Py_TYPE(self)->tp_free(o); } -PyTypeObject _Py_PartitionRootNode_Type = { +static PyTypeObject _Py_PartitionRootNode_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract interpreter's root node", .tp_basicsize = sizeof(_Py_PartitionRootNode), @@ -143,7 +143,7 @@ abstractinterp_dealloc(PyObject *o) Py_TYPE(self)->tp_free((PyObject *)self); } -PyTypeObject _Py_UOpsAbstractInterpContext_Type = { +static PyTypeObject _Py_UOpsAbstractInterpContext_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract interpreter's context", .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext), From 1e4fc94beaf4f6a133345077dc13d99f88539b99 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 12 Aug 2023 03:23:02 +0800 Subject: [PATCH 35/48] make const and ignore in c analyzer --- Python/optimizer_analysis.c | 2 +- Tools/c-analyzer/cpython/ignored.tsv | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 3081fce43907f4..fbbf1864853a04 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -102,7 +102,7 @@ 
partitionnode_make_ref(_Py_PARTITIONNODE_t *node) } -static _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NULL | TYPE_ROOT; +static const _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NULL | TYPE_ROOT; // Tier 2 types meta interpreter typedef struct _Py_UOpsAbstractInterpContext { diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 66815c72ffbc63..bad1156e6607fd 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -715,3 +715,5 @@ Modules/expat/xmlrole.c - error - ## other Modules/_io/_iomodule.c - _PyIO_Module - Modules/_sqlite/module.c - _sqlite3module - +Python/optimizer_analysis.c - _Py_PartitionRootNode_Type +Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type From 6de77a7de45fc491bc91f539cc7370da684b6b24 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 12 Aug 2023 03:37:23 +0800 Subject: [PATCH 36/48] fix c-analyzer ignored list --- Tools/c-analyzer/cpython/ignored.tsv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index bad1156e6607fd..706fb1062770b1 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -715,5 +715,5 @@ Modules/expat/xmlrole.c - error - ## other Modules/_io/_iomodule.c - _PyIO_Module - Modules/_sqlite/module.c - _sqlite3module - -Python/optimizer_analysis.c - _Py_PartitionRootNode_Type -Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type +Python/optimizer_analysis.c - _Py_PartitionRootNode_Type - +Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type - From a11fc80374b2c788141b05f20d097fc7d845f607 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 04:47:27 +0800 Subject: [PATCH 37/48] more cleanup --- 
Python/optimizer_analysis.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index fbbf1864853a04..142d9f519022fe 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -118,11 +118,6 @@ typedef struct _Py_UOpsAbstractInterpContext { // Indicates whether the stack entry is real or virtualised. // true - virtual false - real bool *stack_virtual_or_real; - // The following represent the real (emitted instructions) stack and locals. - // points to one element after the abstract stack - _Py_PARTITIONNODE_t *real_stack_pointer; - _Py_PARTITIONNODE_t *real_stack; - _Py_PARTITIONNODE_t *real_locals; } _Py_UOpsAbstractInterpContext; static void @@ -165,8 +160,7 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl self->stack_len = stack_len; self->locals_len = locals_len; - // Double the size needed because we also need a representation for the real stack and locals. 
- _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, (locals_len + stack_len) * 2); + _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, (locals_len + stack_len)); if (locals_with_stack == NULL) { Py_DECREF(self); return NULL; @@ -183,15 +177,16 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl locals_with_stack[i] = PARTITIONNODE_NULLROOT; } + for (int i = 0; i < stack_len; i++) { + virtual_or_real[i] = false; + } + self->locals = locals_with_stack; self->stack = locals_with_stack + locals_len; self->stack_pointer = self->stack + curr_stacklen; self->stack_virtual_or_real = virtual_or_real; - self->real_locals = self->locals + locals_len + stack_len; - self->real_stack = self->stack + locals_len + stack_len; - self->real_stack_pointer = self->stack_pointer + locals_len + stack_len; return self; } @@ -778,7 +773,7 @@ _Py_uop_analyze_and_optimize( goto abstract_error; } - + // We will be adding more constants due to partial evaluation. co_const_copy = PyList_New(PyTuple_Size(co->co_consts)); if (co_const_copy == NULL) { goto abstract_error; From 56c62eb4c515900f55a76e9c6093263010227a75 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 05:02:51 +0800 Subject: [PATCH 38/48] regen files --- Python/abstract_interp_cases.c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 3aa267d37f28a2..ed78ef148faf40 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -425,8 +425,8 @@ case LOAD_SUPER_ATTR_ATTR: { STACK_SHRINK(2); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (0 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(0 ? 
1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(0)), true); break; } From 3c08ebeed2dc7ac6f8751faed8a1aa778376f2c7 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 19:56:04 +0800 Subject: [PATCH 39/48] address review --- Include/internal/pycore_uops.h | 2 +- Lib/test/test_capi/test_misc.py | 1 + Makefile.pre.in | 2 ++ Python/abstract_interp_cases.c.h | 1 + Python/bytecodes.c | 3 +-- Python/executor_cases.c.h | 7 ++++++- Python/optimizer_analysis.c | 2 +- Tools/cases_generator/generate_cases.py | 9 +++------ Tools/cases_generator/instructions.py | 9 --------- 9 files changed, 16 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index d351f3353a5c6f..30b87e43a3f5d5 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -8,7 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -#define _Py_UOP_MAX_TRACE_LENGTH 256 +#define _Py_UOP_MAX_TRACE_LENGTH 64 typedef struct { int32_t opcode; diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 5f0de8b47115a7..836bebf3208db1 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2643,5 +2643,6 @@ def testfunc(loops): binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] self.assertEqual(len(binop_count), 1) + if __name__ == "__main__": unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index f931020ee49ca3..1c5912105f9941 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1553,10 +1553,12 @@ regen-cases: -m $(srcdir)/Include/internal/pycore_opcode_metadata.h.new \ -e $(srcdir)/Python/executor_cases.c.h.new \ -p $(srcdir)/Lib/_opcode_metadata.py.new \ + -a $(srcdir)/Python/abstract_interp_cases.c.h.new \ 
$(srcdir)/Python/bytecodes.c $(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_opcode_metadata.h $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(UPDATE_FILE) $(srcdir)/Python/executor_cases.c.h $(srcdir)/Python/executor_cases.c.h.new + $(UPDATE_FILE) $(srcdir)/Python/abstract_interp_cases.c.h $(srcdir)/Python/abstract_interp_cases.c.h.new $(UPDATE_FILE) $(srcdir)/Lib/_opcode_metadata.py $(srcdir)/Lib/_opcode_metadata.py.new Python/compile.o: $(srcdir)/Include/internal/pycore_opcode_metadata.h diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index ed78ef148faf40..6bfcf534646b1e 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -756,5 +756,6 @@ } case INSERT: { + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - oparg)), true); break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7566fcdf64c908..12c5948af7a8e7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3743,13 +3743,12 @@ dummy_func( return frame; } - op(INSERT, (--)) { + op(INSERT, (stuff[oparg], top -- top, stuff[oparg])) { // Inserts TOS at position specified by oparg PyObject *tos = TOP(); for (int i = 1; i < oparg + 1; i++) { stack_pointer[i] = stack_pointer[i - 1]; } - POKE(oparg, tos); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 03b0f659c2cf41..0e9b001b422eb6 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2735,11 +2735,16 @@ } case INSERT: { + PyObject *top; + PyObject **stuff; + top = stack_pointer[-1]; + stuff = stack_pointer - 1 - oparg; + stuff = stack_pointer - oparg; // Inserts TOS at position specified by oparg PyObject *tos = TOP(); for (int i = 1; i < oparg + 1; i++) { stack_pointer[i] = stack_pointer[i - 1]; } - POKE(oparg, tos); + stack_pointer[-1 - oparg] = top; break; } diff --git 
a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 142d9f519022fe..5cc9312ca1165d 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -173,7 +173,7 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl return NULL; } - for (int i = 0; i < (locals_len + stack_len) * 2; i++) { + for (int i = 0; i < (locals_len + stack_len); i++) { locals_with_stack[i] = PARTITIONNODE_NULLROOT; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index ca7f67ca1f6627..3b0e21f245b386 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -61,7 +61,7 @@ INSTR_FMT_PREFIX = "INSTR_FMT_" -# @TODO generate all these after updating the DSL +# TODO: generate all these after updating the DSL SPECIALLY_HANDLED_ABSTRACT_INSTR = { "LOAD_FAST", "LOAD_FAST_CHECK", @@ -135,7 +135,7 @@ def effect_str(effects: list[StackEffect]) -> str: pushed: str | None match thing: case parsing.InstDef(): - if thing.kind != "op" or (thing.kind != "inst" and self.instrs[thing.name].is_viable_uop()): + if thing.kind == "instr" or self.instrs[thing.name].is_viable_uop(): instr = self.instrs[thing.name] popped = effect_str(instr.input_effects) pushed = effect_str(instr.output_effects) @@ -641,8 +641,7 @@ def write_abstract_interpreter_instructions( for thing in self.everything: match thing: case OverriddenInstructionPlaceHolder(): - # TODO: Is this helpful? 
- self.write_overridden_instr_place_holder(thing) + pass case parsing.InstDef(): instr = AbstractInstruction(self.instrs[thing.name].inst) if instr.is_viable_uop() and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR: @@ -650,8 +649,6 @@ def write_abstract_interpreter_instructions( with self.out.block(f"case {thing.name}:"): instr.write(self.out, tier=TIER_TWO) self.out.emit("break;") - # elif instr.kind != "op": - # print(f"NOTE: {thing.name} is not a viable uop") case parsing.Macro(): pass case parsing.Pseudo(): diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index a10e8f41ab67db..a505df08fa265b 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -255,15 +255,6 @@ def __init__(self, inst: parsing.InstDef): def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: """Write one abstract instruction, sans prologue and epilogue.""" - # Write a static assertion that a family's cache size is correct - if family := self.family: - if self.name == family.name: - if cache_size := family.size: - out.emit( - f"static_assert({cache_size} == " - f'{self.cache_offset}, "incorrect cache size");' - ) - stacking.write_single_instr_for_abstract_interp(self, out) def write_body( From d5f16be5d5fe7b6d2294b103a7fcdef59fc667c0 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 19:56:36 +0800 Subject: [PATCH 40/48] regen --- Include/internal/pycore_opcode_metadata.h | 304 +--------------------- 1 file changed, 2 insertions(+), 302 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 86df531b161105..5c02dff63b0a3a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -65,10 +65,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { switch(opcode) { case NOP: return 0; - case RESUME: - return 0; - case 
INSTRUMENTED_RESUME: - return 0; case LOAD_CLOSURE: return 0; case LOAD_FAST_CHECK: @@ -77,30 +73,20 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case LOAD_FAST_AND_CLEAR: return 0; - case LOAD_FAST_LOAD_FAST: - return 0; case LOAD_CONST: return 0; case STORE_FAST: return 1; case STORE_FAST_MAYBE_NULL: return 1; - case STORE_FAST_LOAD_FAST: - return 1; - case STORE_FAST_STORE_FAST: - return 2; case POP_TOP: return 1; case PUSH_NULL: return 0; case END_FOR: return 2; - case INSTRUMENTED_END_FOR: - return 2; case END_SEND: return 2; - case INSTRUMENTED_END_SEND: - return 2; case UNARY_NEGATIVE: return 1; case UNARY_NOT: @@ -171,8 +157,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case BINARY_SUBSCR_DICT: return 2; - case BINARY_SUBSCR_GETITEM: - return 2; case LIST_APPEND: return (oparg-1) + 2; case SET_ADD: @@ -189,40 +173,14 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case CALL_INTRINSIC_2: return 2; - case RAISE_VARARGS: - return oparg; - case INTERPRETER_EXIT: - return 1; - case RETURN_VALUE: - return 1; - case INSTRUMENTED_RETURN_VALUE: - return 1; - case RETURN_CONST: - return 0; - case INSTRUMENTED_RETURN_CONST: - return 0; case GET_AITER: return 1; case GET_ANEXT: return 1; case GET_AWAITABLE: return 1; - case SEND: - return 2; - case SEND_GEN: - return 2; - case INSTRUMENTED_YIELD_VALUE: - return 1; - case YIELD_VALUE: - return 1; case POP_EXCEPT: return 1; - case RERAISE: - return oparg + 1; - case END_ASYNC_FOR: - return 2; - case CLEANUP_THROW: - return 3; case LOAD_ASSERTION_ERROR: return 0; case LOAD_BUILD_CLASS: @@ -275,8 +233,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case DELETE_FAST: return 0; - case MAKE_CELL: - return 0; case DELETE_DEREF: return 0; case LOAD_FROM_DICT_OR_DEREF: @@ -311,10 +267,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return (oparg - 1) + 5; case MAP_ADD: return (oparg - 1) + 3; - case 
INSTRUMENTED_LOAD_SUPER_ATTR: - return 3; - case LOAD_SUPER_ATTR: - return 3; case LOAD_SUPER_METHOD: return 3; case LOAD_ZERO_SUPER_METHOD: @@ -337,24 +289,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case LOAD_ATTR_INSTANCE_VALUE: return 1; - case LOAD_ATTR_MODULE: - return 1; - case LOAD_ATTR_WITH_HINT: - return 1; - case LOAD_ATTR_SLOT: - return 1; - case LOAD_ATTR_CLASS: - return 1; - case LOAD_ATTR_PROPERTY: - return 1; - case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: - return 1; - case STORE_ATTR_INSTANCE_VALUE: - return 2; - case STORE_ATTR_WITH_HINT: - return 2; - case STORE_ATTR_SLOT: - return 2; case COMPARE_OP: return 2; case COMPARE_OP_FLOAT: @@ -371,32 +305,16 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case CHECK_EXC_MATCH: return 2; - case IMPORT_NAME: - return 2; - case IMPORT_FROM: - return 1; - case JUMP_FORWARD: - return 0; - case JUMP_BACKWARD: - return 0; case JUMP: return 0; case JUMP_NO_INTERRUPT: return 0; - case ENTER_EXECUTOR: - return 0; - case POP_JUMP_IF_FALSE: - return 1; - case POP_JUMP_IF_TRUE: - return 1; case IS_NONE: return 1; case POP_JUMP_IF_NONE: return 1; case POP_JUMP_IF_NOT_NONE: return 1; - case JUMP_BACKWARD_NO_INTERRUPT: - return 0; case GET_LEN: return 1; case MATCH_CLASS: @@ -411,10 +329,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case GET_YIELD_FROM_ITER: return 1; - case FOR_ITER: - return 1; - case INSTRUMENTED_FOR_ITER: - return 0; case _ITER_CHECK_LIST: return 1; case _IS_ITER_EXHAUSTED_LIST: @@ -439,12 +353,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case FOR_ITER_RANGE: return 1; - case FOR_ITER_GEN: - return 1; - case BEFORE_ASYNC_WITH: - return 1; - case BEFORE_WITH: - return 1; case WITH_EXCEPT_START: return 4; case SETUP_FINALLY: @@ -457,70 +365,32 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case PUSH_EXC_INFO: return 1; - case LOAD_ATTR_METHOD_WITH_VALUES: - return 1; - case 
LOAD_ATTR_METHOD_NO_DICT: - return 1; - case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: - return 1; - case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: - return 1; - case LOAD_ATTR_METHOD_LAZY_DICT: - return 1; - case KW_NAMES: - return 0; - case INSTRUMENTED_CALL: - return 0; - case CALL: - return oparg + 2; - case CALL_BOUND_METHOD_EXACT_ARGS: - return oparg + 2; - case CALL_PY_EXACT_ARGS: - return oparg + 2; - case CALL_PY_WITH_DEFAULTS: - return oparg + 2; case CALL_NO_KW_TYPE_1: return oparg + 2; case CALL_NO_KW_STR_1: return oparg + 2; case CALL_NO_KW_TUPLE_1: return oparg + 2; - case CALL_NO_KW_ALLOC_AND_ENTER_INIT: - return oparg + 2; case EXIT_INIT_CHECK: return 1; - case CALL_BUILTIN_CLASS: - return oparg + 2; case CALL_NO_KW_BUILTIN_O: return oparg + 2; case CALL_NO_KW_BUILTIN_FAST: return oparg + 2; - case CALL_BUILTIN_FAST_WITH_KEYWORDS: - return oparg + 2; case CALL_NO_KW_LEN: return oparg + 2; case CALL_NO_KW_ISINSTANCE: return oparg + 2; - case CALL_NO_KW_LIST_APPEND: - return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_O: return oparg + 2; - case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: - return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: return oparg + 2; - case INSTRUMENTED_CALL_FUNCTION_EX: - return 0; - case CALL_FUNCTION_EX: - return ((oparg & 1) ? 1 : 0) + 3; case MAKE_FUNCTION: return 1; case SET_FUNCTION_ATTRIBUTE: return 2; - case RETURN_GENERATOR: - return 0; case BUILD_SLICE: return ((oparg == 3) ? 
1 : 0) + 2; case CONVERT_VALUE: @@ -535,26 +405,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case SWAP: return (oparg-2) + 2; - case INSTRUMENTED_INSTRUCTION: - return 0; - case INSTRUMENTED_JUMP_FORWARD: - return 0; - case INSTRUMENTED_JUMP_BACKWARD: - return 0; - case INSTRUMENTED_POP_JUMP_IF_TRUE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_FALSE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_NONE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: - return 0; - case EXTENDED_ARG: - return 0; - case CACHE: - return 0; - case RESERVED: - return 0; case _POP_JUMP_IF_FALSE: return 1; case _POP_JUMP_IF_TRUE: @@ -566,7 +416,7 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { case EXIT_TRACE: return 0; case INSERT: - return 0; + return oparg + 1; default: return -1; } @@ -581,10 +431,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { switch(opcode) { case NOP: return 0; - case RESUME: - return 0; - case INSTRUMENTED_RESUME: - return 0; case LOAD_CLOSURE: return 1; case LOAD_FAST_CHECK: @@ -593,30 +439,20 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case LOAD_FAST_AND_CLEAR: return 1; - case LOAD_FAST_LOAD_FAST: - return 2; case LOAD_CONST: return 1; case STORE_FAST: return 0; case STORE_FAST_MAYBE_NULL: return 0; - case STORE_FAST_LOAD_FAST: - return 1; - case STORE_FAST_STORE_FAST: - return 0; case POP_TOP: return 0; case PUSH_NULL: return 1; case END_FOR: return 0; - case INSTRUMENTED_END_FOR: - return 0; case END_SEND: return 1; - case INSTRUMENTED_END_SEND: - return 1; case UNARY_NEGATIVE: return 1; case UNARY_NOT: @@ -687,8 +523,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case BINARY_SUBSCR_DICT: return 1; - case BINARY_SUBSCR_GETITEM: - return 1; case LIST_APPEND: return (oparg-1) + 1; case SET_ADD: @@ -705,40 +539,14 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case CALL_INTRINSIC_2: return 1; - case RAISE_VARARGS: - return 0; - case 
INTERPRETER_EXIT: - return 0; - case RETURN_VALUE: - return 0; - case INSTRUMENTED_RETURN_VALUE: - return 0; - case RETURN_CONST: - return 0; - case INSTRUMENTED_RETURN_CONST: - return 0; case GET_AITER: return 1; case GET_ANEXT: return 2; case GET_AWAITABLE: return 1; - case SEND: - return 2; - case SEND_GEN: - return 2; - case INSTRUMENTED_YIELD_VALUE: - return 1; - case YIELD_VALUE: - return 1; case POP_EXCEPT: return 0; - case RERAISE: - return oparg; - case END_ASYNC_FOR: - return 0; - case CLEANUP_THROW: - return 2; case LOAD_ASSERTION_ERROR: return 1; case LOAD_BUILD_CLASS: @@ -791,8 +599,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return (oparg & 1 ? 1 : 0) + 1; case DELETE_FAST: return 0; - case MAKE_CELL: - return 0; case DELETE_DEREF: return 0; case LOAD_FROM_DICT_OR_DEREF: @@ -827,10 +633,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return (oparg - 1) + 4; case MAP_ADD: return (oparg - 1) + 1; - case INSTRUMENTED_LOAD_SUPER_ATTR: - return ((oparg & 1) ? 1 : 0) + 1; - case LOAD_SUPER_ATTR: - return ((oparg & 1) ? 1 : 0) + 1; case LOAD_SUPER_METHOD: return ((oparg & 1) ? 1 : 0) + 1; case LOAD_ZERO_SUPER_METHOD: @@ -853,24 +655,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return ((oparg & 1) ? 1 : 0) + 1; case LOAD_ATTR_INSTANCE_VALUE: return (oparg & 1 ? 1 : 0) + 1; - case LOAD_ATTR_MODULE: - return ((oparg & 1) ? 1 : 0) + 1; - case LOAD_ATTR_WITH_HINT: - return ((oparg & 1) ? 1 : 0) + 1; - case LOAD_ATTR_SLOT: - return ((oparg & 1) ? 1 : 0) + 1; - case LOAD_ATTR_CLASS: - return ((oparg & 1) ? 
1 : 0) + 1; - case LOAD_ATTR_PROPERTY: - return 1; - case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: - return 1; - case STORE_ATTR_INSTANCE_VALUE: - return 0; - case STORE_ATTR_WITH_HINT: - return 0; - case STORE_ATTR_SLOT: - return 0; case COMPARE_OP: return 1; case COMPARE_OP_FLOAT: @@ -887,32 +671,16 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 2; case CHECK_EXC_MATCH: return 2; - case IMPORT_NAME: - return 1; - case IMPORT_FROM: - return 2; - case JUMP_FORWARD: - return 0; - case JUMP_BACKWARD: - return 0; case JUMP: return 0; case JUMP_NO_INTERRUPT: return 0; - case ENTER_EXECUTOR: - return 0; - case POP_JUMP_IF_FALSE: - return 0; - case POP_JUMP_IF_TRUE: - return 0; case IS_NONE: return 1; case POP_JUMP_IF_NONE: return 0; case POP_JUMP_IF_NOT_NONE: return 0; - case JUMP_BACKWARD_NO_INTERRUPT: - return 0; case GET_LEN: return 2; case MATCH_CLASS: @@ -927,10 +695,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case GET_YIELD_FROM_ITER: return 1; - case FOR_ITER: - return 2; - case INSTRUMENTED_FOR_ITER: - return 0; case _ITER_CHECK_LIST: return 1; case _IS_ITER_EXHAUSTED_LIST: @@ -955,12 +719,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 2; case FOR_ITER_RANGE: return 2; - case FOR_ITER_GEN: - return 2; - case BEFORE_ASYNC_WITH: - return 2; - case BEFORE_WITH: - return 2; case WITH_EXCEPT_START: return 5; case SETUP_FINALLY: @@ -973,70 +731,32 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case PUSH_EXC_INFO: return 2; - case LOAD_ATTR_METHOD_WITH_VALUES: - return 2; - case LOAD_ATTR_METHOD_NO_DICT: - return 2; - case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: - return 1; - case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: - return 1; - case LOAD_ATTR_METHOD_LAZY_DICT: - return 2; - case KW_NAMES: - return 0; - case INSTRUMENTED_CALL: - return 0; - case CALL: - return 1; - case CALL_BOUND_METHOD_EXACT_ARGS: - return 1; - case CALL_PY_EXACT_ARGS: - return 1; - case CALL_PY_WITH_DEFAULTS: - return 
1; case CALL_NO_KW_TYPE_1: return 1; case CALL_NO_KW_STR_1: return 1; case CALL_NO_KW_TUPLE_1: return 1; - case CALL_NO_KW_ALLOC_AND_ENTER_INIT: - return 1; case EXIT_INIT_CHECK: return 0; - case CALL_BUILTIN_CLASS: - return 1; case CALL_NO_KW_BUILTIN_O: return 1; case CALL_NO_KW_BUILTIN_FAST: return 1; - case CALL_BUILTIN_FAST_WITH_KEYWORDS: - return 1; case CALL_NO_KW_LEN: return 1; case CALL_NO_KW_ISINSTANCE: return 1; - case CALL_NO_KW_LIST_APPEND: - return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_O: return 1; - case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: - return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: return 1; - case INSTRUMENTED_CALL_FUNCTION_EX: - return 0; - case CALL_FUNCTION_EX: - return 1; case MAKE_FUNCTION: return 1; case SET_FUNCTION_ATTRIBUTE: return 1; - case RETURN_GENERATOR: - return 0; case BUILD_SLICE: return 1; case CONVERT_VALUE: @@ -1051,26 +771,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case SWAP: return (oparg-2) + 2; - case INSTRUMENTED_INSTRUCTION: - return 0; - case INSTRUMENTED_JUMP_FORWARD: - return 0; - case INSTRUMENTED_JUMP_BACKWARD: - return 0; - case INSTRUMENTED_POP_JUMP_IF_TRUE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_FALSE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_NONE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: - return 0; - case EXTENDED_ARG: - return 0; - case CACHE: - return 0; - case RESERVED: - return 0; case _POP_JUMP_IF_FALSE: return 0; case _POP_JUMP_IF_TRUE: @@ -1082,7 +782,7 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { case EXIT_TRACE: return 0; case INSERT: - return 0; + return oparg + 1; default: return -1; } From 1e61c49bed895c6ab2db18d96ef060a60b1f46fd Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 20:05:07 +0800 Subject: [PATCH 41/48] and env var to block tests --- Lib/test/test_capi/test_misc.py | 2 +- Python/optimizer.c | 5 
++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 836bebf3208db1..6fbfa80a39036c 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2618,7 +2618,7 @@ def testfunc(it): with self.assertRaises(StopIteration): next(it) - +@unittest.skipIf(os.getenv("PYTHONUOPSOPTIMIZE") is None, "UOps optimization isn't enabled") class TestUopsOptimization(unittest.TestCase): def test_int_constant_propagation(self): diff --git a/Python/optimizer.c b/Python/optimizer.c index 3aaafd33113481..ff07926082e7da 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -698,7 +698,10 @@ uop_optimize( return trace_length; } OBJECT_STAT_INC(optimization_traces_created); - trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); + char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); + if (uop_optimize != NULL && *uop_optimize >= '0') { + trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); + } _PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, &UOpExecutor_Type, trace_length); if (executor == NULL) { return -1; From 6c24b493c879784936845b4a42611599840a2fdf Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 20:14:30 +0800 Subject: [PATCH 42/48] regen again --- Include/internal/pycore_opcode_metadata.h | 300 ++++++++++++++++++++++ Python/abstract_interp_cases.c.h | 2 +- Python/executor_cases.c.h | 2 +- Tools/cases_generator/generate_cases.py | 2 +- 4 files changed, 303 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 5c02dff63b0a3a..01e2adc3502cbb 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -65,6 +65,10 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { 
switch(opcode) { case NOP: return 0; + case RESUME: + return 0; + case INSTRUMENTED_RESUME: + return 0; case LOAD_CLOSURE: return 0; case LOAD_FAST_CHECK: @@ -73,20 +77,30 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case LOAD_FAST_AND_CLEAR: return 0; + case LOAD_FAST_LOAD_FAST: + return 0; case LOAD_CONST: return 0; case STORE_FAST: return 1; case STORE_FAST_MAYBE_NULL: return 1; + case STORE_FAST_LOAD_FAST: + return 1; + case STORE_FAST_STORE_FAST: + return 2; case POP_TOP: return 1; case PUSH_NULL: return 0; case END_FOR: return 2; + case INSTRUMENTED_END_FOR: + return 2; case END_SEND: return 2; + case INSTRUMENTED_END_SEND: + return 2; case UNARY_NEGATIVE: return 1; case UNARY_NOT: @@ -157,6 +171,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case BINARY_SUBSCR_DICT: return 2; + case BINARY_SUBSCR_GETITEM: + return 2; case LIST_APPEND: return (oparg-1) + 2; case SET_ADD: @@ -173,14 +189,40 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case CALL_INTRINSIC_2: return 2; + case RAISE_VARARGS: + return oparg; + case INTERPRETER_EXIT: + return 1; + case RETURN_VALUE: + return 1; + case INSTRUMENTED_RETURN_VALUE: + return 1; + case RETURN_CONST: + return 0; + case INSTRUMENTED_RETURN_CONST: + return 0; case GET_AITER: return 1; case GET_ANEXT: return 1; case GET_AWAITABLE: return 1; + case SEND: + return 2; + case SEND_GEN: + return 2; + case INSTRUMENTED_YIELD_VALUE: + return 1; + case YIELD_VALUE: + return 1; case POP_EXCEPT: return 1; + case RERAISE: + return oparg + 1; + case END_ASYNC_FOR: + return 2; + case CLEANUP_THROW: + return 3; case LOAD_ASSERTION_ERROR: return 0; case LOAD_BUILD_CLASS: @@ -233,6 +275,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case DELETE_FAST: return 0; + case MAKE_CELL: + return 0; case DELETE_DEREF: return 0; case LOAD_FROM_DICT_OR_DEREF: @@ -267,6 +311,10 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return (oparg - 1) 
+ 5; case MAP_ADD: return (oparg - 1) + 3; + case INSTRUMENTED_LOAD_SUPER_ATTR: + return 3; + case LOAD_SUPER_ATTR: + return 3; case LOAD_SUPER_METHOD: return 3; case LOAD_ZERO_SUPER_METHOD: @@ -289,6 +337,24 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case LOAD_ATTR_INSTANCE_VALUE: return 1; + case LOAD_ATTR_MODULE: + return 1; + case LOAD_ATTR_WITH_HINT: + return 1; + case LOAD_ATTR_SLOT: + return 1; + case LOAD_ATTR_CLASS: + return 1; + case LOAD_ATTR_PROPERTY: + return 1; + case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: + return 1; + case STORE_ATTR_INSTANCE_VALUE: + return 2; + case STORE_ATTR_WITH_HINT: + return 2; + case STORE_ATTR_SLOT: + return 2; case COMPARE_OP: return 2; case COMPARE_OP_FLOAT: @@ -305,16 +371,32 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case CHECK_EXC_MATCH: return 2; + case IMPORT_NAME: + return 2; + case IMPORT_FROM: + return 1; + case JUMP_FORWARD: + return 0; + case JUMP_BACKWARD: + return 0; case JUMP: return 0; case JUMP_NO_INTERRUPT: return 0; + case ENTER_EXECUTOR: + return 0; + case POP_JUMP_IF_FALSE: + return 1; + case POP_JUMP_IF_TRUE: + return 1; case IS_NONE: return 1; case POP_JUMP_IF_NONE: return 1; case POP_JUMP_IF_NOT_NONE: return 1; + case JUMP_BACKWARD_NO_INTERRUPT: + return 0; case GET_LEN: return 1; case MATCH_CLASS: @@ -329,6 +411,10 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case GET_YIELD_FROM_ITER: return 1; + case FOR_ITER: + return 1; + case INSTRUMENTED_FOR_ITER: + return 0; case _ITER_CHECK_LIST: return 1; case _IS_ITER_EXHAUSTED_LIST: @@ -353,6 +439,12 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case FOR_ITER_RANGE: return 1; + case FOR_ITER_GEN: + return 1; + case BEFORE_ASYNC_WITH: + return 1; + case BEFORE_WITH: + return 1; case WITH_EXCEPT_START: return 4; case SETUP_FINALLY: @@ -365,32 +457,70 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case PUSH_EXC_INFO: return 1; + case 
LOAD_ATTR_METHOD_WITH_VALUES: + return 1; + case LOAD_ATTR_METHOD_NO_DICT: + return 1; + case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: + return 1; + case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: + return 1; + case LOAD_ATTR_METHOD_LAZY_DICT: + return 1; + case KW_NAMES: + return 0; + case INSTRUMENTED_CALL: + return 0; + case CALL: + return oparg + 2; + case CALL_BOUND_METHOD_EXACT_ARGS: + return oparg + 2; + case CALL_PY_EXACT_ARGS: + return oparg + 2; + case CALL_PY_WITH_DEFAULTS: + return oparg + 2; case CALL_NO_KW_TYPE_1: return oparg + 2; case CALL_NO_KW_STR_1: return oparg + 2; case CALL_NO_KW_TUPLE_1: return oparg + 2; + case CALL_NO_KW_ALLOC_AND_ENTER_INIT: + return oparg + 2; case EXIT_INIT_CHECK: return 1; + case CALL_BUILTIN_CLASS: + return oparg + 2; case CALL_NO_KW_BUILTIN_O: return oparg + 2; case CALL_NO_KW_BUILTIN_FAST: return oparg + 2; + case CALL_BUILTIN_FAST_WITH_KEYWORDS: + return oparg + 2; case CALL_NO_KW_LEN: return oparg + 2; case CALL_NO_KW_ISINSTANCE: return oparg + 2; + case CALL_NO_KW_LIST_APPEND: + return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_O: return oparg + 2; + case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: + return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: return oparg + 2; + case INSTRUMENTED_CALL_FUNCTION_EX: + return 0; + case CALL_FUNCTION_EX: + return ((oparg & 1) ? 1 : 0) + 3; case MAKE_FUNCTION: return 1; case SET_FUNCTION_ATTRIBUTE: return 2; + case RETURN_GENERATOR: + return 0; case BUILD_SLICE: return ((oparg == 3) ? 
1 : 0) + 2; case CONVERT_VALUE: @@ -405,6 +535,26 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case SWAP: return (oparg-2) + 2; + case INSTRUMENTED_INSTRUCTION: + return 0; + case INSTRUMENTED_JUMP_FORWARD: + return 0; + case INSTRUMENTED_JUMP_BACKWARD: + return 0; + case INSTRUMENTED_POP_JUMP_IF_TRUE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_FALSE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NONE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: + return 0; + case EXTENDED_ARG: + return 0; + case CACHE: + return 0; + case RESERVED: + return 0; case _POP_JUMP_IF_FALSE: return 1; case _POP_JUMP_IF_TRUE: @@ -431,6 +581,10 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { switch(opcode) { case NOP: return 0; + case RESUME: + return 0; + case INSTRUMENTED_RESUME: + return 0; case LOAD_CLOSURE: return 1; case LOAD_FAST_CHECK: @@ -439,20 +593,30 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case LOAD_FAST_AND_CLEAR: return 1; + case LOAD_FAST_LOAD_FAST: + return 2; case LOAD_CONST: return 1; case STORE_FAST: return 0; case STORE_FAST_MAYBE_NULL: return 0; + case STORE_FAST_LOAD_FAST: + return 1; + case STORE_FAST_STORE_FAST: + return 0; case POP_TOP: return 0; case PUSH_NULL: return 1; case END_FOR: return 0; + case INSTRUMENTED_END_FOR: + return 0; case END_SEND: return 1; + case INSTRUMENTED_END_SEND: + return 1; case UNARY_NEGATIVE: return 1; case UNARY_NOT: @@ -523,6 +687,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case BINARY_SUBSCR_DICT: return 1; + case BINARY_SUBSCR_GETITEM: + return 1; case LIST_APPEND: return (oparg-1) + 1; case SET_ADD: @@ -539,14 +705,40 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case CALL_INTRINSIC_2: return 1; + case RAISE_VARARGS: + return 0; + case INTERPRETER_EXIT: + return 0; + case RETURN_VALUE: + return 0; + case INSTRUMENTED_RETURN_VALUE: + return 0; + case RETURN_CONST: + return 0; + case 
INSTRUMENTED_RETURN_CONST: + return 0; case GET_AITER: return 1; case GET_ANEXT: return 2; case GET_AWAITABLE: return 1; + case SEND: + return 2; + case SEND_GEN: + return 2; + case INSTRUMENTED_YIELD_VALUE: + return 1; + case YIELD_VALUE: + return 1; case POP_EXCEPT: return 0; + case RERAISE: + return oparg; + case END_ASYNC_FOR: + return 0; + case CLEANUP_THROW: + return 2; case LOAD_ASSERTION_ERROR: return 1; case LOAD_BUILD_CLASS: @@ -599,6 +791,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return (oparg & 1 ? 1 : 0) + 1; case DELETE_FAST: return 0; + case MAKE_CELL: + return 0; case DELETE_DEREF: return 0; case LOAD_FROM_DICT_OR_DEREF: @@ -633,6 +827,10 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return (oparg - 1) + 4; case MAP_ADD: return (oparg - 1) + 1; + case INSTRUMENTED_LOAD_SUPER_ATTR: + return ((oparg & 1) ? 1 : 0) + 1; + case LOAD_SUPER_ATTR: + return ((oparg & 1) ? 1 : 0) + 1; case LOAD_SUPER_METHOD: return ((oparg & 1) ? 1 : 0) + 1; case LOAD_ZERO_SUPER_METHOD: @@ -655,6 +853,24 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return ((oparg & 1) ? 1 : 0) + 1; case LOAD_ATTR_INSTANCE_VALUE: return (oparg & 1 ? 1 : 0) + 1; + case LOAD_ATTR_MODULE: + return ((oparg & 1) ? 1 : 0) + 1; + case LOAD_ATTR_WITH_HINT: + return ((oparg & 1) ? 1 : 0) + 1; + case LOAD_ATTR_SLOT: + return ((oparg & 1) ? 1 : 0) + 1; + case LOAD_ATTR_CLASS: + return ((oparg & 1) ? 
1 : 0) + 1; + case LOAD_ATTR_PROPERTY: + return 1; + case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: + return 1; + case STORE_ATTR_INSTANCE_VALUE: + return 0; + case STORE_ATTR_WITH_HINT: + return 0; + case STORE_ATTR_SLOT: + return 0; case COMPARE_OP: return 1; case COMPARE_OP_FLOAT: @@ -671,16 +887,32 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 2; case CHECK_EXC_MATCH: return 2; + case IMPORT_NAME: + return 1; + case IMPORT_FROM: + return 2; + case JUMP_FORWARD: + return 0; + case JUMP_BACKWARD: + return 0; case JUMP: return 0; case JUMP_NO_INTERRUPT: return 0; + case ENTER_EXECUTOR: + return 0; + case POP_JUMP_IF_FALSE: + return 0; + case POP_JUMP_IF_TRUE: + return 0; case IS_NONE: return 1; case POP_JUMP_IF_NONE: return 0; case POP_JUMP_IF_NOT_NONE: return 0; + case JUMP_BACKWARD_NO_INTERRUPT: + return 0; case GET_LEN: return 2; case MATCH_CLASS: @@ -695,6 +927,10 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case GET_YIELD_FROM_ITER: return 1; + case FOR_ITER: + return 2; + case INSTRUMENTED_FOR_ITER: + return 0; case _ITER_CHECK_LIST: return 1; case _IS_ITER_EXHAUSTED_LIST: @@ -719,6 +955,12 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 2; case FOR_ITER_RANGE: return 2; + case FOR_ITER_GEN: + return 2; + case BEFORE_ASYNC_WITH: + return 2; + case BEFORE_WITH: + return 2; case WITH_EXCEPT_START: return 5; case SETUP_FINALLY: @@ -731,32 +973,70 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case PUSH_EXC_INFO: return 2; + case LOAD_ATTR_METHOD_WITH_VALUES: + return 2; + case LOAD_ATTR_METHOD_NO_DICT: + return 2; + case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: + return 1; + case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: + return 1; + case LOAD_ATTR_METHOD_LAZY_DICT: + return 2; + case KW_NAMES: + return 0; + case INSTRUMENTED_CALL: + return 0; + case CALL: + return 1; + case CALL_BOUND_METHOD_EXACT_ARGS: + return 1; + case CALL_PY_EXACT_ARGS: + return 1; + case CALL_PY_WITH_DEFAULTS: + return 
1; case CALL_NO_KW_TYPE_1: return 1; case CALL_NO_KW_STR_1: return 1; case CALL_NO_KW_TUPLE_1: return 1; + case CALL_NO_KW_ALLOC_AND_ENTER_INIT: + return 1; case EXIT_INIT_CHECK: return 0; + case CALL_BUILTIN_CLASS: + return 1; case CALL_NO_KW_BUILTIN_O: return 1; case CALL_NO_KW_BUILTIN_FAST: return 1; + case CALL_BUILTIN_FAST_WITH_KEYWORDS: + return 1; case CALL_NO_KW_LEN: return 1; case CALL_NO_KW_ISINSTANCE: return 1; + case CALL_NO_KW_LIST_APPEND: + return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_O: return 1; + case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: + return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: return 1; + case INSTRUMENTED_CALL_FUNCTION_EX: + return 0; + case CALL_FUNCTION_EX: + return 1; case MAKE_FUNCTION: return 1; case SET_FUNCTION_ATTRIBUTE: return 1; + case RETURN_GENERATOR: + return 0; case BUILD_SLICE: return 1; case CONVERT_VALUE: @@ -771,6 +1051,26 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case SWAP: return (oparg-2) + 2; + case INSTRUMENTED_INSTRUCTION: + return 0; + case INSTRUMENTED_JUMP_FORWARD: + return 0; + case INSTRUMENTED_JUMP_BACKWARD: + return 0; + case INSTRUMENTED_POP_JUMP_IF_TRUE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_FALSE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NONE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: + return 0; + case EXTENDED_ARG: + return 0; + case CACHE: + return 0; + case RESERVED: + return 0; case _POP_JUMP_IF_FALSE: return 0; case _POP_JUMP_IF_TRUE: diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 6bfcf534646b1e..bd8ea4b6854edd 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1,6 +1,6 @@ // This file is generated by Tools/cases_generator/generate_cases.py // from: -// Python/bytecodes.c +// Python\bytecodes.c // Do not edit! 
case NOP: { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0e9b001b422eb6..d802c1a7c02c8c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1,6 +1,6 @@ // This file is generated by Tools/cases_generator/generate_cases.py // from: -// Python/bytecodes.c +// Python\bytecodes.c // Do not edit! case NOP: { diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 3b0e21f245b386..ccd89c568624d2 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -135,7 +135,7 @@ def effect_str(effects: list[StackEffect]) -> str: pushed: str | None match thing: case parsing.InstDef(): - if thing.kind == "instr" or self.instrs[thing.name].is_viable_uop(): + if thing.kind != "op" or self.instrs[thing.name].is_viable_uop(): instr = self.instrs[thing.name] popped = effect_str(instr.input_effects) pushed = effect_str(instr.output_effects) From 2be404d4ea449b458eb126f221c32e81f72cfbd8 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 20:31:51 +0800 Subject: [PATCH 43/48] fix generated files --- Python/abstract_interp_cases.c.h | 2 +- Python/executor_cases.c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index bd8ea4b6854edd..6bfcf534646b1e 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1,6 +1,6 @@ // This file is generated by Tools/cases_generator/generate_cases.py // from: -// Python\bytecodes.c +// Python/bytecodes.c // Do not edit! 
case NOP: { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d802c1a7c02c8c..0e9b001b422eb6 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1,6 +1,6 @@ // This file is generated by Tools/cases_generator/generate_cases.py // from: -// Python\bytecodes.c +// Python/bytecodes.c // Do not edit! case NOP: { From 29e255d388bcc15efb89fa3bb14425daa9c296d7 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 15 Aug 2023 12:39:24 +0800 Subject: [PATCH 44/48] Address review --- Python/executor_cases.c.h | 12 ++++++------ Python/optimizer.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0e9b001b422eb6..ec988a7b8e19cc 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2736,14 +2736,14 @@ case INSERT: { PyObject *top; - PyObject **stuff; + PyObject **stuff1; + PyObject **stuff2; top = stack_pointer[-1]; - stuff = stack_pointer - 1 - oparg; - stuff = stack_pointer - oparg; - // Inserts TOS at position specified by oparg - PyObject *tos = TOP(); + stuff1 = stack_pointer - 1 - oparg; + stuff2 = stack_pointer - oparg; + // Inserts TOS at position specified by oparg; for (int i = 1; i < oparg + 1; i++) { - stack_pointer[i] = stack_pointer[i - 1]; + stack_pointer[-i] = stack_pointer[-(i - 1)]; } stack_pointer[-1 - oparg] = top; break; diff --git a/Python/optimizer.c b/Python/optimizer.c index ff07926082e7da..d3ac2424038ef9 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -699,7 +699,7 @@ uop_optimize( } OBJECT_STAT_INC(optimization_traces_created); char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); - if (uop_optimize != NULL && *uop_optimize >= '0') { + if (uop_optimize != NULL && *uop_optimize > '0') { trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); } _PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, 
&UOpExecutor_Type, trace_length); From 3c441176a5eb8cbe4d161c037b70d1ccfe22f1c6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 15 Aug 2023 13:30:12 +0800 Subject: [PATCH 45/48] fix up INSERT --- Python/bytecodes.c | 7 +++---- Python/executor_cases.c.h | 4 ---- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 12c5948af7a8e7..9004472e88f2b7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3743,11 +3743,10 @@ dummy_func( return frame; } - op(INSERT, (stuff[oparg], top -- top, stuff[oparg])) { - // Inserts TOS at position specified by oparg - PyObject *tos = TOP(); + op(INSERT, (unused[oparg], top -- top, unused[oparg])) { + // Inserts TOS at position specified by oparg; for (int i = 1; i < oparg + 1; i++) { - stack_pointer[i] = stack_pointer[i - 1]; + stack_pointer[-i] = stack_pointer[-(i - 1)]; } } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ec988a7b8e19cc..5caf6a52ede352 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2736,11 +2736,7 @@ case INSERT: { PyObject *top; - PyObject **stuff1; - PyObject **stuff2; top = stack_pointer[-1]; - stuff1 = stack_pointer - 1 - oparg; - stuff2 = stack_pointer - oparg; // Inserts TOS at position specified by oparg; for (int i = 1; i < oparg + 1; i++) { stack_pointer[-i] = stack_pointer[-(i - 1)]; From b758b470b44166567f77d5d60d284ecc3d2c115e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 15 Aug 2023 13:34:43 +0800 Subject: [PATCH 46/48] remove experimental parts --- Lib/test/test_capi/test_misc.py | 25 - Python/optimizer_analysis.c | 1046 ------------------------------- 2 files changed, 1071 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 6fbfa80a39036c..c81212202d9ef2 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ 
-2618,31 +2618,6 @@ def testfunc(it): with self.assertRaises(StopIteration): next(it) -@unittest.skipIf(os.getenv("PYTHONUOPSOPTIMIZE") is None, "UOps optimization isn't enabled") -class TestUopsOptimization(unittest.TestCase): - - def test_int_constant_propagation(self): - def testfunc(loops): - num = 0 - while num < loops: - x = 0 - y = 1 - z = 2 - a = x + y + z + x + y + z + x + y + z - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(3) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - self.assertEqual(res, 9) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 1) - if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 5cc9312ca1165d..e48e018052c712 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -13,701 +13,6 @@ #include #include "pycore_optimizer.h" -#define PARTITION_DEBUG 1 - -#define STATIC 0 -#define DYNAMIC 1 - -#define OVERALLOCATE_FACTOR 2 - -#ifdef Py_DEBUG -#define DPRINTF(level, ...) \ - if (lltrace >= (level)) { printf(__VA_ARGS__); } -#else -#define DPRINTF(level, ...) 
-#endif - -// TYPENODE is a tagged pointer that uses the last 2 LSB as the tag -#define _Py_PARTITIONNODE_t uintptr_t - -// PARTITIONNODE Tags -typedef enum _Py_TypeNodeTags { - // Node is unused - TYPE_NULL = 0, - // TYPE_ROOT_POSITIVE can point to a root struct or be a NULL - TYPE_ROOT= 1, - // TYPE_REF points to a TYPE_ROOT or a TYPE_REF - TYPE_REF = 2, -} _Py_TypeNodeTags; - -typedef struct _Py_PartitionRootNode { - PyObject_HEAD - // For partial evaluation - // 0 - static - // 1 - dynamic - uint8_t static_or_dynamic; - PyObject *const_val; - // For types (TODO) -} _Py_PartitionRootNode; - -static void -partitionnode_dealloc(PyObject *o) -{ - _Py_PartitionRootNode *self = (_Py_PartitionRootNode *)o; - Py_CLEAR(self->const_val); - Py_TYPE(self)->tp_free(o); -} - -static PyTypeObject _Py_PartitionRootNode_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - .tp_name = "uops abstract interpreter's root node", - .tp_basicsize = sizeof(_Py_PartitionRootNode), - .tp_dealloc = partitionnode_dealloc, - .tp_free = PyObject_Free, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION -}; - -static inline _Py_TypeNodeTags -partitionnode_get_tag(_Py_PARTITIONNODE_t node) -{ - return node & 0b11; -} - -static inline _Py_PARTITIONNODE_t -partitionnode_clear_tag(_Py_PARTITIONNODE_t node) -{ - return node & (~(uintptr_t)(0b11)); -} - -// static_or_dynamic -// 0 - static -// 1 - dynamic -// If static, const_value must be set! 
-static inline _Py_PARTITIONNODE_t -partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) -{ - _Py_PartitionRootNode *root = PyObject_New(_Py_PartitionRootNode, &_Py_PartitionRootNode_Type); - if (root == NULL) { - return 0; - } - root->static_or_dynamic = static_or_dynamic; - root->const_val = Py_NewRef(const_val); - return (_Py_PARTITIONNODE_t)root | TYPE_ROOT; -} - -static inline _Py_PARTITIONNODE_t -partitionnode_make_ref(_Py_PARTITIONNODE_t *node) -{ - return partitionnode_clear_tag((_Py_PARTITIONNODE_t)node) | TYPE_REF; -} - - -static const _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NULL | TYPE_ROOT; - -// Tier 2 types meta interpreter -typedef struct _Py_UOpsAbstractInterpContext { - PyObject_HEAD - // The following are abstract stack and locals. - // points to one element after the abstract stack - _Py_PARTITIONNODE_t *stack_pointer; - int stack_len; - _Py_PARTITIONNODE_t *stack; - int locals_len; - _Py_PARTITIONNODE_t *locals; - - // Indicates whether the stack entry is real or virtualised. - // true - virtual false - real - bool *stack_virtual_or_real; -} _Py_UOpsAbstractInterpContext; - -static void -abstractinterp_dealloc(PyObject *o) -{ - _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; - // Traverse all nodes and decref the root objects (if they are not NULL). - // Note: stack is after locals so this is safe - int total = self->locals_len + self->stack_len; - for (int i = 0; i < total; i++) { - _Py_PARTITIONNODE_t node = self->locals[i]; - if (partitionnode_get_tag(node) == TYPE_ROOT) { - Py_XDECREF(partitionnode_clear_tag(node)); - } - } - PyMem_Free(self->locals); - // No need to free stack because it is allocated together with the locals. 
- Py_TYPE(self)->tp_free((PyObject *)self); -} - -static PyTypeObject _Py_UOpsAbstractInterpContext_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - .tp_name = "uops abstract interpreter's context", - .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext), - .tp_dealloc = abstractinterp_dealloc, - .tp_free = PyObject_Free, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION -}; - -_Py_UOpsAbstractInterpContext * -_Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stacklen) -{ - _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)PyType_GenericAlloc( - (PyTypeObject *)&_Py_UOpsAbstractInterpContext_Type, 0); - if (self == NULL) { - return NULL; - } - - // Setup - self->stack_len = stack_len; - self->locals_len = locals_len; - - _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, (locals_len + stack_len)); - if (locals_with_stack == NULL) { - Py_DECREF(self); - return NULL; - } - - bool *virtual_or_real = PyMem_New(bool, stack_len); - if (virtual_or_real == NULL) { - Py_DECREF(self); - PyMem_Free(locals_with_stack); - return NULL; - } - - for (int i = 0; i < (locals_len + stack_len); i++) { - locals_with_stack[i] = PARTITIONNODE_NULLROOT; - } - - for (int i = 0; i < stack_len; i++) { - virtual_or_real[i] = false; - } - - self->locals = locals_with_stack; - self->stack = locals_with_stack + locals_len; - self->stack_pointer = self->stack + curr_stacklen; - - self->stack_virtual_or_real = virtual_or_real; - - return self; -} - -#if PARTITION_DEBUG -static void print_ctx(_Py_UOpsAbstractInterpContext *ctx); -#endif - -static _Py_PARTITIONNODE_t * -partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) -{ - _Py_TypeNodeTags tag = partitionnode_get_tag(*ref); - while (tag != TYPE_ROOT) { - ref = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*ref)); - tag = partitionnode_get_tag(*ref); - } - return ref; -} - -/** - * @brief Checks if two nodes are in the same partition. 
-*/ -static bool -partitionnode_is_same_partition(_Py_PARTITIONNODE_t *x, _Py_PARTITIONNODE_t *y) -{ - return partitionnode_get_rootptr(x) == partitionnode_get_rootptr(y); -} - -/** - * @brief Performs SET operation. dst tree becomes part of src tree - * - * If src_is_new is set, src is interpreted as a TYPE_ROOT - * not part of the type_context. Otherwise, it is interpreted as a pointer - * to a _Py_PARTITIONNODE_t. - * - * If src_is_new: - * Overwrites the root of the dst tree with the src node - * else: - * Makes the root of the dst tree a TYPE_REF to src - * -*/ -static void -partitionnode_set(_Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_is_new) -{ - { - -#ifdef Py_DEBUG - // If `src_is_new` is set: - // - `src` doesn't belong inside the type context yet. - // - `src` has to be a TYPE_ROOT - // - `src` is to be interpreted as a _Py_TYPENODE_t - if (src_is_new) { - assert(partitionnode_get_tag(*src) == TYPE_ROOT); - } -#endif - - // This prevents cycles from forming - if (!src_is_new && partitionnode_is_same_partition(src, dst)) { - return; - } - - _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); - switch (tag) { - case TYPE_ROOT: { - _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*dst); - Py_XDECREF(old_root); - if (!src_is_new) { - // Make dst a reference to src - *dst = partitionnode_make_ref(src); - break; - } - // Make dst the src - *dst = *src; - break; - } - case TYPE_REF: { - _Py_PARTITIONNODE_t *rootptr = partitionnode_get_rootptr(dst); - _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*rootptr); - Py_XDECREF(old_root); - if (!src_is_new) { - // Traverse up to the root of dst, make root a reference to src - *rootptr = partitionnode_make_ref(src); - break; - } - // Make root of dst the src - *rootptr = *src; - break; - } - default: - Py_UNREACHABLE(); - } - } -} - - -/** - * @brief Performs OVERWRITE operation. 
dst node gets overwritten by src node - * - * If src_is_new is set, src is interpreted as a TYPE_ROOT - * not part of the ctx. Otherwise, it is interpreted as a pointer - * to a _Py_PARTITIONNODE_t. - * - * If src_is_new: - * Removes dst node from its tree (+fixes all the references to dst) - * Overwrite the dst node with the src node - * else: - * Removes dst node from its tree (+fixes all the references to dst) - * Makes the root of the dst tree a TYPE_REF to src - * -*/ -static void -partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, - _Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_is_new) -{ -#ifdef Py_DEBUG - if (src_is_new) { - assert(partitionnode_get_tag((_Py_PARTITIONNODE_t)src) == TYPE_ROOT); - } -#endif - - // This prevents cycles from forming - if (!src_is_new && partitionnode_is_same_partition(src, dst)) { - return; - } - - _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); - switch (tag) { - case TYPE_ROOT: { - - _Py_PARTITIONNODE_t old_dst = *dst; - if (!src_is_new) { - // Make dst a reference to src - *dst = partitionnode_make_ref(src); - assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); - } - else { - // Make dst the src - *dst = (_Py_PARTITIONNODE_t)src; - } - - - /* Pick one child of dst and make that the new root of the dst tree */ - - // Children of dst will have this form - _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( - (_Py_PARTITIONNODE_t *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)dst)); - // Will be initialised to the first child we find - _Py_PARTITIONNODE_t *new_root = (_Py_PARTITIONNODE_t *)NULL; - - // Search locals for children - int nlocals = ctx->locals_len; - for (int i = 0; i < nlocals; i++) { - _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); - if (*node_ptr == child_test) { - if (new_root == NULL) { - // First child encountered! 
initialise root - new_root = node_ptr; - *node_ptr = old_dst; - Py_XINCREF(partitionnode_clear_tag(old_dst)); - } - else { - // Not the first child encounted, point it to the new root - *node_ptr = partitionnode_make_ref(new_root); - } - } - } - - // Search stack for children - int nstack = ctx->stack_len; - for (int i = 0; i < nstack; i++) { - _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); - if (*node_ptr == child_test) { - if (new_root == NULL) { - // First child encountered! initialise root - new_root = node_ptr; - *node_ptr = old_dst; - Py_XINCREF(partitionnode_clear_tag(old_dst)); - } - else { - // Not the first child encounted, point it to the new root - *node_ptr = partitionnode_make_ref(new_root); - } - } - } - - // This ndoe is no longer referencing the old root. - Py_XDECREF(partitionnode_clear_tag(old_dst)); - break; - } - case TYPE_REF: { - - _Py_PARTITIONNODE_t old_dst = *dst; - // Make dst a reference to src - if (!src_is_new) { - // Make dst a reference to src - *dst = partitionnode_make_ref(src); - assert(partitionnode_get_tag(*dst) == TYPE_REF); - assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); - } - else { - // Make dst the src - *dst = (_Py_PARTITIONNODE_t)src; - } - - /* Make all child of src be a reference to the parent of dst */ - - // Children of dst will have this form - _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( - (_Py_PARTITIONNODE_t *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)dst)); - - // Search locals for children - int nlocals = ctx->locals_len; - for (int i = 0; i < nlocals; i++) { - _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); - if (*node_ptr == child_test) { - // Is a child of dst. Point it to the parent of dst - *node_ptr = old_dst; - } - } - - // Search stack for children - int nstack = ctx->stack_len; - for (int i = 0; i < nstack; i++) { - _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); - if (*node_ptr == child_test) { - // Is a child of dst. 
Point it to the parent of dst - *node_ptr = old_dst; - } - } - break; - } - default: - Py_UNREACHABLE(); - } -} - -#ifdef Py_DEBUG - -static void -print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack, int nstack_use, int nstack) -{ - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } - - bool is_local = false; - bool is_stack = false; - - int parent_idx = -1; - - _Py_PARTITIONNODE_t *node = is_printing_stack ? &ctx->stack[i] : &ctx->locals[i]; - _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); - - _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - - if (is_printing_stack) { - DPRINTF(3, "%s", i == nstack_use - 1 ? "." : " "); - } - - if (tag == TYPE_REF) { - _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); - int local_index = (int)(parent - ctx->locals); - int stack_index = (int)(parent - ctx->stack); - is_local = local_index >= 0 && local_index < ctx->locals_len; - is_stack = stack_index >= 0 && stack_index < nstack; - parent_idx = is_local - ? local_index - : is_stack - ? stack_index - : -1; - } - - - _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - DPRINTF(3, "%s:", - ptr == NULL ? "?" : (ptr->static_or_dynamic == STATIC ? "static" : "dynamic")); - if (lltrace >= 4 && ptr != NULL && ptr->static_or_dynamic == STATIC) { - PyObject_Print(ptr->const_val, stdout, 0); - } - - if (tag == TYPE_REF) { - const char *wher = is_local - ? "locals" - : is_stack - ? "stack" - : "const"; - DPRINTF(3, "->%s[%d]", wher, parent_idx); - } -} - -/** - * @brief Print the entries in the abstract interpreter context (along with locals). 
-*/ -static void -print_ctx(_Py_UOpsAbstractInterpContext *ctx) -{ - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } - - _Py_PARTITIONNODE_t *locals = ctx->locals; - _Py_PARTITIONNODE_t *stackptr = ctx->stack_pointer; - - int nstack_use = (int)(stackptr - ctx->stack); - int nstack = ctx->stack_len; - int nlocals = ctx->locals_len; - - DPRINTF(3, " Stack: %p: [", ctx->stack); - for (int i = 0; i < nstack; i++) { - print_ctx_node(ctx, i, true, nstack_use, nstack); - DPRINTF(3, " | "); - } - DPRINTF(3, "]\n"); - - DPRINTF(3, " Locals %p: [", locals); - for (int i = 0; i < nlocals; i++) { - print_ctx_node(ctx, i, false, nstack_use, nstack); - DPRINTF(3, " | "); - } - DPRINTF(3, "]\n"); -} -#endif - -static bool -partitionnode_is_static(_Py_PARTITIONNODE_t *node) -{ - _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - if (root_obj == _Py_NULL) { - return false; - } - return root_obj->static_or_dynamic == STATIC; -} - -// MUST BE GUARDED BY partitionnode_is_static BEFORE CALLING THIS -static inline PyObject * -get_const(_Py_PARTITIONNODE_t *node) -{ - assert(partitionnode_is_static(node)); - _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode * )partitionnode_clear_tag(*root); - return root_obj->const_val; -} - -// Hardcoded for now, @TODO autogenerate these from the DSL. 
-static inline bool -op_is_pure(int opcode, int oparg, _Py_PARTITIONNODE_t *locals) -{ - switch (opcode) { - case LOAD_CONST: - case _BINARY_OP_MULTIPLY_INT: - case _BINARY_OP_ADD_INT: - case _BINARY_OP_SUBTRACT_INT: - case _GUARD_BOTH_INT: - return true; - case LOAD_FAST: - return partitionnode_is_static(&locals[oparg]) && get_const(&locals[oparg]) != _Py_NULL; - default: - return false; - } -} - -static inline bool -op_is_jump(int opcode) -{ - return (opcode == _POP_JUMP_IF_FALSE || opcode == _POP_JUMP_IF_TRUE); -} - - -// Number the jump targets and the jump instructions with a unique (negative) ID. -// This replaces the instruction's opcode in the trace with their negative IDs. -// Aids relocation later when we need to recompute jumps after optimization passes. -static _PyUOpInstruction * -number_jumps_and_targets(_PyUOpInstruction *trace, int trace_len, int *max_id) -{ - int jump_and_target_count = 0; - int jump_and_target_id = -1; - for (int i = 0; i < trace_len; i++) { - if (op_is_jump(trace[i].opcode)) { - // 1 for the jump, 1 for its target - jump_and_target_count += 2; - } - } - - // +1 because 1-based indexing not zero based - _PyUOpInstruction *jump_id_to_instruction = PyMem_New(_PyUOpInstruction, jump_and_target_count + 1); - if (jump_id_to_instruction == NULL) { - return NULL; - } - - - for (int i = 0; i < trace_len; i++) { - if (op_is_jump(trace[i].opcode)) { - int target = trace[i].oparg; - int target_id = jump_and_target_id; - - // 1 for the jump target - assert(jump_and_target_id < 0); - // Negative opcode! - assert(trace[target].opcode > 0); - // Already assigned a jump ID - if (trace[target].opcode < 0) { - target_id = trace[target].opcode; - } - else { - // Else, assign a new jump ID. - jump_id_to_instruction[-target_id] = trace[target]; - trace[target].opcode = target_id; - jump_and_target_id--; - } - - // 1 for the jump - assert(jump_and_target_id < 0); - jump_id_to_instruction[-jump_and_target_id] = trace[i]; - // Negative opcode! 
- assert(trace[i].opcode >= 0); - trace[i].opcode = jump_and_target_id; - jump_and_target_id--; - // Point the jump to the target ID. - trace[i].oparg = target_id; - - } - } - *max_id = jump_and_target_id; - return jump_id_to_instruction; -} - -// Remove contiguous SAVE_IPs, leaving only the last one before a non-SAVE_IP instruction. -static int -remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) -{ -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } -#endif - - // Don't have to allocate a temporary trace array - // because the writer is guaranteed to be behind the reader. - int new_temp_len = 0; - - _PyUOpInstruction curr; - for (int i = 0; i < trace_len; i++) { - curr = trace[i]; - if (i < trace_len && curr.opcode == SAVE_IP && trace[i+1].opcode == SAVE_IP) { - continue; - } - trace[new_temp_len] = curr; - new_temp_len++; - } - - - DPRINTF(2, "Removed %d SAVE_IPs\n", trace_len - new_temp_len); - - return new_temp_len; -} - -/** - * Fixes all side exits due to jumps. This MUST be called as the last - * pass over the trace. Otherwise jumps will point to invalid ends. - * - * Runtime complexity of O(n*k), where n is trace length and k is number of jump - * instructions. Since k is usually quite low, this is nearly linear. -*/ -static void -fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len, - _PyUOpInstruction *jump_id_to_instruction, int max_jump_id) -{ - for (int i = 0; i < trace_len; i++) { - int oparg = trace[i].oparg; - int opcode = trace[i].opcode; - // Indicates it's a jump target or jump instruction - if (opcode < 0 && opcode > max_jump_id) { - opcode = -opcode; - int real_opcode = jump_id_to_instruction[opcode].opcode; - if (op_is_jump(real_opcode)) { - trace[i].opcode = real_opcode; - - // Search for our target ID. 
- int target_id = oparg; - for (int x = 0; x < trace_len; x++) { - if (trace[x].opcode == target_id) { - trace[i].oparg = x; - break; - } - } - - assert(trace[i].oparg >= 0); - } - } - } - - // Final pass to swap out all the jump target IDs with their actual targets. - for (int i = 0; i < trace_len; i++) { - int opcode = trace[i].opcode; - // Indicates it's a jump target or jump instruction - if (opcode < 0 && opcode > max_jump_id) { - int real_oparg = jump_id_to_instruction[-opcode].oparg; - int real_opcode = jump_id_to_instruction[-opcode].opcode; - trace[i].oparg = real_oparg; - trace[i].opcode = real_opcode; - } - } -} - -#ifndef Py_DEBUG -#define GETITEM(v, i) PyList_GET_ITEM((v), (i)) -#else -static inline PyObject * -GETITEM(PyObject *v, Py_ssize_t i) { - assert(PyList_CheckExact(v)); - assert(i >= 0); - assert(i < PyList_GET_SIZE(v)); - return PyList_GET_ITEM(v, i); -} -#endif int _Py_uop_analyze_and_optimize( @@ -717,356 +22,5 @@ _Py_uop_analyze_and_optimize( int curr_stacklen ) { -#define STACK_LEVEL() ((int)(*stack_pointer - stack)) -#define STACK_SIZE() (co->co_stacksize) -#define BASIC_STACKADJ(n) (*stack_pointer += n) - -#ifdef Py_DEBUG -#define STACK_GROW(n) do { \ - assert(n >= 0); \ - BASIC_STACKADJ(n); \ - assert(STACK_LEVEL() <= STACK_SIZE()); \ - } while (0) -#define STACK_SHRINK(n) do { \ - assert(n >= 0); \ - assert(STACK_LEVEL() >= n); \ - BASIC_STACKADJ(-(n)); \ - } while (0) -#else -#define STACK_GROW(n) BASIC_STACKADJ(n) -#define STACK_SHRINK(n) BASIC_STACKADJ(-(n)) -#endif -#define PEEK(idx) (&((*stack_pointer)[-(idx)])) -#define GETLOCAL(idx) (&(locals[idx])) - -#define PARTITIONNODE_SET(src, dst, flag) partitionnode_set((src), (dst), (flag)) -#define PARTITIONNODE_OVERWRITE(src, dst, flag) partitionnode_overwrite(ctx, (src), (dst), (flag)) -#define MAKE_STATIC_ROOT(val) partitionnode_make_root(0, (val)) -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - 
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } -#endif - - PyObject *co_const_copy = NULL; - _PyUOpInstruction *jump_id_to_instruction = NULL; - - _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len * OVERALLOCATE_FACTOR); - if (temp_writebuffer == NULL) { - return trace_len; - } - - int buffer_trace_len = 0; - - _Py_UOpsAbstractInterpContext *ctx = _Py_UOpsAbstractInterpContext_New( - co->co_stacksize, co->co_nlocals, curr_stacklen); - if (ctx == NULL) { - PyMem_Free(temp_writebuffer); - return trace_len; - } - - int max_jump_id = 0; - jump_id_to_instruction = number_jumps_and_targets(trace, trace_len, &max_jump_id); - if (jump_id_to_instruction == NULL) { - goto abstract_error; - } - - // We will be adding more constants due to partial evaluation. - co_const_copy = PyList_New(PyTuple_Size(co->co_consts)); - if (co_const_copy == NULL) { - goto abstract_error; - } - // Copy over the co_const tuple - for (int x = 0; x < PyTuple_GET_SIZE(co->co_consts); x++) { - PyList_SET_ITEM(co_const_copy, x, Py_NewRef(PyTuple_GET_ITEM(co->co_consts, x))); - } - - int oparg; - int opcode; - bool *stack_virtual_or_real = ctx->stack_virtual_or_real; - - _Py_PARTITIONNODE_t **stack_pointer = &ctx->stack_pointer; - _Py_PARTITIONNODE_t *stack = ctx->stack; - _Py_PARTITIONNODE_t *locals = ctx->locals; - for (int i = 0; i < trace_len; i++) { - oparg = trace[i].oparg; - opcode = trace[i].opcode; - - // Is a special jump/target ID, decode that - if (opcode < 0 && opcode > max_jump_id) { - DPRINTF(2, "Special jump target/ID %d\n", opcode); - oparg = jump_id_to_instruction[-opcode].oparg; - opcode = jump_id_to_instruction[-opcode].opcode; - } - - // Partial evaluation - the partition nodes already gave us the static-dynamic variable split. - // For partial evaluation, we simply need to follow these rules: - // 1. Operations on dynamic variables need to be emitted. 
- // If an operand was previously partially evaluated and not yet emitted, then emit the residual with a LOAD_CONST. - // 2. Operations on static variables are a no-op as the abstract interpreter already analyzed their results. - - // For all stack inputs, are their variables static? - int num_inputs = _PyOpcode_num_popped(opcode, oparg, false); - int num_dynamic_operands = 0; - - // We need to also check if this operation is "pure". That it can accept - // constant nodes, output constant nodes, and does not cause any side effects. - bool should_emit = !op_is_pure(opcode, oparg, locals); - - int virtual_objects = 0; - assert(num_inputs >= 0); - for (int x = num_inputs; x > 0; x--) { - if (!partitionnode_is_static(PEEK(x))) { - should_emit = true; - num_dynamic_operands++; - } - if (stack_virtual_or_real[STACK_LEVEL() - num_inputs]) { - virtual_objects++; - } - } - - int num_static_operands = num_inputs - num_dynamic_operands; - - assert(num_static_operands >= 0); - - - if (should_emit) { - if (num_static_operands > 0) { - int real_stack_size = num_dynamic_operands; - int virtual_stack_size = (int)(ctx->stack_pointer - ctx->stack); - assert(virtual_stack_size >= real_stack_size); - for (int x = num_inputs; x > 0; x--) { - // Re-materialise all virtual (partially-evaluated) constants - if (partitionnode_is_static(PEEK(x)) && stack_virtual_or_real[STACK_LEVEL() - x]) { - stack_virtual_or_real[STACK_LEVEL() - x] = false; - PyObject *const_val = get_const(PEEK(x)); - _PyUOpInstruction load_const; - load_const.opcode = LOAD_CONST; - load_const.oparg = (int)PyList_GET_SIZE(co_const_copy); - if (PyList_Append(co_const_copy, const_val) < 0) { - goto abstract_error; - } - - DPRINTF(2, "Emitting LOAD_CONST\n"); - - temp_writebuffer[buffer_trace_len] = load_const; - buffer_trace_len++; - - - // INSERT to the correct position in the stack - int target_entry = virtual_stack_size - x; - - int offset_from_target = real_stack_size - target_entry; - if (offset_from_target > 0) { - 
_PyUOpInstruction insert; - insert.opcode = INSERT; - insert.oparg = offset_from_target; - - DPRINTF(2, "Emitting INSERT %d\n", offset_from_target); - - temp_writebuffer[buffer_trace_len] = insert; - buffer_trace_len++; - } - - // Use the next SAVE_IP - int temp = i; - for (; trace[temp].opcode != SAVE_IP && temp < trace_len; temp++); - assert(trace[temp].opcode == SAVE_IP); - - DPRINTF(2, "Emitting SAVE_IP\n"); - - temp_writebuffer[buffer_trace_len] = trace[temp]; - buffer_trace_len++; - num_dynamic_operands++; - } - - } - } - - DPRINTF(2, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); - - temp_writebuffer[buffer_trace_len] = trace[i]; - buffer_trace_len++; - } - /* - * The following are special cased: - * @TODO: shift these to the DSL - */ - - - DPRINTF(2, " [-] Type propagating across: %s{%d} : %d. {reader: %d, writer: %d}\n", - (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode], - opcode, oparg, - i, buffer_trace_len); - - switch (opcode) { -#include "abstract_interp_cases.c.h" - // @TODO convert these to autogenerated using DSL - case LOAD_FAST: - case LOAD_FAST_CHECK: - STACK_GROW(1); - PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); - break; - case LOAD_FAST_AND_CLEAR: { - STACK_GROW(1); - PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, GETLOCAL(oparg), true); - break; - } - case LOAD_CONST: { - _Py_PARTITIONNODE_t* value = (_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(GETITEM(co_const_copy, oparg)); - STACK_GROW(1); - PARTITIONNODE_OVERWRITE(value, PEEK(1), true); - break; - } - case STORE_FAST: - case STORE_FAST_MAYBE_NULL: { - _Py_PARTITIONNODE_t *value = PEEK(1); - PARTITIONNODE_OVERWRITE(value, GETLOCAL(oparg), false); - STACK_SHRINK(1); - break; - } - case COPY: { - _Py_PARTITIONNODE_t *bottom = PEEK(1 + (oparg - 1)); - STACK_GROW(1); - PARTITIONNODE_OVERWRITE(bottom, PEEK(1), false); - break; - } - - // Arithmetic 
operations - - case _BINARY_OP_MULTIPLY_INT: { - if (!should_emit) { - PyObject *right; - PyObject *left; - PyObject *res; - right = get_const(PEEK(1)); - left = get_const(PEEK(2)); - STAT_INC(BINARY_OP, hit); - res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); - if (res == NULL) goto abstract_error; - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); - break; - } - else { - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); - break; - } - - } - - case _BINARY_OP_ADD_INT: { - if (!should_emit) { - PyObject *right; - PyObject *left; - PyObject *res; - right = get_const(PEEK(1)); - left = get_const(PEEK(2)); - STAT_INC(BINARY_OP, hit); - res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); - if (res == NULL) goto abstract_error; - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); - break; - } - else { - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); - break; - } - } - - case _BINARY_OP_SUBTRACT_INT: { - if (!should_emit) { - PyObject *right; - PyObject *left; - PyObject *res; - right = get_const(PEEK(1)); - left = get_const(PEEK(2)); - STAT_INC(BINARY_OP, hit); - res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); - if (res == NULL) goto abstract_error; - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); - break; - } - else { - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); - break; - } - } - default: - DPRINTF(1, "Unknown opcode in abstract interpreter\n"); - Py_UNREACHABLE(); - } - -#ifdef Py_DEBUG - print_ctx(ctx); -#endif - - // Mark all stack outputs as virtual or real - int stack_outputs = _PyOpcode_num_pushed(opcode, oparg, false); - for (int y = 
stack_outputs; y > 0; y--) { - stack_virtual_or_real[STACK_LEVEL() - y] = !should_emit; - } - - if (opcode == EXIT_TRACE) { - // Copy the rest of the stubs over, then end. - - DPRINTF(2, "Exit trace encountered, emitting the rest of the stubs\n"); - - i++; // We've already emitted an EXIT_TRACE - for (; i < trace_len; i++) { - - DPRINTF(2, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); - - temp_writebuffer[buffer_trace_len] = trace[i]; - buffer_trace_len++; - } - break; - } - } - assert(STACK_SIZE() >= 0); - buffer_trace_len = remove_duplicate_save_ips(temp_writebuffer, buffer_trace_len); - fix_jump_side_exits(temp_writebuffer, buffer_trace_len, jump_id_to_instruction, max_jump_id); - assert(buffer_trace_len <= trace_len); - -#ifdef Py_DEBUG - if (buffer_trace_len < trace_len) { - DPRINTF(2, "Shortened trace by %d instructions\n", trace_len - buffer_trace_len); - } -#endif - - Py_DECREF(ctx); - - PyObject *co_const_final = PyTuple_New(PyList_Size(co_const_copy)); - if (co_const_final == NULL) { - goto abstract_error; - } - // Copy over the co_const tuple - for (int x = 0; x < PyList_GET_SIZE(co_const_copy); x++) { - PyTuple_SET_ITEM(co_const_final, x, Py_NewRef(PyList_GET_ITEM(co_const_copy, x))); - } - - - Py_SETREF(co->co_consts, co_const_final); - Py_XDECREF(co_const_copy); - memcpy(trace, temp_writebuffer, buffer_trace_len * sizeof(_PyUOpInstruction)); - PyMem_Free(temp_writebuffer); - PyMem_Free(jump_id_to_instruction); - return buffer_trace_len; - -abstract_error: - Py_XDECREF(co_const_copy); - Py_DECREF(ctx); - PyMem_Free(temp_writebuffer); - PyMem_Free(jump_id_to_instruction); - assert(PyErr_Occurred()); - PyErr_Clear(); return trace_len; } From 80c7f1826d6c051d3a0648ef945e4ac65a7e9b98 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 15 Aug 2023 17:31:07 +0800 Subject: [PATCH 47/48] revert more changes --- Include/internal/pycore_uops.h | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 30b87e43a3f5d5..254eeca2361bea 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -11,8 +11,8 @@ extern "C" { #define _Py_UOP_MAX_TRACE_LENGTH 64 typedef struct { - int32_t opcode; - int32_t oparg; + uint32_t opcode; + uint32_t oparg; uint64_t operand; // A cache entry } _PyUOpInstruction; From 6a2b204ef97f60cd4d095ab6cbd8c12d2dbc6bdf Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 16 Aug 2023 01:23:07 +0800 Subject: [PATCH 48/48] use memmove --- Python/bytecodes.c | 4 +--- Python/executor_cases.c.h | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9004472e88f2b7..e9a5cf59e7d689 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3745,9 +3745,7 @@ dummy_func( op(INSERT, (unused[oparg], top -- top, unused[oparg])) { // Inserts TOS at position specified by oparg; - for (int i = 1; i < oparg + 1; i++) { - stack_pointer[-i] = stack_pointer[-(i - 1)]; - } + memmove(&stack_pointer[-1 - oparg], &stack_pointer[-oparg], oparg * sizeof(stack_pointer[0])); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5caf6a52ede352..85d27777423abd 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2738,9 +2738,7 @@ PyObject *top; top = stack_pointer[-1]; // Inserts TOS at position specified by oparg; - for (int i = 1; i < oparg + 1; i++) { - stack_pointer[-i] = stack_pointer[-(i - 1)]; - } + memmove(&stack_pointer[-1 - oparg], &stack_pointer[-oparg], oparg * sizeof(stack_pointer[0])); stack_pointer[-1 - oparg] = top; break; }