gh-128563: Move GO_TO_INSTRUCTION and PREDICT to cases generator (GH-129115)
https://github.com/python/cpython/commit/86c1a60d5a28cfb51f8843b307f8969c40e... commit: 86c1a60d5a28cfb51f8843b307f8969c40e3bbec branch: main author: Ken Jin <kenjin@python.org> committer: Fidget-Spinner <kenjin4096@gmail.com> date: 2025-01-22T09:22:25+08:00 summary: gh-128563: Move GO_TO_INSTRUCTION and PREDICT to cases generator (GH-129115) files: M Lib/test/test_generated_cases.py M Python/ceval_macros.h M Python/generated_cases.c.h M Tools/cases_generator/generators_common.py M Tools/cases_generator/tier1_generator.py diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 4b17bd5c797375..c93d6fc3d237a2 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -412,7 +412,7 @@ def test_predictions(self): frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(OP1); - PREDICTED(OP1); + PREDICTED_OP1:; _PyStackRef res; res = Py_None; stack_pointer[-1] = res; @@ -646,7 +646,7 @@ def test_macro_instruction(self): frame->instr_ptr = next_instr; next_instr += 6; INSTRUCTION_STATS(OP); - PREDICTED(OP); + PREDICTED_OP:; _Py_CODEUNIT* const this_instr = next_instr - 6; (void)this_instr; _PyStackRef left; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index feaac07fe435b6..62c80c96e422fd 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -166,35 +166,6 @@ GETITEM(PyObject *v, Py_ssize_t i) { #define JUMPBY(x) (next_instr += (x)) #define SKIP_OVER(x) (next_instr += (x)) -/* OpCode prediction macros - Some opcodes tend to come in pairs thus making it possible to - predict the second code when the first is run. For example, - COMPARE_OP is often followed by POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE. - - Verifying the prediction costs a single high-speed test of a register - variable against a constant. If the pairing was good, then the - processor's own internal branch predication has a high likelihood of - success, resulting in a nearly zero-overhead transition to the - next opcode. A successful prediction saves a trip through the eval-loop - including its unpredictable switch-case branch. Combined with the - processor's internal branch prediction, a successful PREDICT has the - effect of making the two opcodes run as if they were a single new opcode - with the bodies combined. - - If collecting opcode statistics, your choices are to either keep the - predictions turned-on and interpret the results as if some opcodes - had been combined or turn-off predictions so that the opcode frequency - counter updates for both opcodes. - - Opcode prediction is disabled with threaded code, since the latter allows - the CPU to record separate branch prediction information for each - opcode. - -*/ - -#define PREDICT_ID(op) PRED_##op -#define PREDICTED(op) PREDICT_ID(op): - /* Stack manipulation macros */ @@ -260,8 +231,6 @@ GETITEM(PyObject *v, Py_ssize_t i) { GETLOCAL(i) = value; \ PyStackRef_XCLOSE(tmp); } while (0) -#define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op) - #ifdef Py_STATS #define UPDATE_MISS_STATS(INSTNAME) \ do { \ @@ -281,7 +250,7 @@ GETITEM(PyObject *v, Py_ssize_t i) { /* This is only a single jump on release builds! */ \ UPDATE_MISS_STATS((INSTNAME)); \ assert(_PyOpcode_Deopt[opcode] == (INSTNAME)); \ - GO_TO_INSTRUCTION(INSTNAME); \ + goto PREDICTED_##INSTNAME; \ } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a8dbcabde538d5..d0ab667a8bc8ab 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -13,7 +13,7 @@ frame->instr_ptr = next_instr; next_instr += 6; INSTRUCTION_STATS(BINARY_OP); - PREDICTED(BINARY_OP); + PREDICTED_BINARY_OP:; _Py_CODEUNIT* const this_instr = next_instr - 6; (void)this_instr; _PyStackRef lhs; @@ -484,7 +484,7 @@ frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(BINARY_SUBSCR); - PREDICTED(BINARY_SUBSCR); + PREDICTED_BINARY_SUBSCR:; _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef container; @@ -931,7 +931,7 @@ frame->instr_ptr = next_instr; next_instr += 4; INSTRUCTION_STATS(CALL); - PREDICTED(CALL); + PREDICTED_CALL:; _Py_CODEUNIT* const this_instr = next_instr - 4; (void)this_instr; _PyStackRef *callable; @@ -1707,7 +1707,7 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(CALL_FUNCTION_EX); - PREDICTED(CALL_FUNCTION_EX); + PREDICTED_CALL_FUNCTION_EX:; _Py_CODEUNIT* const this_instr = next_instr - 1; (void)this_instr; _PyStackRef func; @@ -1960,7 +1960,7 @@ frame->instr_ptr = next_instr; next_instr += 4; INSTRUCTION_STATS(CALL_KW); - PREDICTED(CALL_KW); + PREDICTED_CALL_KW:; _Py_CODEUNIT* const this_instr = next_instr - 4; (void)this_instr; _PyStackRef *callable; @@ -3299,7 +3299,7 @@ frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(COMPARE_OP); - PREDICTED(COMPARE_OP); + PREDICTED_COMPARE_OP:; _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef left; @@ -3479,7 +3479,7 @@ frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(CONTAINS_OP); - PREDICTED(CONTAINS_OP); + PREDICTED_CONTAINS_OP:; _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef left; @@ -4000,7 +4000,7 @@ frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(FOR_ITER); - PREDICTED(FOR_ITER); + PREDICTED_FOR_ITER:; _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef iter; @@ -4631,7 +4631,8 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_CALL_FUNCTION_EX); - GO_TO_INSTRUCTION(CALL_FUNCTION_EX); + + goto PREDICTED_CALL_FUNCTION_EX; } TARGET(INSTRUMENTED_CALL_KW) { @@ -4655,7 +4656,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); if (err) goto error; PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); - GO_TO_INSTRUCTION(CALL_KW); + goto PREDICTED_CALL_KW; } TARGET(INSTRUMENTED_END_FOR) { @@ -4846,7 +4847,7 @@ // cancel out the decrement that will happen in LOAD_SUPER_ATTR; we // don't want to specialize instrumented instructions PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); - GO_TO_INSTRUCTION(LOAD_SUPER_ATTR); + goto PREDICTED_LOAD_SUPER_ATTR; } TARGET(INSTRUMENTED_LOAD_SUPER_METHOD) { @@ -4858,7 +4859,7 @@ // cancel out the decrement that will happen in LOAD_SUPER_ATTR; we // don't want to specialize instrumented instructions PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); - GO_TO_INSTRUCTION(LOAD_SUPER_METHOD); + goto PREDICTED_LOAD_SUPER_METHOD; } TARGET(INSTRUMENTED_NOT_TAKEN) { @@ -5309,7 +5310,7 @@ frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR); - PREDICTED(LOAD_ATTR); + PREDICTED_LOAD_ATTR:; _Py_CODEUNIT* const this_instr = next_instr - 10; (void)this_instr; _PyStackRef owner; @@ -5826,7 +5827,7 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(LOAD_CONST); - PREDICTED(LOAD_CONST); + PREDICTED_LOAD_CONST:; _Py_CODEUNIT* const this_instr = next_instr - 1; (void)this_instr; _PyStackRef value; @@ -6069,7 +6070,7 @@ frame->instr_ptr = next_instr; next_instr += 5; INSTRUCTION_STATS(LOAD_GLOBAL); - PREDICTED(LOAD_GLOBAL); + PREDICTED_LOAD_GLOBAL:; _Py_CODEUNIT* const this_instr = next_instr - 5; (void)this_instr; _PyStackRef *res; @@ -6219,7 +6220,7 @@ frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_METHOD); - PREDICTED(LOAD_METHOD); + PREDICTED_LOAD_METHOD:; _Py_CODEUNIT* const this_instr = next_instr - 10; (void)this_instr; _PyStackRef owner; @@ -6479,7 +6480,7 @@ frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(LOAD_SUPER_ATTR); - PREDICTED(LOAD_SUPER_ATTR); + PREDICTED_LOAD_SUPER_ATTR:; _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef global_super_st; @@ -6609,7 +6610,7 @@ frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(LOAD_SUPER_METHOD); - PREDICTED(LOAD_SUPER_METHOD); + PREDICTED_LOAD_SUPER_METHOD:; _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef global_super_st; @@ -7179,7 +7180,7 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(RESUME); - PREDICTED(RESUME); + PREDICTED_RESUME:; _Py_CODEUNIT* const this_instr = next_instr - 1; (void)this_instr; // _LOAD_BYTECODE @@ -7330,7 +7331,7 @@ frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(SEND); - PREDICTED(SEND); + PREDICTED_SEND:; _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef receiver; @@ -7571,7 +7572,7 @@ frame->instr_ptr = next_instr; next_instr += 5; INSTRUCTION_STATS(STORE_ATTR); - PREDICTED(STORE_ATTR); + PREDICTED_STORE_ATTR:; _Py_CODEUNIT* const this_instr = next_instr - 5; (void)this_instr; _PyStackRef owner; @@ -7935,7 +7936,7 @@ frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(STORE_SUBSCR); - PREDICTED(STORE_SUBSCR); + PREDICTED_STORE_SUBSCR:; _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef container; @@ -8065,7 +8066,7 @@ frame->instr_ptr = next_instr; next_instr += 4; INSTRUCTION_STATS(TO_BOOL); - PREDICTED(TO_BOOL); + PREDICTED_TO_BOOL:; _Py_CODEUNIT* const this_instr = next_instr - 4; (void)this_instr; _PyStackRef value; @@ -8301,7 +8302,7 @@ frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(UNPACK_SEQUENCE); - PREDICTED(UNPACK_SEQUENCE); + PREDICTED_UNPACK_SEQUENCE:; _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef seq; diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index c441569b7e70dc..9edf3d4898eb4d 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -127,6 +127,7 @@ def __init__(self, out: CWriter): "DISPATCH": self.dispatch, "INSTRUCTION_SIZE": self.instruction_size, "POP_INPUT": self.pop_input, + "GO_TO_INSTRUCTION": self.go_to_instruction, } self.out = out @@ -402,6 +403,23 @@ def sync_sp( self._print_storage(storage) return True + def go_to_instruction( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: Uop, + storage: Storage, + inst: Instruction | None, + ) -> bool: + next(tkn_iter) + name = next(tkn_iter) + next(tkn_iter) + next(tkn_iter) + assert name.kind == "IDENTIFIER" + self.emit("\n") + self.emit(f"goto PREDICTED_{name.text};\n") + return True + def emit_save(self, storage: Storage) -> None: storage.save(self.out) self._print_storage(storage) diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py index 95876c387bd745..f4409da3d83ccd 100644 --- a/Tools/cases_generator/tier1_generator.py +++ b/Tools/cases_generator/tier1_generator.py @@ -158,7 +158,7 @@ def generate_tier1( out.emit(f"next_instr += {inst.size};\n") out.emit(f"INSTRUCTION_STATS({name});\n") if inst.is_target: - out.emit(f"PREDICTED({name});\n") + out.emit(f"PREDICTED_{name}:;\n") if needs_this: out.emit(f"_Py_CODEUNIT* const this_instr = next_instr - {inst.size};\n") out.emit(unused_guard)
participants (1)
-
Fidget-Spinner