diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index be582122118e44..e13b2b373c47b1 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -198,6 +198,9 @@ struct _ts { PyObject *current_executor; + /* Internal to the JIT */ + struct _PyExitData *jit_exit; + uint64_t dict_global_version; /* Used to store/retrieve `threading.local` keys/values for this thread */ diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f1f427d99dea69..dbf702d27762b1 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -944,6 +944,7 @@ struct _is { bool jit; struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_deletion_list_head; + struct _PyExecutorObject *cold_executor; int executor_deletion_list_remaining_capacity; size_t trace_run_counter; _rare_events rare_events; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 8b7f12bf03d624..f644551c48fe5b 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -67,8 +67,9 @@ typedef struct { #endif } _PyUOpInstruction; -typedef struct { +typedef struct _PyExitData { uint32_t target; + uint16_t index; _Py_BackoffCounter temperature; struct _PyExecutorObject *executor; } _PyExitData; @@ -354,6 +355,14 @@ PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, _PyExecutorObject **exec_ptr, int chain_depth); +static inline _PyExecutorObject *_PyExecutor_FromExit(_PyExitData *exit) +{ + _PyExitData *exit0 = exit - exit->index; + return (_PyExecutorObject *)(((char *)exit0) - offsetof(_PyExecutorObject, exits)); +} + +extern _PyExecutorObject *_PyExecutor_GetColdExecutor(void); + static inline int is_terminator(const _PyUOpInstruction *uop) { int opcode = uop->opcode; diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index a9432401525ebb..684969a23c4d22 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -81,99 +81,100 @@ extern "C" { #define _CHECK_STACK_SPACE 357 #define _CHECK_STACK_SPACE_OPERAND 358 #define _CHECK_VALIDITY 359 -#define _COMPARE_OP 360 -#define _COMPARE_OP_FLOAT 361 -#define _COMPARE_OP_INT 362 -#define _COMPARE_OP_STR 363 -#define _CONTAINS_OP 364 -#define _CONTAINS_OP_DICT 365 -#define _CONTAINS_OP_SET 366 +#define _COLD_EXIT 360 +#define _COMPARE_OP 361 +#define _COMPARE_OP_FLOAT 362 +#define _COMPARE_OP_INT 363 +#define _COMPARE_OP_STR 364 +#define _CONTAINS_OP 365 +#define _CONTAINS_OP_DICT 366 +#define _CONTAINS_OP_SET 367 #define _CONVERT_VALUE CONVERT_VALUE -#define _COPY 367 -#define _COPY_1 368 -#define _COPY_2 369 -#define _COPY_3 370 +#define _COPY 368 +#define _COPY_1 369 +#define _COPY_2 370 +#define _COPY_3 371 #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 371 +#define _CREATE_INIT_FRAME 372 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 372 +#define _DEOPT 373 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 373 -#define _DO_CALL_FUNCTION_EX 374 -#define _DO_CALL_KW 375 +#define _DO_CALL 374 +#define _DO_CALL_FUNCTION_EX 375 +#define _DO_CALL_KW 376 #define _END_FOR END_FOR #define _END_SEND END_SEND -#define _ERROR_POP_N 376 +#define _ERROR_POP_N 377 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 377 -#define _EXPAND_METHOD_KW 378 -#define _FATAL_ERROR 379 +#define _EXPAND_METHOD 378 +#define _EXPAND_METHOD_KW 379 +#define _FATAL_ERROR 380 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 380 -#define _FOR_ITER_GEN_FRAME 381 -#define _FOR_ITER_TIER_TWO 382 +#define _FOR_ITER 381 +#define _FOR_ITER_GEN_FRAME 382 +#define _FOR_ITER_TIER_TWO 383 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BINARY_OP_EXTEND 383 -#define _GUARD_CALLABLE_ISINSTANCE 384 -#define _GUARD_CALLABLE_LEN 385 -#define _GUARD_CALLABLE_LIST_APPEND 386 -#define _GUARD_CALLABLE_STR_1 387 -#define _GUARD_CALLABLE_TUPLE_1 388 -#define _GUARD_CALLABLE_TYPE_1 389 -#define _GUARD_DORV_NO_DICT 390 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 391 -#define _GUARD_GLOBALS_VERSION 392 -#define _GUARD_IS_FALSE_POP 393 -#define _GUARD_IS_NONE_POP 394 -#define _GUARD_IS_NOT_NONE_POP 395 -#define _GUARD_IS_TRUE_POP 396 -#define _GUARD_KEYS_VERSION 397 -#define _GUARD_NOS_DICT 398 -#define _GUARD_NOS_FLOAT 399 -#define _GUARD_NOS_INT 400 -#define _GUARD_NOS_LIST 401 -#define _GUARD_NOS_NOT_NULL 402 -#define _GUARD_NOS_NULL 403 -#define _GUARD_NOS_OVERFLOWED 404 -#define _GUARD_NOS_TUPLE 405 -#define _GUARD_NOS_UNICODE 406 -#define _GUARD_NOT_EXHAUSTED_LIST 407 -#define _GUARD_NOT_EXHAUSTED_RANGE 408 -#define _GUARD_NOT_EXHAUSTED_TUPLE 409 -#define _GUARD_THIRD_NULL 410 -#define _GUARD_TOS_ANY_SET 411 -#define _GUARD_TOS_DICT 412 -#define _GUARD_TOS_FLOAT 413 -#define _GUARD_TOS_INT 414 -#define _GUARD_TOS_LIST 415 -#define _GUARD_TOS_OVERFLOWED 416 -#define _GUARD_TOS_SLICE 417 -#define _GUARD_TOS_TUPLE 418 -#define _GUARD_TOS_UNICODE 419 -#define _GUARD_TYPE_VERSION 420 -#define _GUARD_TYPE_VERSION_AND_LOCK 421 +#define _GUARD_BINARY_OP_EXTEND 384 +#define _GUARD_CALLABLE_ISINSTANCE 385 +#define _GUARD_CALLABLE_LEN 386 +#define _GUARD_CALLABLE_LIST_APPEND 387 +#define _GUARD_CALLABLE_STR_1 388 +#define _GUARD_CALLABLE_TUPLE_1 389 +#define _GUARD_CALLABLE_TYPE_1 390 +#define _GUARD_DORV_NO_DICT 391 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 392 +#define _GUARD_GLOBALS_VERSION 393 +#define _GUARD_IS_FALSE_POP 394 +#define _GUARD_IS_NONE_POP 395 +#define _GUARD_IS_NOT_NONE_POP 396 +#define _GUARD_IS_TRUE_POP 397 +#define _GUARD_KEYS_VERSION 398 +#define _GUARD_NOS_DICT 399 +#define _GUARD_NOS_FLOAT 400 +#define _GUARD_NOS_INT 401 +#define _GUARD_NOS_LIST 402 +#define _GUARD_NOS_NOT_NULL 403 +#define _GUARD_NOS_NULL 404 +#define _GUARD_NOS_OVERFLOWED 405 +#define _GUARD_NOS_TUPLE 406 +#define _GUARD_NOS_UNICODE 407 +#define _GUARD_NOT_EXHAUSTED_LIST 408 +#define _GUARD_NOT_EXHAUSTED_RANGE 409 +#define _GUARD_NOT_EXHAUSTED_TUPLE 410 +#define _GUARD_THIRD_NULL 411 +#define _GUARD_TOS_ANY_SET 412 +#define _GUARD_TOS_DICT 413 +#define _GUARD_TOS_FLOAT 414 +#define _GUARD_TOS_INT 415 +#define _GUARD_TOS_LIST 416 +#define _GUARD_TOS_OVERFLOWED 417 +#define _GUARD_TOS_SLICE 418 +#define _GUARD_TOS_TUPLE 419 +#define _GUARD_TOS_UNICODE 420 +#define _GUARD_TYPE_VERSION 421 +#define _GUARD_TYPE_VERSION_AND_LOCK 422 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 422 -#define _INIT_CALL_PY_EXACT_ARGS 423 -#define _INIT_CALL_PY_EXACT_ARGS_0 424 -#define _INIT_CALL_PY_EXACT_ARGS_1 425 -#define _INIT_CALL_PY_EXACT_ARGS_2 426 -#define _INIT_CALL_PY_EXACT_ARGS_3 427 -#define _INIT_CALL_PY_EXACT_ARGS_4 428 -#define _INSERT_NULL 429 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 423 +#define _INIT_CALL_PY_EXACT_ARGS 424 +#define _INIT_CALL_PY_EXACT_ARGS_0 425 +#define _INIT_CALL_PY_EXACT_ARGS_1 426 +#define _INIT_CALL_PY_EXACT_ARGS_2 427 +#define _INIT_CALL_PY_EXACT_ARGS_3 428 +#define _INIT_CALL_PY_EXACT_ARGS_4 429 +#define _INSERT_NULL 430 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -183,177 +184,177 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 430 +#define _IS_NONE 431 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 431 -#define _ITER_CHECK_RANGE 432 -#define _ITER_CHECK_TUPLE 433 -#define _ITER_JUMP_LIST 434 -#define _ITER_JUMP_RANGE 435 -#define _ITER_JUMP_TUPLE 436 -#define _ITER_NEXT_LIST 437 -#define _ITER_NEXT_LIST_TIER_TWO 438 -#define _ITER_NEXT_RANGE 439 -#define _ITER_NEXT_TUPLE 440 -#define _JUMP_TO_TOP 441 +#define _ITER_CHECK_LIST 432 +#define _ITER_CHECK_RANGE 433 +#define _ITER_CHECK_TUPLE 434 +#define _ITER_JUMP_LIST 435 +#define _ITER_JUMP_RANGE 436 +#define _ITER_JUMP_TUPLE 437 +#define _ITER_NEXT_LIST 438 +#define _ITER_NEXT_LIST_TIER_TWO 439 +#define _ITER_NEXT_RANGE 440 +#define _ITER_NEXT_TUPLE 441 +#define _JUMP_TO_TOP 442 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 442 -#define _LOAD_ATTR_CLASS 443 +#define _LOAD_ATTR 443 +#define _LOAD_ATTR_CLASS 444 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 444 -#define _LOAD_ATTR_METHOD_LAZY_DICT 445 -#define _LOAD_ATTR_METHOD_NO_DICT 446 -#define _LOAD_ATTR_METHOD_WITH_VALUES 447 -#define _LOAD_ATTR_MODULE 448 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 449 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 450 -#define _LOAD_ATTR_PROPERTY_FRAME 451 -#define _LOAD_ATTR_SLOT 452 -#define _LOAD_ATTR_WITH_HINT 453 +#define _LOAD_ATTR_INSTANCE_VALUE 445 +#define _LOAD_ATTR_METHOD_LAZY_DICT 446 +#define _LOAD_ATTR_METHOD_NO_DICT 447 +#define _LOAD_ATTR_METHOD_WITH_VALUES 448 +#define _LOAD_ATTR_MODULE 449 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 450 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 451 +#define _LOAD_ATTR_PROPERTY_FRAME 452 +#define _LOAD_ATTR_SLOT 453 +#define _LOAD_ATTR_WITH_HINT 454 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 454 +#define _LOAD_BYTECODE 455 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 455 -#define _LOAD_CONST_INLINE_BORROW 456 -#define _LOAD_CONST_UNDER_INLINE 457 -#define _LOAD_CONST_UNDER_INLINE_BORROW 458 +#define _LOAD_CONST_INLINE 456 +#define _LOAD_CONST_INLINE_BORROW 457 +#define _LOAD_CONST_UNDER_INLINE 458 +#define _LOAD_CONST_UNDER_INLINE_BORROW 459 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 459 -#define _LOAD_FAST_0 460 -#define _LOAD_FAST_1 461 -#define _LOAD_FAST_2 462 -#define _LOAD_FAST_3 463 -#define _LOAD_FAST_4 464 -#define _LOAD_FAST_5 465 -#define _LOAD_FAST_6 466 -#define _LOAD_FAST_7 467 +#define _LOAD_FAST 460 +#define _LOAD_FAST_0 461 +#define _LOAD_FAST_1 462 +#define _LOAD_FAST_2 463 +#define _LOAD_FAST_3 464 +#define _LOAD_FAST_4 465 +#define _LOAD_FAST_5 466 +#define _LOAD_FAST_6 467 +#define _LOAD_FAST_7 468 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 468 -#define _LOAD_FAST_BORROW_0 469 -#define _LOAD_FAST_BORROW_1 470 -#define _LOAD_FAST_BORROW_2 471 -#define _LOAD_FAST_BORROW_3 472 -#define _LOAD_FAST_BORROW_4 473 -#define _LOAD_FAST_BORROW_5 474 -#define _LOAD_FAST_BORROW_6 475 -#define _LOAD_FAST_BORROW_7 476 +#define _LOAD_FAST_BORROW 469 +#define _LOAD_FAST_BORROW_0 470 +#define _LOAD_FAST_BORROW_1 471 +#define _LOAD_FAST_BORROW_2 472 +#define _LOAD_FAST_BORROW_3 473 +#define _LOAD_FAST_BORROW_4 474 +#define _LOAD_FAST_BORROW_5 475 +#define _LOAD_FAST_BORROW_6 476 +#define _LOAD_FAST_BORROW_7 477 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 477 -#define _LOAD_GLOBAL_BUILTINS 478 -#define _LOAD_GLOBAL_MODULE 479 +#define _LOAD_GLOBAL 478 +#define _LOAD_GLOBAL_BUILTINS 479 +#define _LOAD_GLOBAL_MODULE 480 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 480 -#define _LOAD_SMALL_INT_0 481 -#define _LOAD_SMALL_INT_1 482 -#define _LOAD_SMALL_INT_2 483 -#define _LOAD_SMALL_INT_3 484 -#define _LOAD_SPECIAL 485 +#define _LOAD_SMALL_INT 481 +#define _LOAD_SMALL_INT_0 482 +#define _LOAD_SMALL_INT_1 483 +#define _LOAD_SMALL_INT_2 484 +#define _LOAD_SMALL_INT_3 485 +#define _LOAD_SPECIAL 486 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 486 +#define _MAKE_CALLARGS_A_TUPLE 487 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 487 +#define _MAKE_WARM 488 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 488 -#define _MAYBE_EXPAND_METHOD_KW 489 -#define _MONITOR_CALL 490 -#define _MONITOR_CALL_KW 491 -#define _MONITOR_JUMP_BACKWARD 492 -#define _MONITOR_RESUME 493 +#define _MAYBE_EXPAND_METHOD 489 +#define _MAYBE_EXPAND_METHOD_KW 490 +#define _MONITOR_CALL 491 +#define _MONITOR_CALL_KW 492 +#define _MONITOR_JUMP_BACKWARD 493 +#define _MONITOR_RESUME 494 #define _NOP NOP -#define _POP_CALL 494 -#define _POP_CALL_LOAD_CONST_INLINE_BORROW 495 -#define _POP_CALL_ONE 496 -#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 497 -#define _POP_CALL_TWO 498 -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 499 +#define _POP_CALL 495 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 496 +#define _POP_CALL_ONE 497 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 498 +#define _POP_CALL_TWO 499 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 500 #define _POP_EXCEPT POP_EXCEPT #define _POP_ITER POP_ITER -#define _POP_JUMP_IF_FALSE 500 -#define _POP_JUMP_IF_TRUE 501 +#define _POP_JUMP_IF_FALSE 501 +#define _POP_JUMP_IF_TRUE 502 #define _POP_TOP POP_TOP -#define _POP_TOP_FLOAT 502 -#define _POP_TOP_INT 503 -#define _POP_TOP_LOAD_CONST_INLINE 504 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 505 -#define _POP_TOP_NOP 506 -#define _POP_TOP_UNICODE 507 -#define _POP_TWO 508 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 509 +#define _POP_TOP_FLOAT 503 +#define _POP_TOP_INT 504 +#define _POP_TOP_LOAD_CONST_INLINE 505 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 506 +#define _POP_TOP_NOP 507 +#define _POP_TOP_UNICODE 508 +#define _POP_TWO 509 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 510 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 510 +#define _PUSH_FRAME 511 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 511 -#define _PY_FRAME_GENERAL 512 -#define _PY_FRAME_KW 513 -#define _QUICKEN_RESUME 514 -#define _REPLACE_WITH_TRUE 515 +#define _PUSH_NULL_CONDITIONAL 512 +#define _PY_FRAME_GENERAL 513 +#define _PY_FRAME_KW 514 +#define _QUICKEN_RESUME 515 +#define _REPLACE_WITH_TRUE 516 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 516 -#define _SEND 517 -#define _SEND_GEN_FRAME 518 +#define _SAVE_RETURN_OFFSET 517 +#define _SEND 518 +#define _SEND_GEN_FRAME 519 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 519 -#define _STORE_ATTR 520 -#define _STORE_ATTR_INSTANCE_VALUE 521 -#define _STORE_ATTR_SLOT 522 -#define _STORE_ATTR_WITH_HINT 523 +#define _START_EXECUTOR 520 +#define _STORE_ATTR 521 +#define _STORE_ATTR_INSTANCE_VALUE 522 +#define _STORE_ATTR_SLOT 523 +#define _STORE_ATTR_WITH_HINT 524 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 524 -#define _STORE_FAST_0 525 -#define _STORE_FAST_1 526 -#define _STORE_FAST_2 527 -#define _STORE_FAST_3 528 -#define _STORE_FAST_4 529 -#define _STORE_FAST_5 530 -#define _STORE_FAST_6 531 -#define _STORE_FAST_7 532 +#define _STORE_FAST 525 +#define _STORE_FAST_0 526 +#define _STORE_FAST_1 527 +#define _STORE_FAST_2 528 +#define _STORE_FAST_3 529 +#define _STORE_FAST_4 530 +#define _STORE_FAST_5 531 +#define _STORE_FAST_6 532 +#define _STORE_FAST_7 533 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 533 -#define _STORE_SUBSCR 534 -#define _STORE_SUBSCR_DICT 535 -#define _STORE_SUBSCR_LIST_INT 536 -#define _SWAP 537 -#define _SWAP_2 538 -#define _SWAP_3 539 -#define _TIER2_RESUME_CHECK 540 -#define _TO_BOOL 541 +#define _STORE_SLICE 534 +#define _STORE_SUBSCR 535 +#define _STORE_SUBSCR_DICT 536 +#define _STORE_SUBSCR_LIST_INT 537 +#define _SWAP 538 +#define _SWAP_2 539 +#define _SWAP_3 540 +#define _TIER2_RESUME_CHECK 541 +#define _TO_BOOL 542 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 542 +#define _TO_BOOL_LIST 543 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 543 +#define _TO_BOOL_STR 544 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 544 -#define _UNPACK_SEQUENCE_LIST 545 -#define _UNPACK_SEQUENCE_TUPLE 546 -#define _UNPACK_SEQUENCE_TWO_TUPLE 547 +#define _UNPACK_SEQUENCE 545 +#define _UNPACK_SEQUENCE_LIST 546 +#define _UNPACK_SEQUENCE_TUPLE 547 +#define _UNPACK_SEQUENCE_TWO_TUPLE 548 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 547 +#define MAX_UOP_ID 548 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index ff7e800aa9bb1a..28ddd6e8f86482 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -336,6 +336,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_DEOPT] = 0, [_ERROR_POP_N] = HAS_ARG_FLAG, [_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG, + [_COLD_EXIT] = HAS_ESCAPES_FLAG, }; const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = { @@ -419,6 +420,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", [_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND", [_CHECK_VALIDITY] = "_CHECK_VALIDITY", + [_COLD_EXIT] = "_COLD_EXIT", [_COMPARE_OP] = "_COMPARE_OP", [_COMPARE_OP_FLOAT] = "_COMPARE_OP_FLOAT", [_COMPARE_OP_INT] = "_COMPARE_OP_INT", @@ -1301,6 +1303,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _TIER2_RESUME_CHECK: return 0; + case _COLD_EXIT: + return 0; default: return -1; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d9abc4c53d1f50..0244320e58817d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2964,6 +2964,7 @@ dummy_func( else { this_instr[1].counter = initial_jump_backoff_counter(); assert(tstate->current_executor == NULL); + assert(executor != tstate->interp->cold_executor); GOTO_TIER_TWO(executor); } } @@ -3028,6 +3029,7 @@ dummy_func( } DISPATCH_GOTO(); } + assert(executor != tstate->interp->cold_executor); GOTO_TIER_TWO(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); @@ -5238,9 +5240,8 @@ dummy_func( tier2 op(_EXIT_TRACE, (exit_p/4 --)) { _PyExitData *exit = (_PyExitData *)exit_p; - PyCodeObject *code = _PyFrame_GetCode(frame); - _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; #if defined(Py_DEBUG) && !defined(_Py_JIT) + _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); if (frame->lltrace >= 2) { printf("SIDE EXIT: [UOp "); @@ -5251,32 +5252,7 @@ dummy_func( _PyOpcode_OpName[target->op.code]); } #endif - if (exit->executor && !exit->executor->vm_data.valid) { - exit->temperature = initial_temperature_backoff_counter(); - Py_CLEAR(exit->executor); - } - if (exit->executor == NULL) { - _Py_BackoffCounter temperature = exit->temperature; - if (!backoff_counter_triggers(temperature)) { - exit->temperature = advance_backoff_counter(temperature); - GOTO_TIER_ONE(target); - } - _PyExecutorObject *executor; - if (target->op.code == ENTER_EXECUTOR) { - executor = code->co_executors->executors[target->op.arg]; - Py_INCREF(executor); - } - else { - int chain_depth = current_executor->vm_data.chain_depth + 1; - int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth); - if (optimized <= 0) { - exit->temperature = restart_backoff_counter(temperature); - GOTO_TIER_ONE(optimized < 0 ? NULL : target); - } - exit->temperature = initial_temperature_backoff_counter(); - } - exit->executor = executor; - } + tstate->jit_exit = exit; GOTO_TIER_TWO(exit->executor); } @@ -5375,7 +5351,7 @@ dummy_func( #ifndef _Py_JIT current_executor = (_PyExecutorObject*)executor; #endif - assert(((_PyExecutorObject *)executor)->vm_data.valid); + tstate->current_executor = (PyObject *)executor; } tier2 op(_MAKE_WARM, (--)) { @@ -5414,6 +5390,37 @@ dummy_func( assert(tstate->tracing || eval_breaker == FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version)); } + tier2 op(_COLD_EXIT, ( -- )) { + _PyExitData *exit = tstate->jit_exit; + assert(exit != NULL); + _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; + _Py_BackoffCounter temperature = exit->temperature; + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); + GOTO_TIER_ONE(target); + } + _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit); + assert(tstate->current_executor == (PyObject *)previous_executor); + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + PyCodeObject *code = _PyFrame_GetCode(frame); + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + int chain_depth = previous_executor->vm_data.chain_depth + 1; + int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth); + if (optimized <= 0) { + exit->temperature = restart_backoff_counter(temperature); + GOTO_TIER_ONE(optimized < 0 ? NULL : target); + } + exit->temperature = initial_temperature_backoff_counter(); + } + assert(tstate->jit_exit == exit); + exit->executor = executor; + GOTO_TIER_TWO(exit->executor); + } + label(pop_2_error) { stack_pointer -= 2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/ceval.c b/Python/ceval.c index 50665defd382a2..a502cb6714c70e 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1152,7 +1152,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int uint64_t trace_uop_execution_counter = 0; #endif - assert(next_uop->opcode == _START_EXECUTOR); + assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); tier2_dispatch: for (;;) { uopcode = next_uop->opcode; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 187ec8fdd26584..9ebcefbcb85850 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -359,7 +359,6 @@ _PyFrame_SetStackPointer(frame, stack_pointer) do { \ OPT_STAT_INC(traces_executed); \ _PyExecutorObject *_executor = (EXECUTOR); \ - tstate->current_executor = (PyObject *)_executor; \ jit_func jitted = _executor->jit_code; \ /* Keep the shim frame alive via the executor: */ \ Py_INCREF(_executor); \ @@ -378,9 +377,8 @@ do { \ do { \ OPT_STAT_INC(traces_executed); \ _PyExecutorObject *_executor = (EXECUTOR); \ - tstate->current_executor = (PyObject *)_executor; \ next_uop = _executor->trace; \ - assert(next_uop->opcode == _START_EXECUTOR); \ + assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); \ goto enter_tier_two; \ } while (0) #endif @@ -390,7 +388,6 @@ do { \ { \ tstate->current_executor = NULL; \ next_instr = (TARGET); \ - assert(tstate->current_executor == NULL); \ OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \ _PyFrame_SetStackPointer(frame, stack_pointer); \ stack_pointer = _PyFrame_GetStackPointer(frame); \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index e152865e4ec9e8..8d46fe340685d4 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7113,9 +7113,8 @@ case _EXIT_TRACE: { PyObject *exit_p = (PyObject *)CURRENT_OPERAND0(); _PyExitData *exit = (_PyExitData *)exit_p; - PyCodeObject *code = _PyFrame_GetCode(frame); - _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; #if defined(Py_DEBUG) && !defined(_Py_JIT) + _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); if (frame->lltrace >= 2) { _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7128,36 +7127,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } #endif - if (exit->executor && !exit->executor->vm_data.valid) { - exit->temperature = initial_temperature_backoff_counter(); - _PyFrame_SetStackPointer(frame, stack_pointer); - Py_CLEAR(exit->executor); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - if (exit->executor == NULL) { - _Py_BackoffCounter temperature = exit->temperature; - if (!backoff_counter_triggers(temperature)) { - exit->temperature = advance_backoff_counter(temperature); - GOTO_TIER_ONE(target); - } - _PyExecutorObject *executor; - if (target->op.code == ENTER_EXECUTOR) { - executor = code->co_executors->executors[target->op.arg]; - Py_INCREF(executor); - } - else { - int chain_depth = current_executor->vm_data.chain_depth + 1; - _PyFrame_SetStackPointer(frame, stack_pointer); - int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (optimized <= 0) { - exit->temperature = restart_backoff_counter(temperature); - GOTO_TIER_ONE(optimized < 0 ? NULL : target); - } - exit->temperature = initial_temperature_backoff_counter(); - } - exit->executor = executor; - } + tstate->jit_exit = exit; GOTO_TIER_TWO(exit->executor); break; } @@ -7438,7 +7408,7 @@ #ifndef _Py_JIT current_executor = (_PyExecutorObject*)executor; #endif - assert(((_PyExecutorObject *)executor)->vm_data.valid); + tstate->current_executor = (PyObject *)executor; break; } @@ -7487,4 +7457,40 @@ break; } + case _COLD_EXIT: { + _PyExitData *exit = tstate->jit_exit; + assert(exit != NULL); + _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; + _Py_BackoffCounter temperature = exit->temperature; + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); + GOTO_TIER_ONE(target); + } + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit); + stack_pointer = _PyFrame_GetStackPointer(frame); + assert(tstate->current_executor == (PyObject *)previous_executor); + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + PyCodeObject *code = _PyFrame_GetCode(frame); + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + int chain_depth = previous_executor->vm_data.chain_depth + 1; + _PyFrame_SetStackPointer(frame, stack_pointer); + int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (optimized <= 0) { + exit->temperature = restart_backoff_counter(temperature); + GOTO_TIER_ONE(optimized < 0 ? NULL : target); + } + exit->temperature = initial_temperature_backoff_counter(); + } + assert(tstate->jit_exit == exit); + exit->executor = executor; + GOTO_TIER_TWO(exit->executor); + break; + } + #undef TIER_TWO diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index aa1eb373b7ba4b..7886353105ef16 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5595,6 +5595,7 @@ } DISPATCH_GOTO(); } + assert(executor != tstate->interp->cold_executor); GOTO_TIER_TWO(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); @@ -7793,6 +7794,7 @@ this_instr[1].counter = initial_jump_backoff_counter(); stack_pointer = _PyFrame_GetStackPointer(frame); assert(tstate->current_executor == NULL); + assert(executor != tstate->interp->cold_executor); GOTO_TIER_TWO(executor); } } diff --git a/Python/jit.c b/Python/jit.c index e232cc1f7d9250..c0c2b35fcb3e91 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -553,7 +553,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz group->emit(code, data, executor, NULL, &state); code += group->code_size; data += group->data_size; - assert(trace[0].opcode == _START_EXECUTOR); + assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT); for (size_t i = 0; i < length; i++) { const _PyUOpInstruction *instruction = &trace[i]; group = &stencil_groups[instruction->opcode]; diff --git a/Python/optimizer.c b/Python/optimizer.c index 8d01d605ef4a2a..abd7bef85e96f5 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -584,6 +584,7 @@ translate_bytecode_to_trace( code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code)); + ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, 0); ADD_TO_TRACE(_MAKE_WARM, 0, 0, 0); uint32_t target = 0; @@ -1129,7 +1130,7 @@ sanity_check(_PyExecutorObject *executor) } bool ended = false; uint32_t i = 0; - CHECK(executor->trace[0].opcode == _START_EXECUTOR); + CHECK(executor->trace[0].opcode == _START_EXECUTOR || executor->trace[0].opcode == _COLD_EXIT); for (; i < executor->code_size; i++) { const _PyUOpInstruction *inst = &executor->trace[i]; uint16_t opcode = inst->opcode; @@ -1182,9 +1183,11 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil } /* Initialize exits */ + _PyExecutorObject *cold = _PyExecutor_GetColdExecutor(); for (int i = 0; i < exit_count; i++) { - executor->exits[i].executor = NULL; + executor->exits[i].index = i; executor->exits[i].temperature = initial_temperature_backoff_counter(); + executor->exits[i].executor = cold; } int next_exit = exit_count-1; _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length]; @@ -1462,6 +1465,36 @@ _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_s link_executor(executor); } +_PyExecutorObject * +_PyExecutor_GetColdExecutor(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp->cold_executor != NULL) { + return interp->cold_executor; + } + _PyExecutorObject *cold = allocate_executor(0, 1); + if (cold == NULL) { + Py_FatalError("Cannot allocate core JIT code"); + } + ((_PyUOpInstruction *)cold->trace)->opcode = _COLD_EXIT; +#ifdef _Py_JIT + cold->jit_code = NULL; + cold->jit_side_entry = NULL; + cold->jit_size = 0; + // This is initialized to true so we can prevent the executor + // from being immediately detected as cold and invalidated. + cold->vm_data.warm = true; + if (_PyJIT_Compile(cold, cold->trace, 1)) { + Py_DECREF(cold); + Py_FatalError("Cannot allocate core JIT code"); + } +#endif + _Py_SetImmortal((PyObject *)cold); + interp->cold_executor = cold; + return cold; +} + + /* Detaches the executor from the code object (if any) that * holds a reference to it */ void @@ -1492,14 +1525,18 @@ executor_clear(PyObject *op) assert(executor->vm_data.valid == 1); unlink_executor(executor); executor->vm_data.valid = 0; + /* It is possible for an executor to form a reference * cycle with itself, so decref'ing a side exit could * free the executor unless we hold a strong reference to it */ + _PyExecutorObject *cold = _PyExecutor_GetColdExecutor(); Py_INCREF(executor); for (uint32_t i = 0; i < executor->exit_count; i++) { executor->exits[i].temperature = initial_unreachable_backoff_counter(); - Py_CLEAR(executor->exits[i].executor); + _PyExecutorObject *e = executor->exits[i].executor; + executor->exits[i].executor = cold; + Py_DECREF(e); } _Py_ExecutorDetach(executor); Py_DECREF(executor); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 41402200c1683e..4346f8a59c0314 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -3045,3 +3045,7 @@ break; } + case _COLD_EXIT: { + break; + } + diff --git a/Python/pystate.c b/Python/pystate.c index 0d4c26f92cec90..f60220cff58426 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1469,6 +1469,7 @@ init_threadstate(_PyThreadStateImpl *_tstate, tstate->datastack_limit = NULL; tstate->what_event = -1; tstate->current_executor = NULL; + tstate->jit_exit = NULL; tstate->dict_global_version = 0; _tstate->c_stack_soft_limit = UINTPTR_MAX; diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 5ee26f93f1e266..5bb56163f9f4a6 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -51,7 +51,6 @@ do { \ OPT_STAT_INC(traces_executed); \ _PyExecutorObject *_executor = (EXECUTOR); \ - tstate->current_executor = (PyObject *)_executor; \ jit_func_preserve_none jitted = _executor->jit_side_entry; \ __attribute__((musttail)) return jitted(frame, stack_pointer, tstate); \ } while (0)
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: