Skip to content

Commit 8b228c0

Browse files
committed
gh-136459: Use frame pointers in the x86_64 perf trampolines
1 parent c176543 commit 8b228c0

File tree

2 files changed

+82
-16
lines changed

2 files changed

+82
-16
lines changed

Python/asm_trampoline.S

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@ _Py_trampoline_func_start:
1212
#if defined(__CET__) && (__CET__ & 1)
1313
endbr64
1414
#endif
15-
sub $8, %rsp
16-
call *%rcx
17-
add $8, %rsp
15+
push %rbp
16+
mov %rsp, %rbp
17+
call *%rcx
18+
pop %rbp
1819
ret
1920
#endif // __x86_64__
2021
#if defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)

Python/perf_jit_trampoline.c

Lines changed: 78 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -401,10 +401,12 @@ enum {
401401
DWRF_CFA_nop = 0x0, // No operation
402402
DWRF_CFA_offset_extended = 0x5, // Extended offset instruction
403403
DWRF_CFA_def_cfa = 0xc, // Define CFA rule
404+
DWRF_CFA_def_cfa_register = 0xd, // Define CFA register
404405
DWRF_CFA_def_cfa_offset = 0xe, // Define CFA offset
405406
DWRF_CFA_offset_extended_sf = 0x11, // Extended signed offset
406407
DWRF_CFA_advance_loc = 0x40, // Advance location counter
407-
DWRF_CFA_offset = 0x80 // Simple offset instruction
408+
DWRF_CFA_offset = 0x80, // Simple offset instruction
409+
DWRF_CFA_restore = 0xc0 // Restore register
408410
};
409411

410412
/* DWARF Exception Handling pointer encodings */
@@ -519,6 +521,7 @@ typedef struct ELFObjectContext {
519521
uint8_t* p; // Current write position in buffer
520522
uint8_t* startp; // Start of buffer (for offset calculations)
521523
uint8_t* eh_frame_p; // Start of EH frame data (for relative offsets)
524+
uint8_t* fde_p; // Start of FDE data (for PC-relative calculations)
522525
uint32_t code_size; // Size of the code being described
523526
} ELFObjectContext;
524527

@@ -784,7 +787,7 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
784787
*
785788
* DWRF_SECTION(FDE,
786789
* DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (relative from here)
787-
* DWRF_U32(-0x30); // Initial PC-relative location of the code
790+
* DWRF_U32(pc_relative_offset); // PC-relative location of the code (calculated dynamically)
788791
* DWRF_U32(ctx->code_size); // Code range covered by this FDE
789792
* DWRF_U8(0); // Augmentation data length (none)
790793
*
@@ -853,11 +856,15 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
853856
*
854857
* The FDE describes unwinding information specific to this function.
855858
* It references the CIE and provides function-specific CFI instructions.
859+
*
860+
* The PC-relative offset is calculated after the entire EH frame is built
861+
* to ensure accurate positioning relative to the synthesized DSO layout.
856862
*/
857863
DWRF_SECTION(FDE,
858864
DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (backwards reference)
859-
DWRF_U32(-0x30); // Machine code offset relative to .text
860-
DWRF_U32(ctx->code_size); // Address range covered by this FDE (code lenght)
865+
ctx->fde_p = p; // Remember where PC offset field is located for later calculation
866+
DWRF_U32(0); // Placeholder for PC-relative offset (calculated at end of elf_init_ehframe)
867+
DWRF_U32(ctx->code_size); // Address range covered by this FDE (code length)
861868
DWRF_U8(0); // Augmentation data length (none)
862869

863870
/*
@@ -868,17 +875,22 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
868875
* conventions and register usage patterns.
869876
*/
870877
#ifdef __x86_64__
871-
/* x86_64 calling convention unwinding rules */
878+
/* x86_64 calling convention unwinding rules with frame pointer */
872879
# if defined(__CET__) && (__CET__ & 1)
873-
DWRF_U8(DWRF_CFA_advance_loc | 8); // Advance location by 8 bytes when CET protection is enabled
874-
# else
875-
DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance location by 4 bytes
880+
DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance past endbr64 (4 bytes)
876881
# endif
877-
DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset
878-
DWRF_UV(16); // New offset: SP + 16
879-
DWRF_U8(DWRF_CFA_advance_loc | 6); // Advance location by 6 bytes
880-
DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset
881-
DWRF_UV(8); // New offset: SP + 8
882+
DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance past push %rbp (1 byte)
883+
DWRF_U8(DWRF_CFA_def_cfa_offset); // def_cfa_offset 16
884+
DWRF_UV(16);
885+
DWRF_U8(DWRF_CFA_offset | DWRF_REG_BP); // offset r6 at cfa-16
886+
DWRF_UV(2);
887+
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past mov %rsp,%rbp (3 bytes)
888+
DWRF_U8(DWRF_CFA_def_cfa_register); // def_cfa_register r6
889+
DWRF_UV(DWRF_REG_BP);
890+
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past call *%rcx (2 bytes) + pop %rbp (1 byte) = 3
891+
DWRF_U8(DWRF_CFA_def_cfa); // def_cfa r7 ofs 8
892+
DWRF_UV(DWRF_REG_SP);
893+
DWRF_UV(8);
882894
#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
883895
/* AArch64 calling convention unwinding rules */
884896
DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance location by 1 instruction (stp x29, x30)
@@ -902,6 +914,58 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
902914
)
903915

904916
ctx->p = p; // Update context pointer to end of generated data
917+
918+
/* Calculate and update the PC-relative offset in the FDE
919+
*
920+
* When perf processes the jitdump, it creates a synthesized DSO with this layout:
921+
*
922+
* Synthesized DSO Memory Layout:
923+
* ┌─────────────────────────────────────────────────────────────┐ < code_start
924+
* │ Code Section │
925+
* │ (round_up(code_size, 8) bytes) │
926+
* ├─────────────────────────────────────────────────────────────┤ < start of EH frame data
927+
* │ EH Frame Data │
928+
* │ ┌─────────────────────────────────────────────────────┐ │
929+
* │ │ CIE data │ │
930+
* │ └─────────────────────────────────────────────────────┘ │
931+
* │ ┌─────────────────────────────────────────────────────┐ │
932+
* │ │ FDE Header: │ │
933+
* │ │ - CIE offset (4 bytes) │ │
934+
* │ │ - PC offset (4 bytes) <─ fde_offset_in_frame ─────┼────┼─> points to code_start
935+
* │ │ - address range (4 bytes) │ │ (this specific field)
936+
* │ │ CFI Instructions... │ │
937+
* │ └─────────────────────────────────────────────────────┘ │
938+
* ├─────────────────────────────────────────────────────────────┤ < reference_point
939+
* │ EhFrameHeader │
940+
* │ (navigation metadata) │
941+
* └─────────────────────────────────────────────────────────────┘
942+
*
943+
* The PC offset field in the FDE must contain the distance from itself to code_start:
944+
*
945+
* distance = code_start - fde_pc_field
946+
*
947+
* Where:
948+
* fde_pc_field_location = reference_point - eh_frame_size + fde_offset_in_frame
949+
* code_start_location = reference_point - eh_frame_size - round_up(code_size, 8)
950+
*
951+
* Therefore:
952+
* distance = code_start_location - fde_pc_field_location
953+
* = (ref - eh_frame_size - rounded_code_size) - (ref - eh_frame_size + fde_offset_in_frame)
954+
* = -rounded_code_size - fde_offset_in_frame
955+
* = -(round_up(code_size, 8) + fde_offset_in_frame)
956+
*
957+
* Note: fde_offset_in_frame is the offset from EH frame start to the PC offset field; it is computed below as (ctx->fde_p - ctx->startp).
958+
*
959+
*/
960+
if (ctx->fde_p != NULL) {
961+
int32_t fde_offset_in_frame = (ctx->fde_p - ctx->startp);
962+
int32_t rounded_code_size = round_up(ctx->code_size, 8);
963+
int32_t pc_relative_offset = -(rounded_code_size + fde_offset_in_frame);
964+
965+
966+
// Update the PC-relative offset in the FDE
967+
*(int32_t*)ctx->fde_p = pc_relative_offset;
968+
}
905969
}
906970

907971
// =============================================================================
@@ -1092,6 +1156,7 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr,
10921156
char buffer[1024]; // Buffer for DWARF data (1KB should be sufficient)
10931157
ctx.code_size = code_size;
10941158
ctx.startp = ctx.p = (uint8_t*)buffer;
1159+
ctx.fde_p = NULL; // Initialize to NULL, will be set when FDE is written
10951160

10961161
/* Generate EH frame (Exception Handling frame) data */
10971162
elf_init_ehframe(&ctx);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy