Skip to content

Commit d77aaa7

Browse files
authored
GH-139109: Partial reworking of JIT data structures (GH-144105)
* Halve size of buffers by reusing combined trace + optimizer buffers for TOS caching
* Add simple buffer struct for more maintainable handling of buffers
* Decouple JIT structs from thread state struct
* Ensure terminator is added to trace, when optimizer gives up
1 parent fb690c3 commit d77aaa7

File tree

10 files changed

+228
-215
lines changed

10 files changed

+228
-215
lines changed

Include/internal/pycore_optimizer.h

Lines changed: 93 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,102 @@ extern "C" {
1616
#include <stdbool.h>
1717

1818

19+
typedef struct _PyJitUopBuffer {
20+
_PyUOpInstruction *start;
21+
_PyUOpInstruction *next;
22+
_PyUOpInstruction *end;
23+
} _PyJitUopBuffer;
24+
25+
26+
typedef struct _JitOptContext {
27+
char done;
28+
char out_of_space;
29+
bool contradiction;
30+
// Has the builtins dict been watched?
31+
bool builtins_watched;
32+
// The current "executing" frame.
33+
_Py_UOpsAbstractFrame *frame;
34+
_Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
35+
int curr_frame_depth;
36+
37+
// Arena for the symbolic types.
38+
ty_arena t_arena;
39+
40+
JitOptRef *n_consumed;
41+
JitOptRef *limit;
42+
JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
43+
_PyJitUopBuffer out_buffer;
44+
} JitOptContext;
45+
46+
47+
static inline void
48+
uop_buffer_init(_PyJitUopBuffer *trace, _PyUOpInstruction *start, uint32_t size)
49+
{
50+
trace->next = trace->start = start;
51+
trace->end = start + size;
52+
}
53+
54+
static inline _PyUOpInstruction *
55+
uop_buffer_last(_PyJitUopBuffer *trace)
56+
{
57+
assert(trace->next > trace->start);
58+
return trace->next-1;
59+
}
60+
61+
static inline int
62+
uop_buffer_length(_PyJitUopBuffer *trace)
63+
{
64+
return (int)(trace->next - trace->start);
65+
}
66+
67+
static inline int
68+
uop_buffer_remaining_space(_PyJitUopBuffer *trace)
69+
{
70+
return (int)(trace->end - trace->next);
71+
}
72+
73+
typedef struct _PyJitTracerInitialState {
74+
int stack_depth;
75+
int chain_depth;
76+
struct _PyExitData *exit;
77+
PyCodeObject *code; // Strong
78+
PyFunctionObject *func; // Strong
79+
struct _PyExecutorObject *executor; // Strong
80+
_Py_CODEUNIT *start_instr;
81+
_Py_CODEUNIT *close_loop_instr;
82+
_Py_CODEUNIT *jump_backward_instr;
83+
} _PyJitTracerInitialState;
84+
85+
typedef struct _PyJitTracerPreviousState {
86+
bool dependencies_still_valid;
87+
int instr_oparg;
88+
int instr_stacklevel;
89+
_Py_CODEUNIT *instr;
90+
PyCodeObject *instr_code; // Strong
91+
struct _PyInterpreterFrame *instr_frame;
92+
_PyBloomFilter dependencies;
93+
} _PyJitTracerPreviousState;
94+
95+
typedef struct _PyJitTracerTranslatorState {
96+
int jump_backward_seen;
97+
} _PyJitTracerTranslatorState;
98+
99+
typedef struct _PyJitTracerState {
100+
bool is_tracing;
101+
_PyJitTracerInitialState initial_state;
102+
_PyJitTracerPreviousState prev_state;
103+
_PyJitTracerTranslatorState translator_state;
104+
JitOptContext opt_context;
105+
_PyJitUopBuffer code_buffer;
106+
_PyJitUopBuffer out_buffer;
107+
_PyUOpInstruction uop_array[2 * UOP_MAX_TRACE_LENGTH];
108+
} _PyJitTracerState;
109+
19110
typedef struct _PyExecutorLinkListNode {
20111
struct _PyExecutorObject *next;
21112
struct _PyExecutorObject *previous;
22113
} _PyExecutorLinkListNode;
23114

24-
25115
typedef struct {
26116
uint8_t opcode;
27117
uint8_t oparg;
@@ -86,8 +176,8 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
86176

87177
int _Py_uop_analyze_and_optimize(
88178
_PyThreadStateImpl *tstate,
89-
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
90-
_PyBloomFilter *dependencies);
179+
_PyUOpInstruction *input, int trace_len, int curr_stackentries,
180+
_PyUOpInstruction *output, _PyBloomFilter *dependencies);
91181

92182
extern PyTypeObject _PyUOpExecutor_Type;
93183

Include/internal/pycore_optimizer_types.h

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -126,27 +126,6 @@ typedef struct ty_arena {
126126
JitOptSymbol arena[TY_ARENA_SIZE];
127127
} ty_arena;
128128

129-
typedef struct _JitOptContext {
130-
char done;
131-
char out_of_space;
132-
bool contradiction;
133-
// Has the builtins dict been watched?
134-
bool builtins_watched;
135-
// The current "executing" frame.
136-
_Py_UOpsAbstractFrame *frame;
137-
_Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
138-
int curr_frame_depth;
139-
140-
// Arena for the symbolic types.
141-
ty_arena t_arena;
142-
143-
JitOptRef *n_consumed;
144-
JitOptRef *limit;
145-
JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
146-
_PyUOpInstruction *out_buffer;
147-
int out_len;
148-
} JitOptContext;
149-
150129

151130
#ifdef __cplusplus
152131
}

Include/internal/pycore_tstate.h

Lines changed: 1 addition & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ extern "C" {
1212
#include "pycore_freelist_state.h" // struct _Py_freelists
1313
#include "pycore_interpframe_structs.h" // _PyInterpreterFrame
1414
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
15-
#include "pycore_optimizer_types.h" // JitOptContext
1615
#include "pycore_qsbr.h" // struct qsbr
1716
#include "pycore_uop.h" // struct _PyUOpInstruction
1817
#include "pycore_structs.h"
@@ -24,46 +23,6 @@ struct _gc_thread_state {
2423
};
2524
#endif
2625

27-
#if _Py_TIER2
28-
typedef struct _PyJitTracerInitialState {
29-
int stack_depth;
30-
int chain_depth;
31-
struct _PyExitData *exit;
32-
PyCodeObject *code; // Strong
33-
PyFunctionObject *func; // Strong
34-
struct _PyExecutorObject *executor; // Strong
35-
_Py_CODEUNIT *start_instr;
36-
_Py_CODEUNIT *close_loop_instr;
37-
_Py_CODEUNIT *jump_backward_instr;
38-
} _PyJitTracerInitialState;
39-
40-
typedef struct _PyJitTracerPreviousState {
41-
bool dependencies_still_valid;
42-
int code_max_size;
43-
int code_curr_size;
44-
int instr_oparg;
45-
int instr_stacklevel;
46-
_Py_CODEUNIT *instr;
47-
PyCodeObject *instr_code; // Strong
48-
struct _PyInterpreterFrame *instr_frame;
49-
_PyBloomFilter dependencies;
50-
} _PyJitTracerPreviousState;
51-
52-
typedef struct _PyJitTracerTranslatorState {
53-
int jump_backward_seen;
54-
} _PyJitTracerTranslatorState;
55-
56-
typedef struct _PyJitTracerState {
57-
bool is_tracing;
58-
_PyJitTracerInitialState initial_state;
59-
_PyJitTracerPreviousState prev_state;
60-
_PyJitTracerTranslatorState translator_state;
61-
JitOptContext opt_context;
62-
_PyUOpInstruction code_buffer[UOP_MAX_TRACE_LENGTH];
63-
_PyUOpInstruction out_buffer[UOP_MAX_TRACE_LENGTH];
64-
} _PyJitTracerState;
65-
66-
#endif
6726

6827
// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
6928
// PyThreadState fields are exposed as part of the C API, although most fields
@@ -141,7 +100,7 @@ typedef struct _PyThreadStateImpl {
141100
Py_ssize_t reftotal; // this thread's total refcount operations
142101
#endif
143102
#if _Py_TIER2
144-
_PyJitTracerState *jit_tracer_state;
103+
struct _PyJitTracerState *jit_tracer_state;
145104
#endif
146105
} _PyThreadStateImpl;
147106

Include/internal/pycore_uop.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,10 @@ typedef struct _PyUOpInstruction{
3838
// This is the length of the trace we translate initially.
3939
#ifdef Py_DEBUG
4040
// With asserts, the stencils are a lot larger
41-
#define UOP_MAX_TRACE_LENGTH 2000
41+
#define UOP_MAX_TRACE_LENGTH 1000
4242
#else
43-
#define UOP_MAX_TRACE_LENGTH 5000
43+
#define UOP_MAX_TRACE_LENGTH 2500
4444
#endif
45-
#define UOP_BUFFER_SIZE (UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction))
4645

4746
/* Bloom filter with m = 256
4847
* https://en.wikipedia.org/wiki/Bloom_filter */

Python/ceval_macros.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ do { \
433433
JUMP_TO_LABEL(error); \
434434
} \
435435
if (keep_tracing_bit) { \
436-
assert(((_PyThreadStateImpl *)tstate)->jit_tracer_state->prev_state.code_curr_size == 2); \
436+
assert(uop_buffer_length(&((_PyThreadStateImpl *)tstate)->jit_tracer_state->code_buffer)); \
437437
ENTER_TRACING(); \
438438
DISPATCH_NON_TRACING(); \
439439
} \

0 commit comments

Comments
 (0)