Skip to content

Commit 4027203

Browse files
chleroy authored and mpe committed
powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC32
When the BPF routine doesn't call any function, the non volatile registers can be reallocated to volatile registers in order to avoid having to save and restore them on the stack.

Before this patch, test #359 (ADD default X) is:

   0: 7c 64 1b 78  mr r4,r3
   4: 38 60 00 00  li r3,0
   8: 94 21 ff b0  stwu r1,-80(r1)
   c: 60 00 00 00  nop
  10: 92 e1 00 2c  stw r23,44(r1)
  14: 93 01 00 30  stw r24,48(r1)
  18: 93 21 00 34  stw r25,52(r1)
  1c: 93 41 00 38  stw r26,56(r1)
  20: 39 80 00 00  li r12,0
  24: 39 60 00 00  li r11,0
  28: 3b 40 00 00  li r26,0
  2c: 3b 20 00 00  li r25,0
  30: 7c 98 23 78  mr r24,r4
  34: 7c 77 1b 78  mr r23,r3
  38: 39 80 00 42  li r12,66
  3c: 39 60 00 00  li r11,0
  40: 7d 8c d2 14  add r12,r12,r26
  44: 39 60 00 00  li r11,0
  48: 7d 83 63 78  mr r3,r12
  4c: 82 e1 00 2c  lwz r23,44(r1)
  50: 83 01 00 30  lwz r24,48(r1)
  54: 83 21 00 34  lwz r25,52(r1)
  58: 83 41 00 38  lwz r26,56(r1)
  5c: 38 21 00 50  addi r1,r1,80
  60: 4e 80 00 20  blr

After this patch, the same test has become:

   0: 7c 64 1b 78  mr r4,r3
   4: 38 60 00 00  li r3,0
   8: 94 21 ff b0  stwu r1,-80(r1)
   c: 60 00 00 00  nop
  10: 39 80 00 00  li r12,0
  14: 39 60 00 00  li r11,0
  18: 39 00 00 00  li r8,0
  1c: 38 e0 00 00  li r7,0
  20: 7c 86 23 78  mr r6,r4
  24: 7c 65 1b 78  mr r5,r3
  28: 39 80 00 42  li r12,66
  2c: 39 60 00 00  li r11,0
  30: 7d 8c 42 14  add r12,r12,r8
  34: 39 60 00 00  li r11,0
  38: 7d 83 63 78  mr r3,r12
  3c: 38 21 00 50  addi r1,r1,80
  40: 4e 80 00 20  blr

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/b94562d7d2bb21aec89de0c40bb3cd91054b65a2.1616430991.git.christophe.leroy@csgroup.eu
1 parent 51c66ad commit 4027203

File tree

5 files changed

+51
-3
lines changed

5 files changed

+51
-3
lines changed

arch/powerpc/net/bpf_jit.h

+16
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,15 @@ static inline bool is_nearbranch(int offset)
116116
#define SEEN_STACK 0x40000000 /* uses BPF stack */
117117
#define SEEN_TAILCALL 0x80000000 /* uses tail calls */
118118

119+
/*
 * Register groups within codegen_context.seen. bpf_set_seen_register()
 * records register i at bit (31 - i), so r3 is bit 28 and r31 is bit 0.
 */
#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
120+
#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */
121+
122+
/*
 * BPF to ppc register mapping table. The array has one more entry when
 * CONFIG_PPC64 is defined than otherwise.
 */
#ifdef CONFIG_PPC64
123+
extern const int b2p[MAX_BPF_JIT_REG + 2];
124+
#else
125+
extern const int b2p[MAX_BPF_JIT_REG + 1];
126+
#endif
127+
119128
struct codegen_context {
120129
/*
121130
* This is used to track register usage as well
@@ -129,6 +138,7 @@ struct codegen_context {
129138
unsigned int seen;
130139
unsigned int idx;
131140
unsigned int stack_size;
141+
int b2p[ARRAY_SIZE(b2p)];
132142
};
133143

134144
static inline void bpf_flush_icache(void *start, void *end)
@@ -147,11 +157,17 @@ static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
147157
ctx->seen |= 1 << (31 - i);
148158
}
149159

160+
/*
 * Clear the bit that tracks register i in ctx->seen. The bit position is
 * (31 - i), the same position bpf_set_seen_register() sets.
 */
static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
161+
{
162+
ctx->seen &= ~(1 << (31 - i));
163+
}
164+
150165
void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
151166
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
152167
u32 *addrs, bool extra_pass);
153168
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
154169
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
170+
void bpf_jit_realloc_regs(struct codegen_context *ctx);
155171

156172
#endif
157173

arch/powerpc/net/bpf_jit64.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
4040

4141
/* BPF to ppc register mappings */
42-
static const int b2p[] = {
42+
const int b2p[MAX_BPF_JIT_REG + 2] = {
4343
/* function return value */
4444
[BPF_REG_0] = 8,
4545
/* function arguments */

arch/powerpc/net/bpf_jit_comp.c

+2
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
143143
}
144144

145145
memset(&cgctx, 0, sizeof(struct codegen_context));
146+
memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p));
146147

147148
/* Make sure that the stack is quadword aligned. */
148149
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
@@ -167,6 +168,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
167168
}
168169
}
169170

171+
bpf_jit_realloc_regs(&cgctx);
170172
/*
171173
* Pretend to build prologue, given the features we've seen. This will
172174
* update cgctx.idx as it pretends to output instructions, then we can

arch/powerpc/net/bpf_jit_comp32.c

+28-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
#define TMP_REG (MAX_BPF_JIT_REG + 0)
3838

3939
/* BPF to ppc register mappings */
40-
static const int b2p[] = {
40+
const int b2p[MAX_BPF_JIT_REG + 1] = {
4141
/* function return value */
4242
[BPF_REG_0] = 12,
4343
/* function arguments */
@@ -60,7 +60,7 @@ static const int b2p[] = {
6060

6161
/* Return the ppc register that BPF register reg is mapped to. */
static int bpf_to_ppc(struct codegen_context *ctx, int reg)
6262
{
63-
return b2p[reg];
63+
return ctx->b2p[reg];
6464
}
6565

6666
/* PPC NVR range -- update this if we ever use NVRs below r17 */
@@ -77,6 +77,32 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
7777
return BPF_PPC_STACKFRAME(ctx) - 4;
7878
}
7979

80+
/*
 * Move BPF registers out of non volatile ppc registers and into unused
 * volatile ones by rewriting ctx->b2p and the matching bits of ctx->seen.
 * Returns immediately, changing nothing, when SEEN_FUNC is set.
 */
void bpf_jit_realloc_regs(struct codegen_context *ctx)
81+
{
82+
if (ctx->seen & SEEN_FUNC)
83+
return;
84+
85+
/*
 * Keep going while some non volatile register is marked seen and the
 * volatile set is not yet completely marked seen.
 */
while (ctx->seen & SEEN_NVREG_MASK &&
86+
(ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
87+
/*
 * Register rN is tracked at bit (31 - N) of ctx->seen, so 0xaaaaaaaa
 * selects even-numbered registers and the extra bit 0 in 0xaaaaaaab
 * also admits r31. Assuming fls() returns the 1-based index of the
 * highest set bit (0 if none), 32 - fls(x) is the lowest-numbered
 * register present in x.
 */
int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab));
88+
/*
 * Same computation over the even-numbered volatile registers that are
 * NOT marked seen.
 * NOTE(review): if no such register is free, new would be 32 under the
 * fls() assumption above -- confirm the loop condition rules this out.
 */
int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa));
89+
int i;
90+
91+
/*
 * Find the BPF register currently mapped to old and retarget it.
 * NOTE(review): if no entry matches, ctx->seen is left unchanged and
 * the enclosing while loop would not terminate -- confirm that every
 * register selected as old always has a b2p entry.
 */
for (i = BPF_REG_0; i <= TMP_REG; i++) {
92+
if (ctx->b2p[i] != old)
93+
continue;
94+
ctx->b2p[i] = new;
95+
bpf_set_seen_register(ctx, new);
96+
bpf_clear_seen_register(ctx, old);
97+
/*
 * NOTE(review): every entry except TMP_REG also moves the
 * neighbouring register (old - 1 -> new - 1); presumably those
 * entries are register pairs holding a 64-bit value -- confirm.
 */
if (i != TMP_REG) {
98+
bpf_set_seen_register(ctx, new - 1);
99+
bpf_clear_seen_register(ctx, old - 1);
100+
}
101+
break;
102+
}
103+
}
104+
}
105+
80106
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
81107
{
82108
int i;

arch/powerpc/net/bpf_jit_comp64.c

+4
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
6464
BUG();
6565
}
6666

67+
/*
 * Empty implementation: in this file bpf_jit_realloc_regs() leaves the
 * register mapping and ctx->seen untouched.
 */
void bpf_jit_realloc_regs(struct codegen_context *ctx)
68+
{
69+
}
70+
6771
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
6872
{
6973
int i;

0 commit comments

Comments
 (0)