mirror of
https://github.com/torvalds/linux
synced 2024-10-15 15:59:15 +00:00
bpf: Track aligned st store as imprecise spilled registers
With patch set [1], precision backtracking supports register spill/fill to/from the stack. The patch [2] allows an initial imprecise register spill with content 0. This is a common case for cpuv3 and lower when initializing stack variables with the pattern

    r1 = 0
    *(u64 *)(r10 - 8) = r1

and [2] has demonstrated a good verification improvement.

For cpuv4, the initialization could be

    *(u64 *)(r10 - 8) = 0

The current verifier marks the r10-8 contents with STACK_ZERO. Similar to [2], let us permit the above insn to behave like an imprecise register spill, which can reduce the number of verified states. The change is in function check_stack_write_fixed_off().

Before this patch, a spilled zero is marked as STACK_ZERO, which can provide precise values. In check_stack_write_var_off(), STACK_ZERO is maintained when writing a const zero, so that it can later provide precise values if needed.

Handling '*(u64 *)(r10 - 8) = 0' as a spill causes issues in check_stack_write_var_off(), because the spill would be converted to STACK_MISC and the precise value 0 would be lost. To fix this issue, if a spill slot holds a const zero and the BPF_ST write is also a const zero, the spill slot is preserved, so that it can later provide precise values if needed. Without the change in check_stack_write_var_off(), the test_verifier subtest 'BPF_ST_MEM stack imm zero, variable offset' will fail.

I checked cpuv3 and cpuv4 with and without this patch with veristat. There is no state change for cpuv3 since '*(u64 *)(r10 - 8) = 0' is only generated with cpuv4.
For cpuv4:

  $ ../veristat -C old.cpuv4.csv new.cpuv4.csv -e file,prog,insns,states -f 'insns_diff!=0'
  File                                        Program              Insns (A)  Insns (B)     Insns (DIFF)  States (A)  States (B)  States (DIFF)
  ------------------------------------------  -------------------  ---------  ---------  ---------------  ----------  ----------  -------------
  local_storage_bench.bpf.linked3.o           get_local                  228        168    -60 (-26.32%)          17          14   -3 (-17.65%)
  pyperf600_bpf_loop.bpf.linked3.o            on_event                  6066       4889  -1177 (-19.40%)         403         321  -82 (-20.35%)
  test_cls_redirect.bpf.linked3.o             cls_redirect             35483      35387     -96 (-0.27%)        2179        2177    -2 (-0.09%)
  test_l4lb_noinline.bpf.linked3.o            balancer_ingress          4494       4522     +28 (+0.62%)         217         219    +2 (+0.92%)
  test_l4lb_noinline_dynptr.bpf.linked3.o     balancer_ingress          1432       1455     +23 (+1.61%)          92          94    +2 (+2.17%)
  test_xdp_noinline.bpf.linked3.o             balancer_ingress_v6       3462       3458      -4 (-0.12%)         216         216    +0 (+0.00%)
  verifier_iterating_callbacks.bpf.linked3.o  widening                    52         41    -11 (-21.15%)           4           3   -1 (-25.00%)
  xdp_synproxy_kern.bpf.linked3.o             syncookie_tc             12412      11719    -693 (-5.58%)         345         330   -15 (-4.35%)
  xdp_synproxy_kern.bpf.linked3.o             syncookie_xdp            12478      11794    -684 (-5.48%)         346         331   -15 (-4.34%)

test_l4lb_noinline and test_l4lb_noinline_dynptr have minor regressions, but pyperf600_bpf_loop and local_storage_bench get pretty good improvements.

[1] https://lore.kernel.org/all/20231205184248.1502704-1-andrii@kernel.org/
[2] https://lore.kernel.org/all/20231205184248.1502704-9-andrii@kernel.org/

Cc: Kuniyuki Iwashima <kuniyu@amazon.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Tested-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20240110051348.2737007-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
3893f0b6a0
commit
9a4c57f52b
|
@ -4516,7 +4516,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
|
|||
if (!reg_value_fits)
|
||||
state->stack[spi].spilled_ptr.id = 0;
|
||||
} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
|
||||
insn->imm != 0 && env->bpf_capable) {
|
||||
env->bpf_capable) {
|
||||
struct bpf_reg_state fake_reg = {};
|
||||
|
||||
__mark_reg_known(&fake_reg, insn->imm);
|
||||
|
@ -4663,7 +4663,20 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Erase all spilled pointers. */
|
||||
/* If writing_zero and the spi slot contains a spill of value 0,
|
||||
* maintain the spill type.
|
||||
*/
|
||||
if (writing_zero && *stype == STACK_SPILL &&
|
||||
is_spilled_scalar_reg(&state->stack[spi])) {
|
||||
struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
|
||||
|
||||
if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
|
||||
zero_used = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* Erase all other spilled pointers. */
|
||||
state->stack[spi].spilled_ptr.type = NOT_INIT;
|
||||
|
||||
/* Update the slot type. */
|
||||
|
|
|
@ -493,14 +493,14 @@ char single_byte_buf[1] SEC(".data.single_byte_buf");
|
|||
SEC("raw_tp")
|
||||
__log_level(2)
|
||||
__success
|
||||
/* make sure fp-8 is all STACK_ZERO */
|
||||
__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000")
|
||||
/* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */
|
||||
__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0")
|
||||
/* but fp-16 is spilled IMPRECISE zero const reg */
|
||||
__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0")
|
||||
/* validate that assigning R2 from STACK_ZERO doesn't mark register
|
||||
/* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register
|
||||
* precise immediately; if necessary, it will be marked precise later
|
||||
*/
|
||||
__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=00000000")
|
||||
__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0")
|
||||
/* similarly, when R2 is assigned from spilled register, it is initially
|
||||
* imprecise, but will be marked precise later once it is used in precise context
|
||||
*/
|
||||
|
@ -518,14 +518,14 @@ __msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0")
|
|||
__naked void partial_stack_load_preserves_zeros(void)
|
||||
{
|
||||
asm volatile (
|
||||
/* fp-8 is all STACK_ZERO */
|
||||
/* fp-8 is value zero (represented by a zero value fake reg) */
|
||||
".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */
|
||||
|
||||
/* fp-16 is const zero register */
|
||||
"r0 = 0;"
|
||||
"*(u64 *)(r10 -16) = r0;"
|
||||
|
||||
/* load single U8 from non-aligned STACK_ZERO slot */
|
||||
/* load single U8 from non-aligned spilled value zero slot */
|
||||
"r1 = %[single_byte_buf];"
|
||||
"r2 = *(u8 *)(r10 -1);"
|
||||
"r1 += r2;"
|
||||
|
@ -537,7 +537,7 @@ __naked void partial_stack_load_preserves_zeros(void)
|
|||
"r1 += r2;"
|
||||
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
|
||||
|
||||
/* load single U16 from non-aligned STACK_ZERO slot */
|
||||
/* load single U16 from non-aligned spilled value zero slot */
|
||||
"r1 = %[single_byte_buf];"
|
||||
"r2 = *(u16 *)(r10 -2);"
|
||||
"r1 += r2;"
|
||||
|
@ -549,7 +549,7 @@ __naked void partial_stack_load_preserves_zeros(void)
|
|||
"r1 += r2;"
|
||||
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
|
||||
|
||||
/* load single U32 from non-aligned STACK_ZERO slot */
|
||||
/* load single U32 from non-aligned spilled value zero slot */
|
||||
"r1 = %[single_byte_buf];"
|
||||
"r2 = *(u32 *)(r10 -4);"
|
||||
"r1 += r2;"
|
||||
|
|
Loading…
Reference in a new issue