From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 3 Apr 2023 14:52:59 +0300 Subject: [PATCH] intel/fs: fix scheduling of HALT instructions With the following test : dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.no_out_of_bounds_load There is a : shader_start: ... <- no control flow g0 = some_alu g1 = fbl g2 = broadcast g3, g1 g4 = get_buffer_size g2 ... <- no control flow halt <- on some lanes g5 = send , g4 eliminate_find_live_channel will remove the fbl/broadcast because it assumes lane0 is active at get_buffer_size : shader_start: ... <- no control flow g0 = some_alu g4 = get_buffer_size g0 ... <- no control flow halt <- on some lanes g5 = send , g4 But then the instruction scheduler will move the get_buffer_size after the halt : shader_start: ... <- no control flow halt <- on some lanes g0 = some_alu g4 = get_buffer_size g0 g5 = send , g4 get_buffer_size pulls the surface index from lane0 in g0 which could have been turned off by the halt and we end up accessing an invalid surface handle. Signed-off-by: Lionel Landwerlin Cc: mesa-stable --- .../compiler/brw_schedule_instructions.cpp | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 3286e3f83b96..43f63784b2e8 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -651,6 +651,7 @@ public: ralloc_free(this->mem_ctx); } void add_barrier_deps(schedule_node *n); + void add_cross_lane_deps(schedule_node *n); void add_dep(schedule_node *before, schedule_node *after, int latency); void add_dep(schedule_node *before, schedule_node *after); @@ -1098,6 +1099,28 @@ is_scheduling_barrier(const backend_instruction *inst) inst->has_side_effects(); } +static bool +has_cross_lane_access(const fs_inst *inst) +{ + if (inst->opcode == SHADER_OPCODE_BROADCAST || + inst->opcode == SHADER_OPCODE_READ_SR_REG || + inst->opcode == SHADER_OPCODE_CLUSTER_BROADCAST || + inst->opcode == SHADER_OPCODE_SHUFFLE || + inst->opcode == FS_OPCODE_LOAD_LIVE_CHANNELS || + inst->opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL || + inst->opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL) + return true; + + for (unsigned s = 0; s < inst->sources; s++) { + if (inst->src[s].file == VGRF) { + if (inst->src[s].stride == 0) + return true; + } + } + + return false; +} + /** * Sometimes we really want this node to execute after everything that * was before it and before everything that followed it. This adds @@ -1128,6 +1151,25 @@ instruction_scheduler::add_barrier_deps(schedule_node *n) } } +/** + * Because some instructions like HALT can disable lanes, scheduling prior to + * a cross lane access should not be allowed, otherwise we could end up with + * later instructions accessing uninitialized data. + */ +void +instruction_scheduler::add_cross_lane_deps(schedule_node *n) +{ + schedule_node *prev = (schedule_node *)n->prev; + + if (prev) { + while (!prev->is_head_sentinel()) { + if (has_cross_lane_access((fs_inst *)prev->inst)) + add_dep(prev, n, 0); + prev = (schedule_node *)prev->prev; + } + } +} + /* instruction scheduling needs to be aware of when an MRF write * actually writes 2 MRFs. */ @@ -1165,6 +1207,10 @@ fs_instruction_scheduler::calculate_deps() if (is_scheduling_barrier(inst)) add_barrier_deps(n); + if (inst->opcode == BRW_OPCODE_HALT || + inst->opcode == SHADER_OPCODE_HALT_TARGET) + add_cross_lane_deps(n); + /* read-after-write deps. */ for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) {