Skip to content

Commit

Permalink
wip: simple peephole store-load forwarding pass
Browse files Browse the repository at this point in the history
  • Loading branch information
Mike-Leo-Smith committed Jan 10, 2025
1 parent 10719a9 commit dd93d02
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 11 deletions.
3 changes: 2 additions & 1 deletion include/luisa/luisa-compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,8 @@
#include <luisa/xir/passes/dce.h>
#include <luisa/xir/passes/dom_tree.h>
#include <luisa/xir/passes/outline.h>
#include <luisa/xir/passes/ref_arg_usage.h>
#include <luisa/xir/passes/peephole_store_forward.h>
#include <luisa/xir/passes/pointer_usage.h>
#include <luisa/xir/passes/sink_alloca.h>
#include <luisa/xir/passes/trace_gep.h>
#include <luisa/xir/pool.h>
Expand Down
30 changes: 30 additions & 0 deletions include/luisa/xir/passes/peephole_store_forward.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#pragma once

#include <luisa/core/dll_export.h>
#include <luisa/core/stl/unordered_map.h>

namespace luisa::compute::xir {

class Value;
class LoadInst;
class StoreInst;
class BasicBlock;
class Function;
class Module;

// This pass is used to forward stores to loads for scalar variables
// within the same basic block. It is a simple peephole optimization
// that can be used to reduce the number of memory operations.
// Note: this pass does not remove the original store instructions.
// It only forwards the values to the loads. To remove the original
// store instructions, a DCE pass should be used after this pass.

// Summary returned by the peephole store-forward pass entry points.
struct PeepholeStoreForwardInfo {
// Keys are the loads whose uses were replaced with a stored value (the pass
// also calls remove_self() on them); each value is the store instruction
// that supplied the replacement value.
luisa::unordered_map<LoadInst *, StoreInst *> forwarded_instructions;
};

// Runs store-to-load forwarding on a single basic block and reports the forwarded loads.
[[nodiscard]] LC_XIR_API PeepholeStoreForwardInfo peephole_store_forward_pass_run_on_basic_block(BasicBlock *block) noexcept;
// Runs the pass on every basic block of the function's definition (if it has one);
// each block is processed independently and the results are merged into one info.
[[nodiscard]] LC_XIR_API PeepholeStoreForwardInfo peephole_store_forward_pass_run_on_function(Function *function) noexcept;
// Runs the pass on every function of the module and merges the results into one info.
[[nodiscard]] LC_XIR_API PeepholeStoreForwardInfo peephole_store_forward_pass_run_on_module(Module *module) noexcept;

}// namespace luisa::compute::xir
33 changes: 33 additions & 0 deletions include/luisa/xir/passes/pointer_usage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#pragma once

#include <luisa/core/stl/unordered_map.h>
#include <luisa/xir/passes/aggregate_field_bitmask.h>

namespace luisa::compute::xir {

class Value;
class BasicBlock;

// This pass analyzes the usage of pointers in a function,
// including reference arguments, alloca's, and GEP's.
// It records whether each scalar field of each pointer is
// - Killed: the field is definitely written to;
// - Touched: the field is possibly written to; or
// - Live: the field might be read from in the future.

// Per-pointer usage record; each bitmask tracks the scalar fields of the pointee.
struct PointerUsage {
// Killed: fields that are definitely written to.
AggregateFieldBitmask kill;
// Touched: fields that are possibly written to.
AggregateFieldBitmask touch;
// Live: fields that might be read from in the future.
AggregateFieldBitmask live;
};

// Associates a value with its owned usage record.
// NOTE(review): keys are presumably the analyzed pointers (reference arguments,
// allocas, GEPs) -- confirm against the pass implementation.
using PointerUsageMap = luisa::unordered_map<Value *, luisa::unique_ptr<PointerUsage>>;

// Pointer usage maps attached to one basic block.
// NOTE(review): `in` / `out` presumably hold the dataflow state at block entry
// and block exit respectively -- confirm against the pass implementation.
struct BasicBlockPointerUsage {
PointerUsageMap in;
PointerUsageMap out;
};



}
7 changes: 0 additions & 7 deletions include/luisa/xir/passes/ref_arg_usage.h

This file was deleted.

8 changes: 7 additions & 1 deletion src/backends/fallback/fallback_shader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <luisa/xir/instructions/print.h>

#include <luisa/xir/passes/dce.h>
#include <luisa/xir/passes/peephole_store_forward.h>

#include "../common/shader_print_formatter.h"

Expand Down Expand Up @@ -172,8 +173,13 @@ FallbackShader::FallbackShader(FallbackDevice *device, const ShaderOption &optio

// run some simple optimization passes on XIR to reduce the size of LLVM IR
Clock opt_clk;
auto store_forward_info = xir::peephole_store_forward_pass_run_on_module(xir_module);
auto dce_info = xir::dce_pass_run_on_module(xir_module);
LUISA_INFO("DCE removed {} instructions in {} ms.", dce_info.removed_instructions.size(), opt_clk.toc());
LUISA_INFO("Forwarded {} store instruction(s), "
"removed {} dead instructions in {} ms.",
store_forward_info.forwarded_instructions.size(),
dce_info.removed_instructions.size(),
opt_clk.toc());

// dump for debugging
if (LUISA_SHOULD_DUMP_XIR) {
Expand Down
3 changes: 2 additions & 1 deletion src/xir/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ set(LUISA_COMPUTE_XIR_SOURCES
passes/sink_alloca.cpp
passes/trace_gep.cpp
passes/aggregate_field_bitmask.cpp
passes/ref_arg_usage.cpp
passes/peephole_store_forward.cpp
passes/pointer_usage.cpp
)

add_library(luisa-compute-xir SHARED ${LUISA_COMPUTE_XIR_SOURCES})
Expand Down
114 changes: 114 additions & 0 deletions src/xir/passes/peephole_store_forward.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#include <luisa/xir/function.h>
#include <luisa/xir/module.h>
#include <luisa/xir/builder.h>
#include <luisa/xir/passes/peephole_store_forward.h>

namespace luisa::compute::xir {

namespace detail {

// Follows a pointer back through any chain of GEP instructions to the
// instruction it is ultimately derived from. Returns that instruction if it
// is an alloca in AllocSpace::LOCAL; returns nullptr for null pointers,
// non-instruction values, non-local allocas, and any other kind of instruction.
[[nodiscard]] AllocaInst *trace_pointer_base_local_alloca_inst(Value *pointer) noexcept {
    auto current = pointer;
    while (current != nullptr &&
           current->derived_value_tag() == DerivedValueTag::INSTRUCTION) {
        auto inst = static_cast<Instruction *>(current);
        auto tag = inst->derived_instruction_tag();
        if (tag == DerivedInstructionTag::GEP) {
            // step to the GEP's base pointer and keep walking
            current = static_cast<GEPInst *>(inst)->base();
            continue;
        }
        if (tag == DerivedInstructionTag::ALLOCA) {
            auto alloca_inst = static_cast<AllocaInst *>(inst);
            if (alloca_inst->space() == AllocSpace::LOCAL) {
                return alloca_inst;
            }
        }
        // neither a GEP nor a local alloca: not a traceable local variable
        return nullptr;
    }
    return nullptr;
}

// TODO: we only handle scalars for now
//
// Forwards stored values to later loads of the *same pointer value* within
// one basic block, for pointers that trace back (through GEPs) to a local
// alloca. The block is first scanned without being modified; afterwards every
// forwardable load has its uses replaced by the stored value, is removed from
// the block, and is recorded in `info.forwarded_instructions`.
//
// Aliasing is handled conservatively per variable: any store to, or any other
// non-load/non-GEP use of, a pointer derived from an alloca discards every
// remembered store for that alloca.
static void run_peephole_store_forward_on_basic_block(BasicBlock *block, PeepholeStoreForwardInfo &info) noexcept {

    // Latest forwardable store per pointer, grouped by the base local alloca.
    // Grouping by alloca lets us drop all possibly-aliasing entries of a
    // variable with a single clear(), instead of keeping an ever-growing
    // (and duplicate-laden) list of pointers per variable to erase one by one.
    using PointerStoreMap = luisa::unordered_map<Value *, StoreInst *>;
    luisa::unordered_map<AllocaInst *, PointerStoreMap> latest_stores;
    // maps loads to the store whose value can be forwarded to them
    luisa::unordered_map<LoadInst *, StoreInst *> removable_loads;

    // Forgets every remembered store to the variable that `ptr` is derived from.
    // Returns the base local alloca, or nullptr if `ptr` is not derived from one.
    auto invalidate_interfering_stores = [&](Value *ptr) noexcept -> AllocaInst * {
        auto alloca_inst = trace_pointer_base_local_alloca_inst(ptr);
        if (alloca_inst != nullptr) {
            if (auto iter = latest_stores.find(alloca_inst); iter != latest_stores.end()) {
                iter->second.clear();
            }
        }
        return alloca_inst;
    };

    for (auto &&inst : block->instructions()) {
        switch (inst.derived_instruction_tag()) {
            case DerivedInstructionTag::LOAD: {
                auto load = static_cast<LoadInst *>(&inst);
                auto pointer = load->variable();
                // only pointers into local allocas ever have remembered stores
                if (auto alloca_inst = trace_pointer_base_local_alloca_inst(pointer)) {
                    if (auto var_iter = latest_stores.find(alloca_inst); var_iter != latest_stores.end()) {
                        auto &stores = var_iter->second;
                        if (auto iter = stores.find(pointer); iter != stores.end()) {
                            removable_loads.emplace(load, iter->second);
                        }
                    }
                }
                break;
            }
            case DerivedInstructionTag::STORE: {
                auto store = static_cast<StoreInst *>(&inst);
                // if this is a store to (part of) a local alloca, we might be able to forward it;
                // older stores to the same variable are invalidated first
                auto pointer = store->variable();
                if (auto alloca_inst = invalidate_interfering_stores(pointer)) {
                    latest_stores[alloca_inst][pointer] = store;
                }
                break;
            }
            case DerivedInstructionTag::GEP: {
                // users of GEPs will handle the forwarding, so we don't need to do anything here
                break;
            }
            default: {// for other instructions, we invalidate possibly interfering stores
                for (auto op_use : inst.operand_uses()) {
                    invalidate_interfering_stores(op_use->value());
                }
                break;
            }
        }
    }

    // Apply the rewrites only after the scan so the instruction list is not
    // mutated while it is being iterated.
    for (auto &&[load, store] : removable_loads) {
        load->replace_all_uses_with(store->value());
        load->remove_self();
        info.forwarded_instructions.emplace(load, store);
    }
}

// Applies the block-local forwarding to every basic block reachable through
// the function's definition, accumulating results into `info`.
// Functions without a definition are left untouched.
void run_peephole_store_forward_on_function(Function *function, PeepholeStoreForwardInfo &info) noexcept {
    auto definition = function->definition();
    if (!definition) {
        return;
    }
    definition->traverse_basic_blocks([&](BasicBlock *bb) noexcept {
        run_peephole_store_forward_on_basic_block(bb, info);
    });
}

}// namespace detail

// Public entry point: runs the pass on one basic block and returns what was forwarded.
PeepholeStoreForwardInfo peephole_store_forward_pass_run_on_basic_block(BasicBlock *block) noexcept {
    PeepholeStoreForwardInfo result{};
    detail::run_peephole_store_forward_on_basic_block(block, result);
    return result;
}

// Public entry point: runs the pass on each basic block of one function
// and returns the combined forwarding record.
PeepholeStoreForwardInfo peephole_store_forward_pass_run_on_function(Function *function) noexcept {
    PeepholeStoreForwardInfo result{};
    detail::run_peephole_store_forward_on_function(function, result);
    return result;
}

// Public entry point: runs the pass on every function in the module and
// returns a single record covering all of them.
PeepholeStoreForwardInfo peephole_store_forward_pass_run_on_module(Module *module) noexcept {
    PeepholeStoreForwardInfo result{};
    for (auto &&func : module->functions()) {
        detail::run_peephole_store_forward_on_function(&func, result);
    }
    return result;
}

}// namespace luisa::compute::xir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include <luisa/xir/passes/ref_arg_usage.h>
#include <luisa/xir/passes/pointer_usage.h>

namespace luisa::compute::xir {

Expand Down

0 comments on commit dd93d02

Please sign in to comment.