diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index af49d011..a53bc16c 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -13,6 +13,18 @@ jobs: steps: - uses: actions/checkout@v3 + with: + fetch-depth: 0 # fetches all history so that you can switch branches + + + # Cache Solidity & Wasm tools + - name: Cache solidity and wasm tools + uses: actions/cache@v2 + with: + path: | + /usr/bin/solc + /usr/bin/wabt + key: ${{ runner.os }}-solidity-wasm-tools - name: Install solidity & wasm tools run: | sudo apt-get update sudo apt-get install -y wabt solc + # Cache Rustup toolchain and targets + - name: Cache rustup toolchain and targets + id: cache-rustup-toolchain-targets + uses: actions/cache@v2 + with: + path: | + ~/.rustup + ~/.cargo/bin + key: ${{ runner.os }}-rustup-toolchain-targets-${{ hashFiles('**/rust-toolchain') }} + + - name: Install Rust toolchain + if: steps.cache-rustup-toolchain-targets.outputs.cache-hit != 'true' + run: rustup self update + - name: Add wasi target + if: steps.cache-rustup-toolchain-targets.outputs.cache-hit != 'true' run: rustup target add wasm32-wasi - + - name: Add unknown target + if: steps.cache-rustup-toolchain-targets.outputs.cache-hit != 'true' run: rustup target add wasm32-unknown-unknown + + # Cache Rust dependencies + - name: Cache cargo registry + uses: actions/cache@v2 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} + + - name: Cache cargo target + uses: actions/cache@v2 + with: + path: target + key: ${{ runner.os }}-cargo-target-${{ hashFiles('**/Cargo.lock') }} - name: Install dependencies run: cargo fetch - - name: Cache dependencies - uses: actions/cache@v2 - with: - path: ~/.cargo - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - - name: Build run: make @@ -58,14 +90,31 @@ jobs: curl https://wasmtime.dev/install.sh -sSf | bash export WASMTIME_HOME="$HOME/.wasmtime" export PATH="$WASMTIME_HOME/bin:$PATH" SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar - bash -c "python3 tools/tests.py" + + - name: Install pandas + run: pip3 install pandas - name: Run benchmark run: python3 tools/bench.py - - uses: JamesIves/github-pages-deploy-action@v4.4.1 - with: - branch: gh-pages - folder: tools/benchmark/pages - clean: false + # - name: Some debug + # run: | + # env + # git log + + # - name: Checkout gh-pages branch + # run: | + # git fetch origin gh-pages + # git checkout gh-pages + + # - name: Copy generated CSV files + # run: cp -r tools/benchmark/csvs/* csvs/ + + # - name: Commit and push changes to gh-pages + # run: | + # git config user.name "MCJOHN974" + # git config user.email "mcviktor@tut.by" + # git add csvs/ + # git commit -m "Benchmark workflow" + # git push origin gh-pages diff --git a/.gitignore b/.gitignore index ca822203..a34c17bd 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,6 @@ # Debugging artifacts **/*.dot + +# Compiled contracts +*.wasm diff --git a/Makefile b/Makefile index d7efe289..46d36b4b 100644 --- a/Makefile +++ b/Makefile @@ -25,11 +25,11 @@ evm2near-macos: evm2near-macos-arm evm2near-macos-x86 $(LIPO) -create -output $@ $^ evm2near-macos-arm: bin/evm2near/Cargo.toml $(EVM2NEAR_FILES) Makefile evmlib.wasi evmlib.wasm - $(CARGO) build --package=evm2near --release --frozen --target=aarch64-apple-darwin + $(CARGO) build --package=evm2near --release --target=aarch64-apple-darwin ln -sf target/aarch64-apple-darwin/release/evm2near $@ evm2near-macos-x86: 
bin/evm2near/Cargo.toml $(EVM2NEAR_FILES) Makefile evmlib.wasi evmlib.wasm - $(CARGO) build --package=evm2near --release --frozen --target=x86_64-apple-darwin + $(CARGO) build --package=evm2near --release --target=x86_64-apple-darwin ln -sf target/x86_64-apple-darwin/release/evm2near $@ evm2near-windows-arm.exe: bin/evm2near/Cargo.toml $(EVM2NEAR_FILES) Makefile evmlib.wasi evmlib.wasm @@ -41,20 +41,20 @@ evm2near-windows-x86.exe: bin/evm2near/Cargo.toml $(EVM2NEAR_FILES) Makefile evm ln -sf target/x86_64-pc-windows-gnu/release/evm2near.exe $@ evm2near-linux-arm: bin/evm2near/Cargo.toml $(EVM2NEAR_FILES) Makefile evmlib.wasi evmlib.wasm - $(CARGO) build --package=evm2near --release --frozen --target=aarch64-unknown-linux-musl + $(CARGO) build --package=evm2near --release --target=aarch64-unknown-linux-musl ln -sf target/aarch64-unknown-linux-musl/release/evm2near $@ evm2near-linux-x86: bin/evm2near/Cargo.toml $(EVM2NEAR_FILES) Makefile evmlib.wasi evmlib.wasm - $(CARGO) build --package=evm2near --release --frozen --target=x86_64-unknown-linux-musl + $(CARGO) build --package=evm2near --release --target=x86_64-unknown-linux-musl ln -sf target/x86_64-unknown-linux-musl/release/evm2near $@ evmlib.wasm: lib/evmlib/Cargo.toml $(EVMLIB_FILES) Makefile - $(CARGO) build --package=evmlib --release --frozen --target=wasm32-unknown-unknown --no-default-features --features=gas,pc,near + $(CARGO) build --package=evmlib --release --target=wasm32-unknown-unknown --no-default-features --features=gas,pc,near $(WASM_STRIP) target/wasm32-unknown-unknown/release/$@ ln -sf target/wasm32-unknown-unknown/release/$@ $@ evmlib.wasi: lib/evmlib/Cargo.toml $(EVMLIB_FILES) Makefile - $(CARGO) build --package=evmlib --release --frozen --target=wasm32-wasi --no-default-features --features=gas,pc + $(CARGO) build --package=evmlib --release --target=wasm32-wasi --no-default-features --features=gas,pc $(WASM_STRIP) target/wasm32-wasi/release/evmlib.wasm ln -sf target/wasm32-wasi/release/evmlib.wasm $@ diff --git a/docs/README.md b/docs/README.md index e69de29b..3fb44393 100644 --- a/docs/README.md +++ b/docs/README.md @@ -0,0 +1,120 @@
+# Relooper Algorithm Description
+
+## Definitions
+In this chapter, we will review some well-known compiler definitions and introduce some definitions specific to this project.
+
+A **Control Flow Graph (CFG)** is an abstraction used in compilers and programming language analysis to represent the flow of control within a program. It is a directed graph that models the different paths or sequences of instructions that can be executed in a program. The nodes in the graph represent basic blocks, which are sequences of instructions with no branching or jumping within them.
+
+**Node N dominates node M** if and only if every path from the entry node to node M in the CFG contains node N. The entry node is considered to dominate all nodes in the CFG, including itself.
+
+There are two important cases of dominance:
+
+- **Immediate Dominator (IDOM):** Node N is the immediate dominator of node M if N strictly dominates M and every other strict dominator of M also dominates N. In other words, N is the closest dominator to M among all dominators of M.
+
+- **Dominance Frontier:** The dominance frontier of a node N is the set of all nodes that are not strictly dominated by N but have at least one predecessor that is dominated by N. It helps in analyzing control dependencies in a program.
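+
+To make the dominance definitions concrete, here is a small self-contained sketch of the textbook fixpoint computation of dominator sets. It is an illustration only, with toy node numbers and a plain adjacency map; the project's real implementation lives in `lib/relooper/src/graph/dominators.rs` and uses a much faster algorithm:
+
+```rust
+use std::collections::{BTreeMap, BTreeSet};
+
+// Naive fixpoint iteration: dom(entry) = {entry}; for every other node n,
+// dom(n) = {n} plus the intersection of dom(p) over all predecessors p of n.
+fn dominators(entry: u32, succs: &BTreeMap<u32, Vec<u32>>) -> BTreeMap<u32, BTreeSet<u32>> {
+    let nodes: BTreeSet<u32> = succs.keys().copied().collect();
+    let mut preds: BTreeMap<u32, Vec<u32>> = BTreeMap::new();
+    for (&n, ss) in succs {
+        for &s in ss {
+            preds.entry(s).or_default().push(n);
+        }
+    }
+    // Start from the most optimistic sets and shrink them to a fixpoint.
+    let mut dom: BTreeMap<u32, BTreeSet<u32>> = nodes
+        .iter()
+        .map(|&n| (n, if n == entry { BTreeSet::from([entry]) } else { nodes.clone() }))
+        .collect();
+    let mut changed = true;
+    while changed {
+        changed = false;
+        for &n in nodes.iter().filter(|&&n| n != entry) {
+            let mut new_set: Option<BTreeSet<u32>> = None;
+            for p in preds.get(&n).into_iter().flatten() {
+                new_set = Some(match new_set {
+                    None => dom[p].clone(),
+                    Some(acc) => acc.intersection(&dom[p]).copied().collect(),
+                });
+            }
+            let mut new_set = new_set.unwrap_or_default();
+            new_set.insert(n);
+            if new_set != dom[&n] {
+                dom.insert(n, new_set);
+                changed = true;
+            }
+        }
+    }
+    dom
+}
+
+fn main() {
+    // A diamond: 0 -> {1, 2}, 1 -> 3, 2 -> 3.
+    let succs = BTreeMap::from([(0, vec![1, 2]), (1, vec![3]), (2, vec![3]), (3, vec![])]);
+    for (n, d) in dominators(0, &succs) {
+        // dom(3) = {0, 3}: neither branch node dominates the join, so idom(3) = 0.
+        println!("dom({n}) = {d:?}");
+    }
+}
+```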
+
+**The Domination Tree** (or Dominator Tree) is a data structure used in graph theory and compiler optimizations, specifically in the context of Control Flow Graphs (CFGs). It provides a hierarchical representation of the dominance relationships between nodes in the CFG.
+
+The Domination Tree is a tree where each node corresponds to a basic block in the CFG. The root of the tree represents the entry node of the CFG, and each node's children are the nodes that are immediately dominated by it. In other words, if node A is the immediate dominator of node B in the CFG, then in the Domination Tree, there is an edge from node A to node B.
+
+**Properties of the Domination Tree:**
+- It is a directed tree: The edges in the Domination Tree point from the immediate dominator (parent) to the dominated node (child).
+- Unique paths: For each node in the CFG, there is a unique path in the Domination Tree from the root (entry node) to that node, representing the chain of immediate dominators.
+
+**A reducible CFG** is one with edges that can be partitioned into two disjoint sets: forward edges and back edges, such that:
+- Forward edges form a directed acyclic graph with all nodes reachable from the entry node.
+- For all back edges (A, B), node B dominates node A.
+
+If some of these definitions were new to you, I recommend reading this page: [Control-flow_graph](https://en.wikipedia.org/wiki/Control-flow_graph)
+
+## The Problem
+EVM bytecode is very similar to well-known assembly languages: its control flow is defined by JMP and CJMP instructions. WASM control flow, by contrast, looks like the control flow of high-level languages -- branching, loops, scopes, a BR instruction that just breaks out of scopes, and no GOTO-like instruction. So, this algorithm converts GOTO-style control flow into high-level-style control flow.
+
+## Approach
+The full description of the algorithm can be read in this paper: [Link to the paper](https://dl.acm.org/doi/pdf/10.1145/3547621). Here are the main steps:
+
+1) **Deal with dynamic edges:** Without extra analysis and information, we must assume that each JMP or CJMP instruction can jump to each JUMPDEST instruction. Such an outgoing CFG edge with an undefined destination is called a "dynamic edge". The Relooper algorithm can't work with dynamic edges, so we need to change all dynamic edges into sets of static edges (edges with known destinations) in our CFG.
+
+2) **Domination tree building:** The domination tree is a helper structure built on top of the control flow graph. This structure is widely used in compilers and in the Relooper algorithm as well.
+
+3) **"Reduce" the control flow graph:** Every control flow graph is either reducible or irreducible. The Relooper algorithm can deal only with reducible ones, so the next step is building a reducible CFG equivalent to the given irreducible one.
+
+4) **Nodes and edges labeling:** The Relooper algorithm needs some flags for all nodes and edges. Nodes can have the following flags: (if, loop, merge). Nodes can have any combination of these flags (even all true or all false). Edges are classified as forward or backward as follows: if the DFS number of an edge's origin is less than the DFS number of its destination, the edge is forward; otherwise, it is backward.
+
+5) **Relooping:** This is the final stage of the algorithm.
+
+## Dive Deeper
+In this section, we will review all steps in more detail and mention the files with the code that performs these computations.
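+
+Before diving in, here is a minimal sketch of the forward/backward edge classification from step 4. It is an illustration only, with a toy graph representation; the project's labeling code lives in `lib/relooper/src/graph/enrichments.rs`, and the exact DFS order it uses may differ:
+
+```rust
+use std::collections::HashMap;
+
+// Number nodes in DFS order from the entry, then classify each edge:
+// origin numbered before destination => forward edge; otherwise => backward edge.
+fn classify_edges(entry: u32, succs: &HashMap<u32, Vec<u32>>) -> Vec<(u32, u32, &'static str)> {
+    let mut dfs_num: HashMap<u32, usize> = HashMap::new();
+    let mut stack = vec![entry];
+    while let Some(n) = stack.pop() {
+        if dfs_num.contains_key(&n) {
+            continue;
+        }
+        dfs_num.insert(n, dfs_num.len());
+        for &s in succs.get(&n).into_iter().flatten() {
+            stack.push(s);
+        }
+    }
+    let mut edges = Vec::new();
+    for (&from, ss) in succs {
+        for &to in ss {
+            let kind = if dfs_num[&from] < dfs_num[&to] { "forward" } else { "backward" };
+            edges.push((from, to, kind));
+        }
+    }
+    edges
+}
+
+fn main() {
+    // 0 -> 1, 1 -> 2, 2 -> 1: the edge 2 -> 1 closes a loop and comes out backward.
+    let succs = HashMap::from([(0, vec![1]), (1, vec![2]), (2, vec![1])]);
+    for (from, to, kind) in classify_edges(0, &succs) {
+        println!("{from} -> {to}: {kind}");
+    }
+}
+```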
+
+1) **Dynamic edges:** This step is handled by the function `basic_cfg(program: &Program) -> BasicCfg` in the `bin/evm2near/src/analyze.rs` file. The main approach is to create an extra CFG node without any code, called "Dynamic"; all nodes with a dynamic edge now get a static edge to the dynamic node instead. The dynamic node has a special "switch" edge that points to one of the JUMPDEST instructions according to the jumptable. Later, this edge will be turned into a "switch" wasm instruction.
+
+2) **Domination tree:** The algorithm implemented in the current project is quite big, and you can find the full description [here](https://dl.acm.org/doi/pdf/10.1145/357062.357071). You can also find a slightly faster but far more complicated algorithm [here](https://dl.acm.org/doi/10.5555/982792.982922) (in my opinion, the priority of upgrading to this algorithm is low). Currently, this implementation contains a bug that is located somewhere in the LINK-EVAL data structure implementation. To reproduce this bug, you need to compile some contract with a big CFG, for example, `test/big-cfg.sol` or `test/Collatz.sol`. If you replace the LINK-EVAL implementation with the naive one, everything will work. Also, there is a more naive implementation of the domination tree algorithm in earlier commits. The implementation of the algorithm is [here](lib/relooper/src/graph/dominators.rs). If you want to make changes in this code, I strongly recommend reading the paper because the algorithm is pretty big, and the code is very close to the paper. If you don't have time to read all the mathematics in the paper, I recommend focusing on the semidominator definition, theorem 4, corollary 1, chapters 3 and 4, and appendix B.
+
+3) **Reducing:** The main idea of creating a reducible graph equivalent to a given irreducible one is node duplication. Let's look at the following CFG: A->B, A->C, B->C, C->B, with A as the entry. This CFG is irreducible since the B-C loop has two headers. We can duplicate node B, creating node B', and redirect edges in the following way: A->B, A->C, B->C, C->B', B'->C. You can see that with the same input, both graphs will provide the same execution, but the new graph has only one loop -- C-B', and this loop has exactly one header -- C, thus it is reducible. That was the idea; now let's take a look at how to do it for any input CFG. The code is located in `lib/relooper/src/graph/reduction/mod.rs`, with the older (deprecated) version in `lib/relooper/src/graph/supergraph.rs`. You can find documentation for both approaches in the code.
+
+4) **Labeling:** Labeling is a pretty easy step. Each node that has more than one in-edge is called a merge node. Each node that has more than one out-edge is called an if node. Each node that has at least one backward in-edge is called a loop node. Edges are divided into forward and backward as follows: if the DFS number of an edge's origin is less than the DFS number of its destination, the edge is forward; otherwise, it is backward. You can find the code that performs this labeling in `lib/relooper/src/graph/enrichments.rs`.
+
+5) **Relooping:** This is the final stage of the algorithm. This part is also quite difficult, and I recommend reading the paper, but here is a short description. The algorithm manipulates the following functions:
+• Function doTree is called on a subtree of the dominator tree, rooted at node 𝑋; doTree
+returns the translation of the subtree, which includes 𝑋 and everything that 𝑋 dominates.
+Function doTree first creates a syntactic template based on the properties of 𝑋 from section 4,
+then fills the template with the translations of 𝑋's children. These children are the nodes that
+𝑋 immediately dominates.
+• Function doBranch is called on the labels of two nodes 𝑋 and 𝑌; it returns code that, when
+placed after the translation of 𝑋, transfers control to the translation of 𝑌. If 𝑋 is 𝑌's only
+forward-edge predecessor, doBranch simply returns the translation of 𝑌 (and everything
+that 𝑌 dominates). Otherwise 𝑌's translation already appears in the context, and doBranch
+returns a br instruction.
+• Function nodeWithin is an auxiliary function; it places the translation of a single node into
+a nest of blocks. Function nodeWithin is called by doTree 𝑋, which looks at 𝑋's children in
+the dominator tree and computes Ys: the children of 𝑋 that are merge nodes. Then doTree
+passes 𝑋 and Ys to nodeWithin, which returns the translation of 𝑋 (and its other children)
+nested inside one block for each element of Ys. The elements of Ys are ordered with higher
+reverse postorder numbers first.
+These functions take information from the domination tree and the labeling, and manipulate one other structure, the `context`. It describes the syntactic context into
+which WebAssembly code is placed. That context determines the behavior of `br` instructions.
+You can find the code that performs relooping in `lib/relooper/src/graph/relooper.rs`
+
+## Helpers
+In this section, we briefly introduce some helpful code that is not related to the main algorithm but can be useful for testing, debugging, and benchmarking.
+
+1. **Printing CFG in .dot format:** You can use the following code to print your CFG in .dot format and save it to a file named "cfg.dot".
+
+    ```rust
+    let debug = format!("digraph {{{}}}", cfg.cfg_to_dot("cfg"));
+    std::fs::write("cfg.dot", debug).expect("fs error while writing debug file");
+    ```
+
+2. **Graph Traversals:** If you need to perform some graph traversals (DFS, BFS), check out the code from `lib/relooper/src/traversal`. It is very likely that the traversal you need is already implemented there.
+
+3. **Initializing CFG:** There are several ways to easily initialize your CFG. For example, you can use the `from_edges(entry: TLabel, edges: HashMap<TLabel, CfgEdge<TLabel>>) -> Self` function. Also, there are many useful functions for manipulating CFGs, such as `add_edge_or_promote(&mut self, from: T, to: T)`, `remove_edge(&mut self, from: T, edge: &CfgEdge<T>)`, and others. You can find all of them in `lib/relooper/src/graph/cfg/mod.rs`. A combined sketch of this and the dot printer from item 1 appears after this list.
+
+4. **Test Contracts:** You can find some simple contracts in the `test/` directory and use them as input for the compiler.
+
+5. **Tools:** There are some Python scripts for testing and debugging in the `tools/` directory. `tools/test.py` compiles some contracts, runs them in Wasmtime, calls some functions, and asserts that the output is correct. `tools/bench.py` compiles contracts, runs them in the NEAR localnet, measures gas consumption, and produces a CSV with the gas consumption of different contracts on different inputs. You can find benchmarking data in the following CSV: `tools/benchmark/csvs/<commit>.csv`. Be careful: the Rust code in `tools/benchmark/` makes some assumptions (for example, that contracts are already compiled), so it is better not to run this code manually; just run it through `tools/bench.py`.
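+
+The following self-contained sketch ties the two helpers above together. It mirrors the `CfgEdge` enum from the next section rather than calling the repo's actual API (the real type, `from_edges`, and `cfg_to_dot` live in `lib/relooper/src/graph/cfg/mod.rs`; the node numbers here are illustrative):
+
+```rust
+use std::collections::BTreeMap;
+
+// Mirrors the `CfgEdge` enum described in the next section, generic over the label type.
+enum CfgEdge<T> {
+    Uncond(T),
+    Cond(T, T),
+    Switch(Vec<(usize, T)>),
+    Terminal,
+}
+
+// Renders the graph in Graphviz .dot syntax, one arrow per out-edge.
+fn to_dot(edges: &BTreeMap<u32, CfgEdge<u32>>) -> String {
+    let mut out = String::from("digraph cfg {\n");
+    for (from, edge) in edges {
+        match edge {
+            CfgEdge::Uncond(to) => out.push_str(&format!("  {from} -> {to};\n")),
+            CfgEdge::Cond(t, f) => {
+                out.push_str(&format!("  {from} -> {t} [label=\"true\"];\n"));
+                out.push_str(&format!("  {from} -> {f} [label=\"false\"];\n"));
+            }
+            CfgEdge::Switch(arms) => {
+                for (value, to) in arms {
+                    out.push_str(&format!("  {from} -> {to} [label=\"{value}\"];\n"));
+                }
+            }
+            CfgEdge::Terminal => {}
+        }
+    }
+    out.push_str("}\n");
+    out
+}
+
+fn main() {
+    // Exactly one edge descriptor per node, as in the storage scheme below.
+    let edges = BTreeMap::from([
+        (0, CfgEdge::Cond(1, 2)),
+        (1, CfgEdge::Uncond(3)),
+        (2, CfgEdge::Switch(vec![(0, 3), (1, 1)])),
+        (3, CfgEdge::Terminal),
+    ]);
+    std::fs::write("cfg.dot", to_dot(&edges)).expect("fs error while writing debug file");
+}
+```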
+
+## Some words about how we store the CFG
+Mainly, we have two structures -- `CfgLabel` (node) and `CfgEdge` (edge). A label is usually an id number with some extra information, but most functions that process `CfgLabel`s are generic. `CfgEdge` is a more interesting structure; it is an enum defined as follows:
+```
+pub enum CfgEdge {
+    Uncond(TLabel),
+    Cond(TLabel, TLabel),
+    Switch(Vec<(usize, TLabel)>),
+    Terminal,
+}
+```
+In a Cfg, each `CfgLabel` has exactly one `CfgEdge`, so a node can't contain two Uncond edges. This structure maps onto the mathematical representation of a graph as follows:
+Uncond means that this CFG node has exactly one out-edge.
+Cond means that this node has exactly two out-edges: the first is taken when the condition is true and the second in the opposite case.
+Switch means that this node has more than two out-edges and stores a table that maps the number on top of the stack to a destination node.
+Terminal means that this node has no out-edges, and the program terminates if we end up here.
+
+`lib/relooper/test_data` contains some files with CFGs in the following format:
+```
+cfg_origin
+/// edges:
+edge_origin edge_dest second_edge_dest(if edge is cond)
+```
+You can parse these files using the code from `lib/relooper/src/graph/cfg/cfg_parsing.rs`.
+
+If you still have any questions, don't hesitate to mail: [mcviktor@tut.by](mailto:mcviktor@tut.by) diff --git a/docs/irreducible-loop.png b/docs/irreducible-loop.png deleted file mode 100644 index c91a5dcb..00000000 Binary files a/docs/irreducible-loop.png and /dev/null differ diff --git a/docs/quick-intro.md b/docs/quick-intro.md deleted file mode 100644 index 586c2fbf..00000000 --- a/docs/quick-intro.md +++ /dev/null @@ -1,213 +0,0 @@ ---- -marp: true -paginate: true -size: 16:9 ---- - -# evm2near quick intro - ---- - -# evm bytecode - -``` -0x00: PUSH1 0x80 -0x02: PUSH1 0x40 -0x04: MSTORE -0x05: CALLVALUE -0x06: DUP1 -0x07: ISZERO -0x08: PUSH2 0x10 -0x0B: JUMPI --* -0x0C: PUSH1 0x00 <-* -0x0E: DUP1 | -0x0F: REVERT | -0x10: JUMPDEST <-* -0x11: POP -0x12: PUSH1 0x40 -... -``` - ---- - -# WASM - -tries to be similar to programming languages control flow, not other low-level asm/bytecodes - -![bg right:60% contain](wasm-fib.png) - ---- - -# (un)structured control flow - - - - - - - - - -
-unstructured (any goto-like jump) vs. structured:
-
-```
-0x00: PUSH1 0x80
-0x02: PUSH1 0x40
-0x04: MSTORE
-0x05: CALLVALUE
-0x06: DUP1
-0x07: ISZERO
-0x08: PUSH2 0x10
-0x0B: JUMPI --*
-0x0C: PUSH1 0x00 <-*
-0x0E: DUP1 |
-0x0F: REVERT |
-0x10: JUMPDEST <-*
-...
-```
-
-![w:1000](structured.png)
- ---- - - -# simplest interpreter - -* current position bytecode decoding -* switch statement that chooses appropriate handler and calls it -* handlers for each bytecode operation - * modifies state - * returns next bytecode position (to be executed) - ---- - -# translator vs compiler? - -* translator - * converts everything to everything - * java -> js - * lisp -> C - * java bytecode -> .net IL code - * asm -> native code - * and even data formats conversion - * so, compiler is a special case of translator - * not all translators should be considired compilers -* compiler: source code -> "some executable form": - * native code (asm <-> native is usually trivially-conversible) - * VM bytecode (that would be interpreted/JITted/AOT-compiled later) -* **but this is not a rule, and many disagree whether something is a compiler** - ---- - -# what we had at the beginning - -sometimes called "interpreter specialized by program": -* we still have "operation handlers" that modify state -* "compiled" program consists of calls to handlers defined by specified evm listing - * `call(PUSH1)`, `call(DUP2)`, ... -* "handlers" are written in Rust and compiled to wasm module - -```wasm -if $I0 - loop $L1 - block $B2 - call $jumpdest - call $pop - i32.const 4 - call $push1 - call $calldatasize - call $lt - call $_evm_pop_u32 -``` - ---- - -# what we had at the beginning (part 2) - -* due to difference in (un)structured CFG, control flow was implemented naivly: - * loop is just `call(loop_body)`, so it consumes one stack frame on each iteration - * to solve that, we needed `relooper`-family algorithm -* overall, it does not differs from interpreted code that much - * no dispatch code (switch in interpreter case) - * much more closer to "real compiler", can be gradually improved to be - ---- - -# CFG (control flow graph) - -* generated from EVM bytecode -* each node contains set of sequential instructions (basic block) -* edges between nodes -- control flow transfer (`return`, `if`, `loop`, `continue`, `break` + function calls) -* todo image - ---- - -# relooper - -* transform loops/conditionals to structured CFG -* wasn't needed before wasm that much -* first mention -- Emscripten (C/C++ -> WASM) - * theory was there since at least late 80 -* requires input CFG to be reducible -* we used "Beyond Relooper" by Norman Ramsey (2022) - ---- - -# (ir)reducible loops - -![bg right fit](irreducible-loop.png) - -* more than one "header" node -* some nodes should be duplicated to "reduce" graph - ---- - -# first approach: supernodes - -* proposed by "Beyond Relooper" paper -* series of graph traversal with two actions: - * merge: merge two nodes to a single `supernode` - * split: duplicate one node into two, preserving cfg edges -* can produce HUGE graphs (96 initial nodes -> ~5500 output nodes) - ---- - -# second approach: DJ-graphs - -* two main papers: - * "Handling Irreducible Loops: Optimized Node Splitting vs. DJ-Graphs" by Sebastian Unger and Frank Mueller - * "Identifying Loops Using DJ Graphs" by Vugranam C. Sreedhar, Guang R. Gao, Yong-Fong Lee -* much more complex algorithm -* produces **much** more compact graphs (96 nodes -> 105 output nodes!) -* took months to implement properly - ---- - -# what else? - -* huge bunch of algorithms & data structures around graph manupulation in general and CFG specifically - * traversals, dominators, node orderings, debug-printing, loop detection, heuristics, ... 
-* evm bytecode analysis rewritten to match new (more complex) CFG structure -* replaced wasm decode & encode tools (unsupported dependency) - * `parity-wasm` -> `wasmprinter` + `wasm-encoder` + gule code because WASM ecosystem is garbage -* some kind of CI/CD testing - ---- - -# future work - -* switch "operation handlers" to "aurora host functions" - * depends on `synchronous wasm` - * would enable cross-contract calls -* move towards "real compiler": - * rewrite some operation handlers to WASM-native instructions - * get rid of "virtual EVM stack" and utilize WASM stack directly - * requires data flow analysis & "register allocation" algorithms -* compiler stabilization (or even having a switch for stable compilation) -* multiple-file compilation model -* function splitting (no matter how many function there are, they are represented by single linear EVM listing) -* of cource, much more testing (we havent even tested memory operations properly!) \ No newline at end of file diff --git a/docs/quick-intro.pdf b/docs/quick-intro.pdf deleted file mode 100644 index 5df829db..00000000 Binary files a/docs/quick-intro.pdf and /dev/null differ diff --git a/docs/structured.png b/docs/structured.png deleted file mode 100644 index 51961647..00000000 Binary files a/docs/structured.png and /dev/null differ diff --git a/docs/wasm-fib.png b/docs/wasm-fib.png deleted file mode 100644 index 4e960c3f..00000000 Binary files a/docs/wasm-fib.png and /dev/null differ diff --git a/lib/relooper/BACKLOG.md b/lib/relooper/BACKLOG.md index 2fe3840b..9c0d0b53 100644 --- a/lib/relooper/BACKLOG.md +++ b/lib/relooper/BACKLOG.md @@ -1,6 +1,3 @@ # Here will be a list of potential improvements. 1) Rewrite domination tree building algorithm. There is an algorithm with better time complexety but much harder to implement. -2) Find optimal order of split and merge in reducing algorithm - Different orders generate different graphs and different graphs provide different performance (both gas and time) - of output programs diff --git a/lib/relooper/src/graph/supergraph.rs b/lib/relooper/src/graph/supergraph.rs index 83b7c21b..3fd5e4ac 100644 --- a/lib/relooper/src/graph/supergraph.rs +++ b/lib/relooper/src/graph/supergraph.rs @@ -1,6 +1,26 @@ /// simple but working algorithm for graph reduction /// produces too much copied nodes for fairly simple examples, which complicates every later stage /// was replaced by more efficient algorithm (`relooper::graph::reduction`) for that reasons +/// +/// The algorithm: firstly, let's define a supernode. +/// A supernode is a group of nodes of the initial CFG. +/// Initially, we put each CFG node in a separate supernode. +/// Then, we perform two operations -- merge and split. +/// Merge: If all in-edges of all CFG nodes of some supernode A have the origin in supernode A and/or in exactly one other supernode B, +/// we can merge these supernodes -- just assign that now all CFG nodes of supernode A are in supernode B. +/// Split: Now, let's say we have a supernode A and a set of supernodes {B0, B1, ... Bn} +/// such that all in-edges of CFG nodes in supernode A have the origin in supernode A or in one of supernodes Bi. +/// Then, we can perform a split -- duplicate node A n times, now we have supernodes {A0, A1, ... An} with the same code inside. +/// And for each of these supernodes, we will cut all in-edges that are not from Ai or from Bi. +/// For example, for node A3, we will cut all in-edges that are not from A3 or B3. Then we will perform n merges (Ai with Bi). 
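+/// For example, take the irreducible CFG from `docs/README.md` -- A->B, A->C, B->C, C->B,
+/// with entry A -- and run these operations on it (the labels are illustrative):
+/// - start with supernodes {A}, {B}, {C};
+/// - no merge applies at first: B has in-edges from two other supernodes ({A} and {C}),
+///   C likewise, and the entry A has no in-edges at all;
+/// - split B: make a copy B', keep the in-edge from A for B and the in-edge from C for B',
+///   then merge B into {A} and B' into {C};
+/// - now all external in-edges of {C, B'} come from {A, B} alone, so one final merge remains.
+/// The resulting single supernode contains A->B, A->C, B->C, C->B', B'->C -- exactly the
+/// reducible duplication described in the reduction step of `docs/README.md`.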
+/// We perform these operations until there is exactly one supernode. +/// After that, we just return the graph contained in this supernode. +/// At each step, there is a variety of operations we can do -- some splits and some merges -- and we need to choose one. +/// These choices affect the execution time and, more importantly, the size of the resulting CFG. +/// We didn't find the best way to make these decisions, so currently we use a greedy strategy. +/// Some words about correctness: it is easy to see that if we have more than one supernode and the CFG is connected, we can perform a merge or a split. +/// Also, both merge and split reduce the number of supernodes by one, so the algorithm finishes after at most `size(CFG)` iterations. +/// The proof that each irreducible loop will be broken by a split is quite long, and we leave it to the reader. use super::reduction::SLabel; use super::{GEdgeColl, GEdgeCollMappable, Graph, GraphMut}; use crate::graph::cfg::{Cfg, CfgLabel}; diff --git a/test/bench-mini.sol b/test/bench-mini.sol new file mode 100644 index 00000000..7df6825d --- /dev/null +++ b/test/bench-mini.sol @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: Unlicense +pragma solidity ^0.8.16; + +contract Bench { + function cpu_ram_soak_test(uint32 loop_limit) public pure { + uint8[100] memory buf; + uint32 len = 100; + for (uint32 i=0; i < loop_limit; i++) { + uint32 j = (i * 7 + len / 2) % len; + uint32 k = (i * 3) % len; + uint8 tmp = buf[k]; + buf[k] = buf[j]; + buf[j] = tmp; + } + } +} \ No newline at end of file diff --git a/test/bench.sol b/test/bench.sol index 4055d649..01fb6589 100644 --- a/test/bench.sol +++ b/test/bench.sol @@ -13,4 +13,4 @@ contract Bench { buf[j] = tmp; } } -} +} \ No newline at end of file diff --git a/test/bench_with_writing_in_memory.sol b/test/bench_with_writing_in_memory.sol new file mode 100644 index 00000000..a89b8e95 --- /dev/null +++ b/test/bench_with_writing_in_memory.sol @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: Unlicense +pragma solidity ^0.8.16; + +contract Bench { + + function cpu_ram_soak_test(uint32 loop_limit) public pure { + uint32[1024] memory buf; + for (uint32 i=0; i < loop_limit; i++) { + uint32 j = (i * 7 + 100500 / 2) % 100500; + uint32 k = (i * 3) % 100500; + uint32 abra = (i + 15) / 13; + buf[i] = k + j; + + } + } +} diff --git a/test/bench_without_memory_allocation.sol b/test/bench_without_memory_allocation.sol new file mode 100644 index 00000000..2f90efd7 --- /dev/null +++ b/test/bench_without_memory_allocation.sol @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: Unlicense +pragma solidity ^0.8.16; + +contract Bench { + + function cpu_ram_soak_test(uint32 loop_limit) public pure { + for (uint32 i=0; i < loop_limit; i++) { + uint32 j = (i * 7 + 100500 / 2) % 100500; + uint32 k = (i * 3) % 100500; + uint32 abra = (i + 15) / 13; + uint32 rrr = k + j; + + } + } +} diff --git a/tools/bench.py b/tools/bench.py index 4ed740d0..6c9582cf 100644 --- a/tools/bench.py +++ b/tools/bench.py @@ -2,13 +2,12 @@ import os - -contracts = [ - 'calc' - # 'bench', - # 'Collatz', - # 'echo', - # 'const' -] + +print(os.listdir("tools/benchmark/inputs")) + +contracts = list(map(lambda x: x[:-5], os.listdir("tools/benchmark/inputs"))) + +print(f"contracts = {contracts}") def compile(name: str): @@ -40,45 +39,99 @@ def clean(): def run_bench(): os.chdir('tools/benchmark') - os.system('cargo run') + assert os.system('cargo run') == 0 os.chdir('../../') -def check_ci(): - with open("tools/benchmark/pages/index.html", "w") as html: - lines = 
[<deleted HTML template lines for a "CSV File Viewer / Checking CI" page: boilerplate head, a heading, an empty table, and a script that fetched and rendered the benchmark CSV>] - html.writelines(lines) + + +import pandas as pd + + +import subprocess + if __name__ == "__main__": - clean() - compile_contracts() - print("Contracts compiled") - copy_contracts() - print("Benchmark started") - run_bench() - print("Benchmark ended, see results in tools/benchmark/benchmark.csv") - print("Clean started") - clean() - print("Clean ended") - - - check_ci() - \ No newline at end of file + + if os.environ.get("GITHUB_SHA") is None: + # script running locally + result = subprocess.run(['bash', '-c', 'git rev-parse --short HEAD'], stdout=subprocess.PIPE) + commit = result.stdout.decode('utf-8') + commit = commit[:-1] + else: + # script running in github actions + event_name = os.environ.get('GITHUB_EVENT_NAME') + ref = os.environ.get('GITHUB_REF') + + if event_name == "push" and ref == "refs/heads/master": + # push to master + result = subprocess.run(['bash', '-c', 'git rev-parse --short HEAD'], stdout=subprocess.PIPE) + commit = result.stdout.decode('utf-8') + commit = commit[:-1] + else: + # pull request + result = subprocess.run(['bash', '-c', 'git log --pretty=format:\"%h\" -n 2 | tail -1'], stdout=subprocess.PIPE) + commit = result.stdout.decode('utf-8') + + print(f'Commit = {commit}') + + dataframes = [] + + for i in range(20): + clean() + compile_contracts() + print("Contracts compiled") + copy_contracts() + print("Benchmark started") + run_bench() + print(f"Benchmark ended, see results in tools/benchmark/csvs/{commit}.csv") + print("Clean started") + clean() + print("Clean ended") + print(os.getcwd()) + dataframes.append(pd.read_csv(f'tools/benchmark/csvs/{commit}.csv')) + + # Extract the 5th column from each DataFrame + Tgas_used = pd.concat([df.iloc[:, 5] for df in dataframes], axis=1) + + # Calculate the mean, variance, min, and max values for each row in the 5th columns + mean_Tgas_used = Tgas_used.mean(axis=1) + variance_Tgas_used = Tgas_used.var(axis=1) + min_Tgas_used = Tgas_used.min(axis=1) + max_Tgas_used = Tgas_used.max(axis=1) + + # Create a new DataFrame using the first DataFrame as a template + new_df = dataframes[0].copy() + + # Replace the 5th column in the new DataFrame with the mean values + new_df.iloc[:, 5] = mean_Tgas_used + + # Add columns for variance, min, and max values + new_df['Variance'] = variance_Tgas_used + new_df['Min'] = min_Tgas_used + new_df['Max'] = max_Tgas_used + + # Save the new DataFrame to a CSV file + new_df.to_csv(f"tools/benchmark/csvs/{commit}.csv", index=False) + + # extract mean and variance for bench with loop_limit = 3000 + mean = new_df.iloc[-1, 5] + variance = new_df.iloc[-1, 6] + + print(f"Mean = {mean}\nVariance = {variance}") + + UPPER_BOUND_MEAN = 204.5 + LOWER_BOUND_MEAN = 203.5 + + # I ran the code three times and the values were 109.6, 77.7, 49.3, so these bounds will be changed soon, I think + UPPER_BOUND_VARIANCE = 0.1 + LOWER_BOUND_VARIANCE = 0 + + assert mean <= UPPER_BOUND_MEAN + assert mean >= LOWER_BOUND_MEAN + + assert variance <= UPPER_BOUND_VARIANCE + assert variance >= LOWER_BOUND_VARIANCE + + diff --git a/tools/benchmark/.gitignore b/tools/benchmark/.gitignore index 0d0fd47e..e3c0d4bb 100644 --- a/tools/benchmark/.gitignore +++ b/tools/benchmark/.gitignore @@ -1,2 +1,2 @@ target/ -benchmark.csv +*.csv diff --git a/tools/benchmark/csvs/c814a18.csv b/tools/benchmark/csvs/c814a18.csv new file mode 100644 index 00000000..446eec82 --- /dev/null +++ b/tools/benchmark/csvs/c814a18.csv @@ -0,0 +1,28 @@ +Contract,Method,Gas burned,Gas used,Tgas burned,Tgas used,Input,Commit 
+bench_without_memory_allocation,cpu_ram_soak_test,2428001493624,49912634307180,2,49,"{""loop_limit"":1000}",c814a18 +bench_without_memory_allocation,cpu_ram_soak_test,2427999257690,27548834923078,2,27,"{""loop_limit"":500}",c814a18 +bench_without_memory_allocation,cpu_ram_soak_test,2427999257690,9657841151878,2,9,"{""loop_limit"":100}",c814a18 +bench,cpu_ram_soak_test,2428001493624,257529488708460,2,257,"{""loop_limit"":3000}",c814a18 +bench,cpu_ram_soak_test,2428001493624,205769908748460,2,205,"{""loop_limit"":2000}",c814a18 +bench,cpu_ram_soak_test,2428001493624,179890118768460,2,179,"{""loop_limit"":1500}",c814a18 +bench,cpu_ram_soak_test,2428001493624,154010328788460,2,154,"{""loop_limit"":1000}",c814a18 +bench,cpu_ram_soak_test,2427999257690,128130481638358,2,128,"{""loop_limit"":500}",c814a18 +bench,cpu_ram_soak_test,2427999257690,107426649654358,2,107,"{""loop_limit"":100}",c814a18 +const,value,2427936651538,5176979724410,2,5,{},c814a18 +bench_with_writing_in_memory,cpu_ram_soak_test,2428001493624,59866801749312,2,59,"{""loop_limit"":1000}",c814a18 +bench_with_writing_in_memory,cpu_ram_soak_test,2427999257690,33014045629210,2,33,"{""loop_limit"":500}",c814a18 +bench_with_writing_in_memory,cpu_ram_soak_test,2427999257690,11531886469210,2,11,"{""loop_limit"":100}",c814a18 +bench-mini,cpu_ram_soak_test,2428001493624,250164097100640,2,250,"{""loop_limit"":5000}",c814a18 +bench-mini,cpu_ram_soak_test,2428001493624,152214995300640,2,152,"{""loop_limit"":3000}",c814a18 +bench-mini,cpu_ram_soak_test,2428001493624,103240444400640,2,103,"{""loop_limit"":2000}",c814a18 +bench-mini,cpu_ram_soak_test,2428001493624,54265893500640,2,54,"{""loop_limit"":1000}",c814a18 +calc,multiply,2427967954614,5203419174300,2,5,"{""a"":6,""b"":7}",c814a18 +calc,multiply,2427988078020,5204030759118,2,5,"{""a"":2424,""b"":-242552}",c814a18 +calc,multiply,2427997021756,5203718774249,2,5,"{""a"":-43222,""b"":-23422424}",c814a18 +calc,multiply,2427994785822,5203686286827,2,5,"{""a"":-113444,""b"":1344114}",c814a18 +calc,multiply,2427985842086,5203861215199,2,5,"{""a"":0,""b"":133944141}",c814a18 +calc,multiply,2427997021756,5204356097702,2,5,"{""a"":31333,""b"":-144422424}",c814a18 +calc,multiply,2428005965492,5204558022886,2,5,"{""a"":2424422442,""b"":242456969}",c814a18 +calc,multiply,2428008201426,5203997303232,2,5,"{""a"":-42343435435,""b"":-2444224}",c814a18 +calc,multiply,2427985842086,5204328540607,2,5,"{""a"":424242424,""b"":0}",c814a18 +calc,multiply,2427967954614,5204660202795,2,5,"{""a"":0,""b"":0}",c814a18 diff --git a/tools/benchmark/inputs/bench.json b/tools/benchmark/inputs/bench.json new file mode 100644 index 00000000..684b6187 --- /dev/null +++ b/tools/benchmark/inputs/bench.json @@ -0,0 +1,8 @@ +[ + {"method": "cpu_ram_soak_test", "input": {"loop_limit": 100}}, + {"method": "cpu_ram_soak_test", "input": {"loop_limit": 500}}, + {"method": "cpu_ram_soak_test", "input": {"loop_limit": 1000}}, + {"method": "cpu_ram_soak_test", "input": {"loop_limit": 1500}}, + {"method": "cpu_ram_soak_test", "input": {"loop_limit": 2000}}, + {"method": "cpu_ram_soak_test", "input": {"loop_limit": 3000}} +] diff --git a/tools/benchmark/inputs/calc.json b/tools/benchmark/inputs/calc.json deleted file mode 100644 index 26033e34..00000000 --- a/tools/benchmark/inputs/calc.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - {"method": "multiply", "input": {"a": 6, "b": 7}}, - {"method": "multiply", "input": {"a": 2424, "b": -242552}}, - {"method": "multiply", "input": {"a": -43222, "b": -23422424}}, - {"method": "multiply", "input": 
{"a":-113444, "b": 1344114}}, - {"method": "multiply", "input": {"a": 0, "b": 133944141}}, - {"method": "multiply", "input": {"a": 31333, "b": -144422424}}, - {"method": "multiply", "input": {"a": 2424422442, "b": 242456969}}, - {"method": "multiply", "input": {"a": -42343435435, "b": -2444224}}, - {"method": "multiply", "input": {"a": 424242424, "b": 0}}, - {"method": "multiply", "input": {"a": 0, "b": 0}} -] diff --git a/tools/benchmark/pages/index.html b/tools/benchmark/pages/index.html deleted file mode 100644 index 56a89fbc..00000000 --- a/tools/benchmark/pages/index.html +++ /dev/null @@ -1,48 +0,0 @@ - - - - - CSV File Viewer - - - -

-<deleted static page markup: "CSV File Viewer" title and heading, a "Checking CI" note, an empty table, and a script that fetched and displayed the benchmark CSV>
- \ No newline at end of file diff --git a/tools/benchmark/src/main.rs b/tools/benchmark/src/main.rs index 471e46af..0789f4e8 100644 --- a/tools/benchmark/src/main.rs +++ b/tools/benchmark/src/main.rs @@ -1,7 +1,9 @@ use csv::Writer; -use serde::{Deserialize}; +use serde::Deserialize; use serde_json::{json, Value}; -use std::fs::File; +use std::env; +use std::process::Command; +use std::{ffi::OsString, fs::File}; #[derive(Debug, Deserialize)] struct Input { @@ -11,32 +13,42 @@ struct Input { const TERA: u64 = 1000000000000_u64; -async fn bench_contract(wtr: &mut Writer<File>, name: &str) -> anyhow::Result<()> { +async fn bench_contract( + wtr: &mut Writer<File>, + name_os: OsString, + commit: String, +) -> anyhow::Result<()> { + let name = &name_os.to_str().unwrap()[0..name_os.len() - 5]; + println!("Name = {}", name); let worker = near_workspaces::sandbox().await?; let wasm = std::fs::read(format!("{}.wasm", name))?; let contract = worker.dev_deploy(&wasm).await?; let inputs: Vec<Input> = serde_json::from_str( - &std::fs::read_to_string(format!("inputs/{}.json", name)) .expect("Unable to read file"), + &std::fs::read_to_string(format!("inputs/{}.json", name)).expect("Unable to read file"), ) .expect("JSON does not have correct format."); - + let deposit = 10000000000000000000000_u128; for input in &inputs { let outcome = contract .call(&input.method) .args_json(json!(input.input)) + .deposit(deposit) + .gas(near_units::parse_gas!("300 TGas") as u64) .transact() .await?; + for failure in &outcome.failures() { + println!("{:#?}", failure); + } assert!(outcome.is_success()); + wtr.write_record(&[ + commit.clone(), name.to_string(), input.method.to_string(), - outcome.outcome().gas_burnt.to_string(), - outcome.total_gas_burnt.to_string(), + input.input.to_string(), (outcome.outcome().gas_burnt / TERA).to_string(), (outcome.total_gas_burnt / TERA).to_string(), - input.input.to_string(), ])?; } wtr.flush()?; @@ -45,23 +57,77 @@ #[tokio::main] async fn main() -> anyhow::Result<()> { - let contracts = vec!["calc"]; - let mut wtr = Writer::from_path("benchmark.csv")?; + let paths = std::fs::read_dir("inputs/").unwrap(); + + let contracts = paths + .into_iter() + .map(|dir| dir.unwrap().file_name()) + .collect::<Vec<OsString>>(); + + + let commit = match env::var("GITHUB_SHA") { + Ok(_) => { + println!("Running in github action"); + let event_name = env::var("GITHUB_EVENT_NAME").unwrap_or_default(); + let ref_name = env::var("GITHUB_REF").unwrap_or_default(); + + if event_name == "push" && ref_name == "refs/heads/master" { + // push to master + let output = Command::new("sh") + .arg("-c") + .arg("git rev-parse --short HEAD") + .output() + .expect("failed to execute process"); + let stdout = output.stdout; + let mut tmp = std::str::from_utf8(&stdout).unwrap().to_string(); + tmp.pop(); // to remove \n in the end + tmp + } else { + // pull request + let output = Command::new("sh") + .arg("-c") + .arg("git log --pretty=format:\"%h\" -n 2 | tail -1") + .output() + .expect("failed to execute process"); + + let stdout = output.stdout; + let tmp = std::str::from_utf8(&stdout).unwrap().to_string(); + tmp + } + } + Err(_) => { + println!("Running locally"); + let output = Command::new("sh") + .arg("-c") + .arg("git rev-parse --short HEAD") + .output() + .expect("failed to execute process"); + let stdout = output.stdout; + let mut tmp = std::str::from_utf8(&stdout).unwrap().to_string(); + tmp.pop(); // to remove \n in the end + tmp + } + }; + println!("Commit 
= {}", commit); + + let mut wtr = Writer::from_path(format!("csvs/{}.csv", commit))?; + wtr.write_record([ + "Commit", "Contract", "Method", - "Gas burned", - "Gas used", - "Tgas burned", - "Tgas used", "Input", + "Tgas burned", + "Avg Tgas used", ])?; for contract in contracts { - bench_contract(&mut wtr, contract).await?; + bench_contract(&mut wtr, contract, commit.clone()).await?; } wtr.flush()?; Ok(()) } + +