diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a9d37c5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..fe7206a --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "reed-solomon-16" +version = "0.1.0" +license = "MIT AND BSD-3-Clause" +description = "Reed-Solomon GF(2^16) erasure coding with O(n log n) complexity" +repository = "https://github.com/malaire/reed-solomon-16" +keywords = [ "erasure", "reed-solomon" ] +categories = [ "algorithms" ] +edition = "2021" + +include = [ + "/benches", + "/src", + "LICENSE", + "README.md", + "algorithm.md", + "build.rs", +] + +[dependencies] +bytemuck = "1.7.3" +fixedbitset = "0.4.0" +once_cell = "1.8.0" + +[build-dependencies] +readme-rustdocifier = "0.1.0" + +[dev-dependencies] +criterion = { version = "0.3", features = [ "html_reports" ] } +hex = "0.4.3" +rand = "0.8.4" +rand_chacha = "0.3.1" +sha2 = "0.10.0" + +# These are only for `examples/quick-comparison.rs`. +reed-solomon-erasure = { version = "5.0.1", features = [ "simd-accel" ] } +reed-solomon-novelpoly = "1.0.0" + +[lib] +bench = false + +[[bench]] +name = "benchmarks" +harness = false diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3d512c7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,55 @@ +All code from me (Markus Laire) is under MIT License (1st license below). + +This crate is based on [1] which uses BSD-3-Clause License (2nd license below). 
+ +[1] https://github.com/catid/leopard + +----- + +Copyright (c) 2022 Markus Laire + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +----- + +Copyright (c) 2017 Christopher A. Taylor. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +* Neither the name of Leopard-RS nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..3278813 --- /dev/null +++ b/README.md @@ -0,0 +1,186 @@ +# reed-solomon-16 + +A library for Reed-Solomon `GF(2^16)` erasure coding, featuring: + +- `O(n log n)` complexity. +- Any combination of 1 - 32768 original shards with 1 - 32768 recovery shards. +- Up to 65535 original or recovery shards with some limitations. +- SIMD optimizations are planned, but not yet implemented. + +## Simple usage + +1. Divide data into equal-sized original shards. + Shard size must be multiple of 64 bytes. +2. Decide how many recovery shards you want. +3. Generate recovery shards with [`reed_solomon_16::encode`]. +4. When some original shards get lost, restore them with [`reed_solomon_16::decode`]. + - You must provide at least as many shards as there were original shards in total, + in any combination of original shards and recovery shards. + +### Example + +Divide data into 3 original shards of 64 bytes each and generate 5 recovery shards. +Assume then that original shards #0 and #2 are lost +and restore them by providing 1 original shard and 2 recovery shards. 
+ +```rust +let original = [ + b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do ", + b"eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut e", + b"nim ad minim veniam, quis nostrud exercitation ullamco laboris n", +]; + +let recovery = reed_solomon_16::encode( + 3, // total number of original shards + 5, // total number of recovery shards + original, // all original shards +)?; + +let restored = reed_solomon_16::decode( + 3, // total number of original shards + 5, // total number of recovery shards + [ // provided original shards with indexes + (1, &original[1]), + ], + [ // provided recovery shards with indexes + (1, &recovery[1]), + (4, &recovery[4]), + ], +)?; + +assert_eq!(restored[&0], original[0]); +assert_eq!(restored[&2], original[2]); +# Ok::<(), reed_solomon_16::Error>(()) +``` + +## Basic usage + +[`ReedSolomonEncoder`] and [`ReedSolomonDecoder`] give more control +of the encoding/decoding process. + +Here's the above example using these instead: + +```rust +use reed_solomon_16::{ReedSolomonDecoder, ReedSolomonEncoder}; +use std::collections::HashMap; + +let original = [ + b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do ", + b"eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut e", + b"nim ad minim veniam, quis nostrud exercitation ullamco laboris n", +]; + +let mut encoder = ReedSolomonEncoder::new( + 3, // total number of original shards + 5, // total number of recovery shards + 64, // shard size in bytes +)?; + +for original in original { + encoder.add_original_shard(original)?; +} + +let result = encoder.encode()?; +let recovery: Vec<_> = result.recovery_iter().collect(); + +let mut decoder = ReedSolomonDecoder::new( + 3, // total number of original shards + 5, // total number of recovery shards + 64, // shard size in bytes +)?; + +decoder.add_original_shard(1, original[1])?; +decoder.add_recovery_shard(1, recovery[1])?; +decoder.add_recovery_shard(4, recovery[4])?; + +let result = decoder.decode()?; +let restored: HashMap<_, _> = result.restored_original_iter().collect(); + +assert_eq!(restored[&0], original[0]); +assert_eq!(restored[&2], original[2]); +# Ok::<(), reed_solomon_16::Error>(()) +``` + +## Advanced usage + +See [`rate`] module for advanced encoding/decoding +using chosen [`Engine`] and [`Rate`]. + +## Benchmarks + +- These benchmarks are from `cargo bench main` + with 3.4 GHz i5-3570K (Ivy Bridge, 3rd gen.). +- Shards are 1024 bytes. +- MiB/s is total amount of data, + i.e. original shards + recovery shards. + - For decoder this includes missing shards. +- Encode benchmark + - Includes [`add_original_shard`][RSE::add_original_shard] and + [`encode`][RSE::encode] of [`ReedSolomonEncoder`]. +- Decode benchmark + - Has two MiB/s values for 1% and 100% original shard loss, of maximum possible. + - Provides minimum required amount of shards to decoder. + - Includes [`add_original_shard`][RSD::add_original_shard], + [`add_recovery_shard`][RSD::add_recovery_shard] and + [`decode`][RSD::decode] of [`ReedSolomonDecoder`]. 
+ +| original : recovery | MiB/s (encode) | MiB/s (decode) | +| ------------------- | -------------- | -------------- | +| 100 : 100 | 229 | 73 ; 71 | +| 100 : 1 000 | 229 | 66 ; 66 | +| 1 000 : 100 | 222 | 65 ; 64 | +| 1 000 : 1 000 | 171 | 77 ; 74 | +| 1 000 : 10 000 | 149 | 53 ; 53 | +| 10 000 : 1 000 | 154 | 55 ; 55 | +| 10 000 : 10 000 | 103 | 39 ; 38 | +| 16 385 : 16 385 | 89 | 31 ; 31 | +| 32 768 : 32 768 | 107 | 50 ; 49 | + +## Benchmarks against other crates + +Use `cargo run --release --example quick-comparison` +to run few simple benchmarks against [`reed-solomon-erasure`] +and [`reed-solomon-novelpoly`] crates. + +This crate is fastest when shard count exceeds 256 shards, +except for one-time initialization (< 10 ms) +which can dominate at really small data amounts. + +[`reed-solomon-erasure`]: https://crates.io/crates/reed-solomon-erasure +[`reed-solomon-novelpoly`]: https://crates.io/crates/reed-solomon-novelpoly + +## Running tests + +Some larger tests are marked `#[ignore]` and are not run with `cargo test`. +Use `cargo test -- --ignored` to run those. + +## Safety + +This crate doesn't currently use any `unsafe` code. + +However planned SIMD-optimized engines will need to use `unsafe`, +but the intention is that nothing else will use `unsafe`. + +## Credits + +This crate is based on [Leopard-RS] by Christopher A. Taylor. 
+ +[Leopard-RS]: https://github.com/catid/leopard + +[`Naive`]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/engine/struct.Naive.html +[`NoSimd`]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/engine/struct.NoSimd.html + +[`ReedSolomonEncoder`]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/struct.ReedSolomonEncoder.html +[RSE::add_original_shard]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/struct.ReedSolomonEncoder.html#method.add_original_shard +[RSE::encode]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/struct.ReedSolomonEncoder.html#method.encode + +[`ReedSolomonDecoder`]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/struct.ReedSolomonDecoder.html +[RSD::add_original_shard]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/struct.ReedSolomonDecoder.html#method.add_original_shard +[RSD::add_recovery_shard]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/struct.ReedSolomonDecoder.html#method.add_recovery_shard +[RSD::decode]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/struct.ReedSolomonDecoder.html#method.decode + +[`Engine`]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/engine/trait.Engine.html +[`Rate`]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/rate/trait.Rate.html + +[`reed_solomon_16::encode`]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/fn.encode.html +[`reed_solomon_16::decode`]: https://docs.rs/reed-solomon-16/0.1.0/reed_solomon_16/fn.decode.html diff --git a/algorithm.md b/algorithm.md new file mode 100644 index 0000000..419eecc --- /dev/null +++ b/algorithm.md @@ -0,0 +1,149 @@ +Algorithm documentation. + +As I don't understand algorithm fully myself, +I'll just document some parts which I do understand. + +# Shard + +- Reed-Solomon `GF(2^16)` erasure coding works on 16-bit elements ([`GfElement`]). +- A **shard** is a byte-array which is interpreted as an array of [`GfElement`]:s. + +A naive implementation could e.g. 
require shards to be a multiple of **2 bytes** +and then interpret each byte-pair as low/high parts of a single [`GfElement`]: + +```text +[ low_0, high_0, low_1, high_1, ...] +``` + +However that approach isn't good for SIMD optimizations. +Instead shards are required to be a multiple of **64 bytes**. +In each 64-byte block first 32 bytes are low parts of 32 [`GfElement`]:s +and last 32 bytes are high parts of those 32 [`GfElement`]:s. + +```text +[ low_0, low_1, ..., low_31, high_0, high_1, ..., high_31 ] +``` + +A shard then consists of one or more of these 64-byte blocks: + +```text +// -------- first 64-byte block --------- | --------- second 64-byte block ---------- | ... +[ low_0, ..., low_31, high_0, ..., high_31, low_32, ..., low_63, high_32, ..., high_63, ... ] +``` + +## Original shards and recovery shards + +- The data which is going to be protected by Reed-Solomon erasure coding + is split into equal-sized **original shards**. + - **`original_count`** is the number of original shards. +- Additional **recovery shards** of same size are then created + which contain recovery data so that original data can be fully restored + from any set of **`original_count`** shards, original or recovery. + - **`recovery_count`** is the number of recovery shards. + +Algorithm supports any combination of +1 - 32768 original shards with 1 - 32768 recovery shards. +Up to 65535 original or recovery shards is also possible with following limitations: + +| `original_count` | `recovery_count` | +| ---------------- | ---------------- | +| `<= 2^16 - 2^n` | `<= 2^n` | +| `<= 61440` | `<= 4096` | +| `<= 57344` | `<= 8192` | +| `<= 49152` | `<= 16384` | +| **`<= 32768`** | **`<= 32768`** | +| `<= 16384` | `<= 49152` | +| `<= 8192` | `<= 57344` | +| `<= 4096` | `<= 61440` | +| `<= 2^n` | `<= 2^16 - 2^n` | + +# Rate + +Encoding and decoding both have two variations: + +- **High rate** refers to having more original shards than recovery shards. 
+ - High rate must be used when there are over 32768 original shards. + - High rate encoding uses **chunks** of `recovery_count.next_power_of_two()` shards. +- **Low rate** refers to having more recovery shards than original shards. + - Low rate must be used when there are over 32768 recovery shards. + - Low rate encoding uses **chunks** of `original_count.next_power_of_two()` shards. +- Because of padding either rate can be used when there are + at most 32768 original shards and at most 32768 recovery shards. + - High rate and low rate are not [^1] compatible with each other, + i.e. decoding must use same rate that encoding used. + - With multiple chunks "correct" rate is generally faster in encoding + and not-slower in decoding. + - With single chunk "wrong" rate is generally faster in decoding + if `original_count` and `recovery_count` differ a lot. + +[^1]: They seem to be compatible with single chunk. However I don't quite + understand why and I don't recommend relying on this. + +## Benchmarks + +- These benchmarks are from `cargo bench rate` + and use similar setup than [main benchmarks], + except with maximum possible shard loss. 
+ +| original : recovery | Chunks | HighRateEncoder | LowRateEncoder | HighRateDecoder | LowRateDecoder | +| ------------------- | ------- | --------------- | -------------- | --------------- | -------------- | +| 1024 : 1024 | 1x 1024 | 175 MiB/s | 176 MiB/s | 76 MiB/s | 75 MiB/s | +| 1024 : 1025 (Low) | 2x 1024 | 140 | **153** | 47 | **59** | +| 1025 : 1024 (High) | 2x 1024 | **152** | 132 | **60** | 46 | +| 1024 : 2048 (Low) | 2x 1024 | 157 | **169** | 70 | 70 | +| 2048 : 1024 (High) | 2x 1024 | **167** | 151 | 69 | 68 | +| 1025 : 1025 | 1x 2048 | 125 | 126 | 44 | 43 | +| 1025 : 2048 (Low) | 1x 2048 | 144 | 144 | **65** **!!!** | 53 | +| 2048 : 1025 (High) | 1x 2048 | 144 | 145 | 53 | **62** **!!!** | +| 2048 : 2048 | 1x 2048 | 156 | 157 | 70 | 69 | + +[main benchmarks]: crate#benchmarks + +# Encoding + +Encoding takes original shards as input and generates recovery shards. + +## High rate encoding + +- Encoding is done in **chunks** of `recovery_count.next_power_of_two()` shards. +- Original shards are split into chunks and last chunk + is padded with zero-filled shards if needed. +- Recovery shards fit into a single chunk + which is padded with unused shards if needed. +- Recovery chunk is generated with following algorithm + +```text +recovery_chunk = FFT( + IFFT(original_chunk_0, skew_0) xor + IFFT(original_chunk_1, skew_1) xor + ... +) +``` + +This is implemented in [`HighRateEncoder`]. + +## Low rate encoding + +- Encoding is done in **chunks** of `original_count.next_power_of_two()` shards. +- Original shards fit into a single chunk + which is padded with zero-filled shards if needed. +- Recovery shards are generated in chunks and last chunk + is padded with unused shards if needed. +- Recovery chunks are generated with following algorithm + +```text +recovery_chunk_0 = FFT( IFFT(original_chunk), skew_0 ) +recovery_chunk_1 = FFT( IFFT(original_chunk), skew_1 ) +... +``` + +This is implemented in [`LowRateEncoder`]. 
+ +# Decoding + +**TODO** + + +[`GfElement`]: crate::engine::GfElement +[`HighRateEncoder`]: crate::rate::HighRateEncoder +[`LowRateEncoder`]: crate::rate::LowRateEncoder diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs new file mode 100644 index 0000000..2475c3e --- /dev/null +++ b/benches/benchmarks.rs @@ -0,0 +1,359 @@ +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; + +use reed_solomon_16::{ + engine::{DefaultEngine, Engine, GfElement, Naive, NoSimd, ShardsRefMut, GF_ORDER}, + rate::{ + HighRateDecoder, HighRateEncoder, LowRateDecoder, LowRateEncoder, RateDecoder, RateEncoder, + }, + ReedSolomonDecoder, ReedSolomonEncoder, +}; + +// ====================================================================== +// CONST + +const SHARD_BYTES: usize = 1024; + +// ====================================================================== +// UTIL + +fn generate_shards(shard_count: usize, shard_bytes: usize, seed: u8) -> Vec> { + let mut rng = ChaCha8Rng::from_seed([seed; 32]); + let mut shards = vec![vec![0u8; shard_bytes]; shard_count]; + for shard in &mut shards { + rng.fill::<[u8]>(shard); + } + shards +} + +// ====================================================================== +// BENCHMARKS - MAIN + +fn benchmarks_main(c: &mut Criterion) { + let mut group = c.benchmark_group("main"); + + for (original_count, recovery_count) in [ + (100, 100), + (100, 1000), + (1000, 100), + (1000, 1000), + (1000, 10000), + (10000, 1000), + (10000, 10000), + (16385, 16385), // 2^n + 1 + (32768, 32768), // 2^n + ] { + if original_count >= 1000 && recovery_count >= 1000 { + group.sample_size(10); + } else { + group.sample_size(100); + } + + let original = generate_shards(original_count, SHARD_BYTES, 0); + let recovery = reed_solomon_16::encode(original_count, recovery_count, &original).unwrap(); + + group.throughput(Throughput::Bytes( + ((original_count + 
recovery_count) * SHARD_BYTES) as u64, + )); + + // ReedSolomonEncoder + + let mut encoder = + ReedSolomonEncoder::new(original_count, recovery_count, SHARD_BYTES).unwrap(); + + let id = format!("{}:{}", original_count, recovery_count); + + group.bench_with_input( + BenchmarkId::new("ReedSolomonEncoder", &id), + &original, + |b, original| { + b.iter(|| { + for original in original { + encoder.add_original_shard(original).unwrap(); + } + encoder.encode().unwrap(); + }); + }, + ); + + // ReedSolomonDecoder + + let max_original_loss_count = std::cmp::min(original_count, recovery_count); + + for loss_percent in [1, 100] { + let original_loss_count = max_original_loss_count * loss_percent / 100; + let original_provided_count = original_count - original_loss_count; + let recovery_provided_count = original_loss_count; + + let mut decoder = + ReedSolomonDecoder::new(original_count, recovery_count, SHARD_BYTES).unwrap(); + + let id = format!("{}:{} ({}%)", original_count, recovery_count, loss_percent); + + group.bench_with_input( + BenchmarkId::new("ReedSolomonDecoder", &id), + &recovery, + |b, recovery| { + b.iter(|| { + for index in 0..original_provided_count { + decoder.add_original_shard(index, &original[index]).unwrap(); + } + for index in 0..recovery_provided_count { + decoder.add_recovery_shard(index, &recovery[index]).unwrap(); + } + decoder.decode().unwrap(); + }); + }, + ); + } + } + + group.finish(); +} + +// ====================================================================== +// BENCHMARKS - RATE + +fn benchmarks_rate(c: &mut Criterion) { + // benchmarks_rate_one(c, "rate-Naive", Naive::new()); + benchmarks_rate_one(c, "rate", DefaultEngine::new()); +} + +fn benchmarks_rate_one(c: &mut Criterion, name: &str, engine: E) { + let mut group = c.benchmark_group(name); + group.sample_size(10); + + for (original_count, recovery_count) in [ + (1024, 1024), + (1024, 1025), + (1025, 1024), + (1024, 2048), + (2048, 1024), + (1025, 1025), + (1025, 2048), + (2048, 1025), 
+ (2048, 2048), + ] { + let original = generate_shards(original_count, SHARD_BYTES, 0); + let recovery = reed_solomon_16::encode(original_count, recovery_count, &original).unwrap(); + + group.throughput(Throughput::Bytes( + ((original_count + recovery_count) * SHARD_BYTES) as u64, + )); + + // ENCODE + + let id = format!("{}:{}", original_count, recovery_count); + + // HighRateEncoder + + let mut encoder = HighRateEncoder::new( + original_count, + recovery_count, + SHARD_BYTES, + engine.clone(), + None, + ) + .unwrap(); + + group.bench_with_input( + BenchmarkId::new("HighRateEncoder", &id), + &original, + |b, original| { + b.iter(|| { + for original in original { + encoder.add_original_shard(original).unwrap(); + } + encoder.encode().unwrap(); + }); + }, + ); + + // LowRateEncoder + + let mut encoder = LowRateEncoder::new( + original_count, + recovery_count, + SHARD_BYTES, + engine.clone(), + None, + ) + .unwrap(); + + group.bench_with_input( + BenchmarkId::new("LowRateEncoder", &id), + &original, + |b, original| { + b.iter(|| { + for original in original { + encoder.add_original_shard(original).unwrap(); + } + encoder.encode().unwrap(); + }); + }, + ); + + // DECODE + + let original_loss_count = std::cmp::min(original_count, recovery_count); + let original_provided_count = original_count - original_loss_count; + let recovery_provided_count = original_loss_count; + + // HighRateDecoder + + let mut decoder = HighRateDecoder::new( + original_count, + recovery_count, + SHARD_BYTES, + engine.clone(), + None, + ) + .unwrap(); + + let id = format!("{}:{}", original_count, recovery_count); + + group.bench_with_input( + BenchmarkId::new("HighRateDecoder", &id), + &recovery, + |b, recovery| { + b.iter(|| { + for index in 0..original_provided_count { + decoder.add_original_shard(index, &original[index]).unwrap(); + } + for index in 0..recovery_provided_count { + decoder.add_recovery_shard(index, &recovery[index]).unwrap(); + } + decoder.decode().unwrap(); + }); + }, + ); + + 
// LowRateDecoder + + let mut decoder = LowRateDecoder::new( + original_count, + recovery_count, + SHARD_BYTES, + engine.clone(), + None, + ) + .unwrap(); + + let id = format!("{}:{}", original_count, recovery_count); + + group.bench_with_input( + BenchmarkId::new("LowRateDecoder", &id), + &recovery, + |b, recovery| { + b.iter(|| { + for index in 0..original_provided_count { + decoder.add_original_shard(index, &original[index]).unwrap(); + } + for index in 0..recovery_provided_count { + decoder.add_recovery_shard(index, &recovery[index]).unwrap(); + } + decoder.decode().unwrap(); + }); + }, + ); + } + + group.finish(); +} + +// ====================================================================== +// BENCHMARKS - ENGINES + +fn benchmarks_engine(c: &mut Criterion) { + benchmarks_engine_one(c, "engine-Naive", Naive::new()); + benchmarks_engine_one(c, "engine-NoSimd", NoSimd::new()); +} + +fn benchmarks_engine_one(c: &mut Criterion, name: &str, engine: E) { + let mut group = c.benchmark_group(name); + + // XOR MUL + + let mut x = &mut generate_shards(1, SHARD_BYTES, 0)[0]; + let y = &generate_shards(1, SHARD_BYTES, 1)[0]; + + group.bench_function("xor", |b| { + b.iter(|| E::xor(black_box(&mut x), black_box(&y))) + }); + + group.bench_function("mul", |b| { + b.iter(|| engine.mul(black_box(&mut x), black_box(12345))) + }); + + // XOR_WITHIN + + let shards_256_data = &mut generate_shards(1, 256 * SHARD_BYTES, 0)[0]; + let mut shards_256 = ShardsRefMut::new(256, SHARD_BYTES, shards_256_data.as_mut()); + + group.bench_function("xor_within 128*2", |b| { + b.iter(|| { + E::xor_within( + black_box(&mut shards_256), + black_box(0), + black_box(128), + black_box(128), + ) + }) + }); + + // FORMAL DERIVATIVE + + let shards_128_data = &mut generate_shards(1, 128 * SHARD_BYTES, 0)[0]; + let mut shards_128 = ShardsRefMut::new(128, SHARD_BYTES, shards_128_data.as_mut()); + + group.bench_function("formal_derivative 128", |b| { + b.iter(|| E::formal_derivative(black_box(&mut 
shards_128))) + }); + + // FFT IFFT + + group.bench_function("FFT 128", |b| { + b.iter(|| { + engine.fft( + black_box(&mut shards_128), + black_box(0), + black_box(128), + black_box(128), + black_box(128), + ) + }) + }); + + group.bench_function("IFFT 128", |b| { + b.iter(|| { + engine.ifft( + black_box(&mut shards_128), + black_box(0), + black_box(128), + black_box(128), + black_box(128), + ) + }) + }); + + // FWHT + + let mut fwht_data = [0 as GfElement; GF_ORDER]; + let mut rng = ChaCha8Rng::from_seed([0; 32]); + rng.fill::<[u16]>(&mut fwht_data); + + group.bench_function("FWHT", |b| { + b.iter(|| E::fwht(black_box(&mut fwht_data), black_box(GF_ORDER))) + }); + + group.finish(); +} + +// ====================================================================== +// MAIN + +criterion_group!(benches_main, benchmarks_main); +criterion_group!(benches_rate, benchmarks_rate); +criterion_group!(benches_engine, benchmarks_engine); +criterion_main!(benches_main, benches_rate, benches_engine); diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..86cd518 --- /dev/null +++ b/build.rs @@ -0,0 +1,23 @@ +use std::{env, error::Error, fs, path::PathBuf}; + +// ====================================================================== +// CONST + +const CRATE_NAME: &str = "reed_solomon_16"; + +// ====================================================================== +// MAIN + +fn main() -> Result<(), Box> { + println!("cargo:rerun-if-changed=README.md"); + fs::write( + PathBuf::from(env::var("OUT_DIR")?).join("README-rustdocified.md"), + readme_rustdocifier::rustdocify( + &fs::read_to_string("README.md")?, + &env::var("CARGO_PKG_NAME")?, + Some(&env::var("CARGO_PKG_VERSION")?), + Some(CRATE_NAME), + )?, + )?; + Ok(()) +} diff --git a/examples/quick-comparison.rs b/examples/quick-comparison.rs new file mode 100644 index 0000000..db200f1 --- /dev/null +++ b/examples/quick-comparison.rs @@ -0,0 +1,230 @@ +use std::time::Instant; + +use rand::{Rng, SeedableRng}; +use 
rand_chacha::ChaCha8Rng; +use reed_solomon_16::engine::DefaultEngine; +use reed_solomon_erasure::galois_16::ReedSolomon as ReedSolomon16; +use reed_solomon_erasure::galois_8::ReedSolomon as ReedSolomon8; +use reed_solomon_novelpoly::{CodeParams, WrappedShard}; + +// ====================================================================== +// CONST + +const SHARD_BYTES: usize = 1024; + +// ====================================================================== +// MAIN + +fn main() { + println!(" µs (init) µs (encode) µs (decode)"); + println!(" --------- ----------- -----------"); + + for count in [32, 64, 128, 256, 512, 1024, 4 * 1024, 32 * 1024] { + println!("\n{}:{} ({} kiB)", count, count, SHARD_BYTES / 1024); + test_reed_solomon_16(count); + test_reed_solomon_novelpoly(count); + if count <= 128 { + test_reed_solomon_erasure_8(count); + } + if count <= 512 { + test_reed_solomon_erasure_16(count); + } + } +} + +// ====================================================================== +// reed-solomon-16 + +fn test_reed_solomon_16(count: usize) { + // INIT + + let start = Instant::now(); + // This initializes all the needed tables. 
+ DefaultEngine::new(); + let elapsed = start.elapsed(); + print!("> reed-solomon-16 {:9}", elapsed.as_micros()); + + // CREATE ORIGINAL + + let mut original = vec![vec![0u8; SHARD_BYTES]; count]; + let mut rng = ChaCha8Rng::from_seed([0; 32]); + for original in &mut original { + rng.fill::<[u8]>(original); + } + + // ENCODE + + let start = Instant::now(); + let recovery = reed_solomon_16::encode(count, count, &original).unwrap(); + let elapsed = start.elapsed(); + print!("{:14}", elapsed.as_micros()); + + // PREPARE DECODE + + let decoder_recovery: Vec<_> = recovery.iter().enumerate().collect(); + + // DECODE + + let start = Instant::now(); + let restored = reed_solomon_16::decode(count, count, [(0, ""); 0], decoder_recovery).unwrap(); + let elapsed = start.elapsed(); + println!("{:14}", elapsed.as_micros()); + + // CHECK + + for i in 0..count { + assert_eq!(restored[&i], original[i]); + } +} + +// ====================================================================== +// reed-solomon-erasure + +fn test_reed_solomon_erasure_8(count: usize) { + // INIT + + let start = Instant::now(); + let r = ReedSolomon8::new(count, count).unwrap(); + let elapsed = start.elapsed(); + print!("> reed-solomon-erasure/8 {:9}", elapsed.as_micros()); + + // CREATE ORIGINAL + + let mut original = vec![vec![0u8; SHARD_BYTES]; count]; + let mut rng = ChaCha8Rng::from_seed([0; 32]); + for shard in &mut original { + rng.fill::<[u8]>(shard); + } + + // ENCODE + + let mut recovery = vec![vec![0; SHARD_BYTES]; count]; + + let start = Instant::now(); + r.encode_sep(&original, &mut recovery).unwrap(); + let elapsed = start.elapsed(); + print!("{:14}", elapsed.as_micros()); + + // PREPARE DECODE + + let mut decoder_shards = Vec::with_capacity(2 * count); + for _ in 0..count { + decoder_shards.push(None); + } + for i in 0..count { + decoder_shards.push(Some(recovery[i].clone())); + } + + // DECODE + + let start = Instant::now(); + r.reconstruct(&mut decoder_shards).unwrap(); + let elapsed = 
start.elapsed(); + println!("{:14}", elapsed.as_micros()); + + // CHECK + + for i in 0..count { + assert_eq!(decoder_shards[i].as_ref(), Some(&original[i])); + } +} + +fn test_reed_solomon_erasure_16(count: usize) { + // INIT + + let start = Instant::now(); + let r = ReedSolomon16::new(count, count).unwrap(); + let elapsed = start.elapsed(); + print!("> reed-solomon-erasure/16 {:9}", elapsed.as_micros()); + + // CREATE ORIGINAL + + let mut original = vec![vec![[0u8; 2]; SHARD_BYTES / 2]; count]; + let mut rng = ChaCha8Rng::from_seed([0; 32]); + for shard in &mut original { + for element in shard.iter_mut() { + element[0] = rng.gen(); + element[1] = rng.gen(); + } + } + + // ENCODE + + let mut recovery = vec![vec![[0; 2]; SHARD_BYTES / 2]; count]; + + let start = Instant::now(); + r.encode_sep(&original, &mut recovery).unwrap(); + let elapsed = start.elapsed(); + print!("{:14}", elapsed.as_micros()); + + // PREPARE DECODE + + let mut decoder_shards = Vec::with_capacity(2 * count); + for _ in 0..count { + decoder_shards.push(None); + } + for i in 0..count { + decoder_shards.push(Some(recovery[i].clone())); + } + + // DECODE + + let start = Instant::now(); + r.reconstruct(&mut decoder_shards).unwrap(); + let elapsed = start.elapsed(); + println!("{:14}", elapsed.as_micros()); + + // CHECK + + for i in 0..count { + assert_eq!(decoder_shards[i].as_ref(), Some(&original[i])); + } +} + +// ====================================================================== +// reed-solomon-novelpoly + +fn test_reed_solomon_novelpoly(count: usize) { + // INIT + + let start = Instant::now(); + let r = CodeParams::derive_parameters(2 * count, count) + .unwrap() + .make_encoder(); + let elapsed = start.elapsed(); + print!("> reed-solomon-novelpoly {:9}", elapsed.as_micros()); + + // CREATE ORIGINAL + + let mut original = vec![0u8; count * SHARD_BYTES]; + let mut rng = ChaCha8Rng::from_seed([0; 32]); + rng.fill::<[u8]>(&mut original); + + // ENCODE + + let start = Instant::now(); + let 
encoded = r.encode::(&original).unwrap(); + let elapsed = start.elapsed(); + print!("{:14}", elapsed.as_micros()); + + // PREPARE DECODE + + let mut decoder_shards = Vec::with_capacity(2 * count); + for _ in 0..count { + decoder_shards.push(None); + } + for i in 0..count { + decoder_shards.push(Some(encoded[count + i].clone())); + } + + // DECODE + + let start = Instant::now(); + let reconstructed = r.reconstruct(decoder_shards).unwrap(); + let elapsed = start.elapsed(); + println!("{:14}", elapsed.as_micros()); + + // CHECK + + assert_eq!(reconstructed, original); +} diff --git a/examples/test-random-roundtrips.rs b/examples/test-random-roundtrips.rs new file mode 100644 index 0000000..0b53385 --- /dev/null +++ b/examples/test-random-roundtrips.rs @@ -0,0 +1,256 @@ +use fixedbitset::FixedBitSet; +use rand::Rng; + +use reed_solomon_16::{ + engine::{Engine, Naive, NoSimd, GF_ORDER}, + rate::{ + DecoderWork, DefaultRate, EncoderWork, HighRate, LowRate, Rate, RateDecoder, RateEncoder, + }, + Error, +}; + +// ====================================================================== +// CONST + +// Large shard sizes shouldn't need to be tested that much +// as algorithms handle data in 64-byte blocks. +const MIN_SHARD_BYTES_LOG: f64 = 6.0; // 2^6 = 64 +const MAX_SHARD_BYTES_LOG: f64 = 6.0; +// const MAX_SHARD_BYTES_LOG: f64 = 8.0; // 2^8 = 256 + +const MIN_ORIGINAL_COUNT_LOG: f64 = 0.0; // 2^0 = 1 +const MAX_ORIGINAL_COUNT_LOG: f64 = 16.0; // 2^16 = 65536 + +const MIN_RECOVERY_COUNT_LOG: f64 = 0.0; // 2^0 = 1 +const MAX_RECOVERY_COUNT_LOG: f64 = 16.0; // 2^16 = 65536 + +// ====================================================================== +// MACROS + +macro_rules! roundtrip { + ( + $Rate: ident, + $original: expr, + $original_count: expr, + $recovery_count: expr, + $shard_bytes: expr, + $loss_indexes: expr, + $encoder_work: expr, + $decoder_work: expr $(,)? 
+ ) => { + let recovery_naive = roundtrip::<_, $Rate<_>>( + $original, + $original_count, + $recovery_count, + $shard_bytes, + $loss_indexes, + $encoder_work, + $decoder_work, + Naive::new(), + ) + .unwrap(); + + let recovery_nosimd = roundtrip::<_, $Rate<_>>( + $original, + $original_count, + $recovery_count, + $shard_bytes, + $loss_indexes, + $encoder_work, + $decoder_work, + NoSimd::new(), + ) + .unwrap(); + + assert_eq!(recovery_naive, recovery_nosimd); + }; +} + +// ====================================================================== +// MAIN + +fn main() { + let mut encoder_work = Some(EncoderWork::new()); + let mut decoder_work = Some(DecoderWork::new()); + + let mut rng = rand::thread_rng(); + + let max_shard_bytes = MAX_SHARD_BYTES_LOG.exp2() as usize; + let max_original_count = MAX_ORIGINAL_COUNT_LOG.exp2() as usize; + let mut original = vec![vec![0u8; max_shard_bytes]; max_original_count]; + for original in &mut original { + rng.fill::<[u8]>(original); + } + + let mut test_number = 1; + + loop { + // Actual data shouldn't matter at all, + // but just in case keep changing data occasionally. 
+        if test_number % 100 == 0 {
+            for original in &mut original {
+                rng.fill::<[u8]>(original);
+            }
+        }
+
+        let shard_bytes_log: f64 = rng.gen_range(MIN_SHARD_BYTES_LOG..=MAX_SHARD_BYTES_LOG);
+        let shard_bytes: usize = ((shard_bytes_log.exp2() / 64.0) as usize) * 64;
+
+        let mut original_count;
+        let mut recovery_count;
+        loop {
+            let original_count_log: f64 =
+                rng.gen_range(MIN_ORIGINAL_COUNT_LOG..=MAX_ORIGINAL_COUNT_LOG);
+            let recovery_count_log: f64 =
+                rng.gen_range(MIN_RECOVERY_COUNT_LOG..=MAX_RECOVERY_COUNT_LOG);
+
+            original_count = original_count_log.exp2() as usize;
+            recovery_count = recovery_count_log.exp2() as usize;
+
+            if std::cmp::min(original_count, recovery_count).next_power_of_two()
+                + std::cmp::max(original_count, recovery_count)
+                <= GF_ORDER
+            {
+                break;
+            }
+        }
+
+        // 50% chance of max loss
+        let loss_count = if rng.gen::<bool>() {
+            recovery_count
+        } else {
+            rng.gen_range(1..=recovery_count)
+        };
+
+        let loss_indexes: FixedBitSet =
+            rand::seq::index::sample(&mut rng, original_count + recovery_count, loss_count)
+                .iter()
+                .collect();
+
+        eprintln!();
+        eprintln!("{}", test_number);
+        eprintln!("original_count: {}", original_count);
+        eprintln!("recovery_count: {}", recovery_count);
+        eprintln!("loss_count    : {}", loss_count);
+        eprintln!("shard_bytes   : {}", shard_bytes);
+
+        roundtrip!(
+            DefaultRate,
+            &original,
+            original_count,
+            recovery_count,
+            shard_bytes,
+            &loss_indexes,
+            &mut encoder_work,
+            &mut decoder_work,
+        );
+
+        if HighRate::<NoSimd>::supports(original_count, recovery_count) {
+            println!("- High");
+            roundtrip!(
+                HighRate,
+                &original,
+                original_count,
+                recovery_count,
+                shard_bytes,
+                &loss_indexes,
+                &mut encoder_work,
+                &mut decoder_work,
+            );
+        }
+
+        if LowRate::<NoSimd>::supports(original_count, recovery_count) {
+            println!("- Low");
+            roundtrip!(
+                LowRate,
+                &original,
+                original_count,
+                recovery_count,
+                shard_bytes,
+                &loss_indexes,
+                &mut encoder_work,
+                &mut decoder_work,
+            );
+        }
+
+        test_number += 1;
+    }
+}
+
+// 
======================================================================
+// FUNCTIONS
+
+fn roundtrip<E, R>(
+    original: &[Vec<u8>],
+    original_count: usize,
+    recovery_count: usize,
+    shard_bytes: usize,
+    loss_indexes: &FixedBitSet,
+    encoder_work: &mut Option<EncoderWork>,
+    decoder_work: &mut Option<DecoderWork>,
+    engine: E,
+) -> Result<Vec<Vec<u8>>, Error>
+where
+    E: Engine,
+    R: Rate<E>,
+{
+    // ENCODE
+
+    let mut encoder = R::encoder(
+        original_count,
+        recovery_count,
+        shard_bytes,
+        engine.clone(),
+        encoder_work.take(),
+    )?;
+
+    for original in &original[..original_count] {
+        encoder.add_original_shard(&original[..shard_bytes])?;
+    }
+
+    let result = encoder.encode()?;
+    let recovery: Vec<_> = result.recovery_iter().map(|s| s.to_vec()).collect();
+    drop(result);
+
+    // DECODE
+
+    let mut decoder = R::decoder(
+        original_count,
+        recovery_count,
+        shard_bytes,
+        engine,
+        decoder_work.take(),
+    )?;
+
+    for n in 0..original_count {
+        if !loss_indexes[n] {
+            decoder.add_original_shard(n, &original[n][..shard_bytes])?;
+        }
+    }
+
+    for n in 0..recovery_count {
+        if !loss_indexes[original_count + n] {
+            decoder.add_recovery_shard(n, &recovery[n])?;
+        }
+    }
+
+    // CHECK
+
+    let result = decoder.decode()?;
+    for n in 0..original_count {
+        if loss_indexes[n] {
+            assert_eq!(
+                result.restored_original(n).unwrap(),
+                &original[n][..shard_bytes]
+            );
+        }
+    }
+    drop(result);
+
+    // DONE
+
+    *encoder_work = Some(encoder.into_parts().1);
+    *decoder_work = Some(decoder.into_parts().1);
+
+    Ok(recovery)
+}
diff --git a/src/decoder_result.rs b/src/decoder_result.rs
new file mode 100644
index 0000000..0af46a6
--- /dev/null
+++ b/src/decoder_result.rs
@@ -0,0 +1,140 @@
+use crate::rate::DecoderWork;
+
+// ======================================================================
+// DecoderResult - PUBLIC
+
+/// Result of decoding. Contains the restored original shards.
+///
+/// This struct is created by [`ReedSolomonDecoder::decode`]
+/// and [`RateDecoder::decode`].
+/// +/// [`RateDecoder::decode`]: crate::rate::RateDecoder::decode +/// [`ReedSolomonDecoder::decode`]: crate::ReedSolomonDecoder::decode +pub struct DecoderResult<'a> { + work: &'a mut DecoderWork, +} + +impl<'a> DecoderResult<'a> { + /// Returns restored original shard with given `index` + /// or `None` if given `index` doesn't correspond to + /// a missing original shard. + pub fn restored_original(&self, index: usize) -> Option<&[u8]> { + self.work.restored_original(index) + } + + /// Returns iterator over all restored original shards + /// and their indexes, ordered by indexes. + pub fn restored_original_iter(&self) -> RestoredOriginal { + RestoredOriginal::new(self.work) + } +} + +// ====================================================================== +// DecoderResult - CRATE + +impl<'a> DecoderResult<'a> { + pub(crate) fn new(work: &'a mut DecoderWork) -> Self { + Self { work } + } +} + +// ====================================================================== +// DecoderResult - IMPL DROP + +impl<'a> Drop for DecoderResult<'a> { + fn drop(&mut self) { + self.work.reset_received(); + } +} + +// ====================================================================== +// RestoredOriginal - PUBLIC + +/// Iterator over restored original shards and their indexes. +/// +/// This struct is created by [`DecoderResult::restored_original_iter`]. 
+pub struct RestoredOriginal<'a> { + ended: bool, + next_index: usize, + work: &'a DecoderWork, +} + +// ====================================================================== +// RestoredOriginal - IMPL Iterator + +impl<'a> Iterator for RestoredOriginal<'a> { + type Item = (usize, &'a [u8]); + fn next(&mut self) -> Option<(usize, &'a [u8])> { + if self.ended { + None + } else { + let mut index = self.next_index; + while index < self.work.original_count() { + if let Some(original) = self.work.restored_original(index) { + self.next_index = index + 1; + return Some((index, original)); + } + index += 1 + } + self.ended = true; + None + } + } +} + +// ====================================================================== +// RestoredOriginal - CRATE + +impl<'a> RestoredOriginal<'a> { + pub(crate) fn new(work: &'a DecoderWork) -> Self { + Self { + ended: false, + next_index: 0, + work, + } + } +} + +// ====================================================================== +// TESTS + +#[cfg(test)] +mod tests { + use super::*; + use crate::{test_util, ReedSolomonDecoder, ReedSolomonEncoder}; + + #[test] + // DecoderResult::restored_original + // DecoderResult::restored_original_iter + // RestoredOriginal + fn decoder_result() { + let original = test_util::generate_original(3, 1024, 0); + + let mut encoder = ReedSolomonEncoder::new(3, 2, 1024).unwrap(); + let mut decoder = ReedSolomonDecoder::new(3, 2, 1024).unwrap(); + + for original in &original { + encoder.add_original_shard(original).unwrap(); + } + + let result = encoder.encode().unwrap(); + let recovery: Vec<_> = result.recovery_iter().collect(); + + decoder.add_original_shard(1, &original[1]).unwrap(); + decoder.add_recovery_shard(0, recovery[0]).unwrap(); + decoder.add_recovery_shard(1, recovery[1]).unwrap(); + + let result: DecoderResult = decoder.decode().unwrap(); + + assert_eq!(result.restored_original(0).unwrap(), original[0]); + assert!(result.restored_original(1).is_none()); + 
assert_eq!(result.restored_original(2).unwrap(), original[2]); + assert!(result.restored_original(3).is_none()); + + let mut iter: RestoredOriginal = result.restored_original_iter(); + assert_eq!(iter.next(), Some((0, original[0].as_slice()))); + assert_eq!(iter.next(), Some((2, original[2].as_slice()))); + assert_eq!(iter.next(), None); + assert_eq!(iter.next(), None); + } +} diff --git a/src/encoder_result.rs b/src/encoder_result.rs new file mode 100644 index 0000000..8a6ebdf --- /dev/null +++ b/src/encoder_result.rs @@ -0,0 +1,134 @@ +use crate::rate::EncoderWork; + +// ====================================================================== +// EncoderResult - PUBLIC + +/// Result of encoding. Contains the generated recovery shards. +/// +/// This struct is created by [`ReedSolomonEncoder::encode`] +/// and [`RateEncoder::encode`]. +/// +/// [`RateEncoder::encode`]: crate::rate::RateEncoder::encode +/// [`ReedSolomonEncoder::encode`]: crate::ReedSolomonEncoder::encode +pub struct EncoderResult<'a> { + work: &'a mut EncoderWork, +} + +impl<'a> EncoderResult<'a> { + /// Returns recovery shard with given `index` + /// or `None` if `index >= recovery_count`. + /// + /// Recovery shards have indexes `0..recovery_count` + /// and these same indexes must be used when decoding. + pub fn recovery(&self, index: usize) -> Option<&[u8]> { + self.work.recovery(index) + } + + /// Returns iterator over all recovery shards ordered by their indexes. + /// + /// Recovery shards have indexes `0..recovery_count` + /// and these same indexes must be used when decoding. 
+ pub fn recovery_iter(&self) -> Recovery { + Recovery::new(self.work) + } +} + +// ====================================================================== +// EncoderResult - CRATE + +impl<'a> EncoderResult<'a> { + pub(crate) fn new(work: &'a mut EncoderWork) -> Self { + Self { work } + } +} + +// ====================================================================== +// EncoderResult - IMPL DROP + +impl<'a> Drop for EncoderResult<'a> { + fn drop(&mut self) { + self.work.reset_received(); + } +} + +// ====================================================================== +// Recovery - PUBLIC + +/// Iterator over generated recovery shards. +/// +/// This struct is created by [`EncoderResult::recovery_iter`]. +pub struct Recovery<'a> { + ended: bool, + next_index: usize, + work: &'a EncoderWork, +} + +// ====================================================================== +// Recovery - IMPL Iterator + +impl<'a> Iterator for Recovery<'a> { + type Item = &'a [u8]; + fn next(&mut self) -> Option<&'a [u8]> { + if self.ended { + None + } else if let Some(next) = self.work.recovery(self.next_index) { + self.next_index += 1; + Some(next) + } else { + self.ended = true; + None + } + } +} + +// ====================================================================== +// Recovery - CRATE + +impl<'a> Recovery<'a> { + pub(crate) fn new(work: &'a EncoderWork) -> Self { + Self { + ended: false, + next_index: 0, + work, + } + } +} + +// ====================================================================== +// TESTS + +#[cfg(test)] +mod tests { + use super::*; + use crate::{test_util, ReedSolomonEncoder}; + + #[test] + // EncoderResult::recovery + // EncoderResult::recovery_iter + // Recovery + fn encoder_result() { + let original = test_util::generate_original(2, 1024, 123); + let mut encoder = ReedSolomonEncoder::new(2, 3, 1024).unwrap(); + + for original in &original { + encoder.add_original_shard(original).unwrap(); + } + + let result: EncoderResult = 
encoder.encode().unwrap(); + + let mut all = Vec::new(); + all.push(result.recovery(0).unwrap()); + all.push(result.recovery(1).unwrap()); + all.push(result.recovery(2).unwrap()); + assert!(result.recovery(3).is_none()); + test_util::assert_hash(all, test_util::LOW_2_3); + + let mut iter: Recovery = result.recovery_iter(); + let mut all = Vec::new(); + all.push(iter.next().unwrap()); + all.push(iter.next().unwrap()); + all.push(iter.next().unwrap()); + assert!(iter.next().is_none()); + test_util::assert_hash(all, test_util::LOW_2_3); + } +} diff --git a/src/engine.rs b/src/engine.rs new file mode 100644 index 0000000..c81486d --- /dev/null +++ b/src/engine.rs @@ -0,0 +1,283 @@ +//! Low-level building blocks for Reed-Solomon encoding/decoding. +//! +//! **This is an advanced module which is not needed for [simple usage] or [basic usage].** +//! +//! This module is relevant if you want to +//! - use [`rate`] module and need an [`Engine`] to use with it. +//! - create your own [`Engine`]. +//! - understand/benchmark/test at low level. +//! +//! # Engines +//! +//! An [`Engine`] is an implementation of basic low-level algorithms +//! needed for Reed-Solomon encoding/decoding. +//! +//! - [`Naive`] +//! - Simple reference implementation. +//! - [`NoSimd`] +//! - Basic optimized engine without SIMD so that it works on all CPUs. +//! - [`DefaultEngine`] +//! - Default engine which is used when no specific engine is given. +//! - Currently just alias to [`NoSimd`]. +//! +//! # Benchmarks +//! +//! - These benchmarks are from `cargo bench engine` +//! with 3.4 GHz i5-3570K (Ivy Bridge, 3rd gen.). +//! - Shards are 1024 bytes. +//! +//! | Benchmark | Shards | ns [`Naive`] | ns [`NoSimd`] | +//! | ----------------- | ------- | ------------ | ------------- | +//! | xor | 1 * 2 | 60 | 32 | +//! | mul | 1 | 1 260 | 860 | +//! | xor_within | 128 * 2 | 5 870 | 5 780 | +//! | formal_derivative | 128 | 21 300 | 15 800 | +//! | FFT | 128 | 764 000 | 545 000 | +//! 
| IFFT | 128 | 780 000 | 546 000 | +//! | FWHT | - | 898 000 | 622 000 | +//! +//! [simple usage]: crate#simple-usage +//! [basic usage]: crate#basic-usage +//! [`ReedSolomonEncoder`]: crate::ReedSolomonEncoder +//! [`ReedSolomonDecoder`]: crate::ReedSolomonDecoder +//! [`rate`]: crate::rate + +pub(crate) use self::shards::Shards; + +pub use self::{engine_naive::Naive, engine_nosimd::NoSimd, shards::ShardsRefMut}; + +mod engine_naive; +mod engine_nosimd; +mod shards; + +pub mod tables; + +// ====================================================================== +// CONST - PUBLIC + +/// Size of Galois field element [`GfElement`] in bits. +pub const GF_BITS: usize = 16; + +/// Galois field order, i.e. number of elements. +pub const GF_ORDER: usize = 65536; + +/// `GF_ORDER - 1` +pub const GF_MODULUS: GfElement = 65535; + +/// Galois field polynomial. +pub const GF_POLYNOMIAL: usize = 0x1002D; + +/// TODO +pub const CANTOR_BASIS: [GfElement; GF_BITS] = [ + 0x0001, 0xACCA, 0x3C0E, 0x163E, 0xC582, 0xED2E, 0x914C, 0x4012, 0x6C98, 0x10D8, 0x6A72, 0xB900, + 0xFDB8, 0xFB34, 0xFF38, 0x991E, +]; + +// ====================================================================== +// TYPE ALIASES - PUBLIC + +/// Galois field element. +pub type GfElement = u16; + +/// Default [`Engine`], currently just alias to [`NoSimd`]. +pub type DefaultEngine = NoSimd; + +// ====================================================================== +// FUNCTIONS - PUBLIC - Galois field operations + +/// Some kind of addition. +#[inline(always)] +pub fn add_mod(x: GfElement, y: GfElement) -> GfElement { + let sum: usize = (x as usize) + (y as usize); + (sum + (sum >> GF_BITS)) as GfElement +} + +/// Some kind of subtraction. 
+#[inline(always)]
+pub fn sub_mod(x: GfElement, y: GfElement) -> GfElement {
+    let dif: usize = (x as usize).wrapping_sub(y as usize);
+    dif.wrapping_add(dif >> GF_BITS) as GfElement
+}
+
+// ======================================================================
+// FUNCTIONS - PUBLIC - misc
+
+/// Returns smallest value that is greater than or equal to `a` and multiple of `b`,
+/// or `None` if `b` is zero or operation would overflow.
+///
+/// - This function is available as [`usize::checked_next_multiple_of`] in nightly Rust.
+///
+/// # Examples
+///
+/// ```rust
+/// use reed_solomon_16::engine;
+///
+/// assert_eq!(engine::checked_next_multiple_of(20, 10), Some(20));
+/// assert_eq!(engine::checked_next_multiple_of(27, 10), Some(30));
+/// ```
+///
+/// [`usize::checked_next_multiple_of`]: https://doc.rust-lang.org/std/primitive.usize.html#method.checked_next_multiple_of
+pub fn checked_next_multiple_of(a: usize, b: usize) -> Option<usize> {
+    if b == 0 {
+        None
+    } else {
+        let mut x = a / b;
+        x += if a % b != 0 { 1 } else { 0 };
+        x.checked_mul(b)
+    }
+}
+
+// ======================================================================
+// Engine - PUBLIC
+
+/// Implementation of basic low-level algorithms needed
+/// for Reed-Solomon encoding/decoding.
+///
+/// These algorithms are not properly documented.
+///
+/// [`Naive`] engine is provided for those who want to
+/// study the source code to understand [`Engine`].
+pub trait Engine: Clone
+where
+    Self: Sized,
+{
+    // ============================================================
+    // REQUIRED
+
+    /// In-place decimation-in-time FFT (fast Fourier transform).
+    ///
+    /// - FFT is done on chunk `data[pos .. pos + size]`
+    /// - `size` must be `2^n`
+    /// - Before function call `data[pos .. pos + size]` must be valid.
+    /// - After function call
+    ///   - `data[pos .. pos + truncated_size]`
+    ///     contains valid FFT result.
+    ///   - `data[pos + truncated_size .. 
pos + size]` + /// contains valid FFT result if this contained + /// only `0u8`:s and garbage otherwise. + fn fft( + &self, + data: &mut ShardsRefMut, + pos: usize, + size: usize, + truncated_size: usize, + skew_delta: usize, + ); + + /// In-place FWHT (fast Walsh-Hadamard transform). + /// + /// - This is used only in [`Engine::eval_poly`], + /// both directly and indirectly via [`initialize_log_walsh`]. + /// - `truncated_size` must be handled so that + /// [`Engine::eval_poly`] returns correct result. + /// + /// [`initialize_log_walsh`]: self::tables::initialize_log_walsh + fn fwht(data: &mut [GfElement; GF_ORDER], truncated_size: usize); + + /// In-place decimation-in-time IFFT (inverse fast Fourier transform). + /// + /// - IFFT is done on chunk `data[pos .. pos + size]` + /// - `size` must be `2^n` + /// - Before function call `data[pos .. pos + size]` must be valid. + /// - After function call + /// - `data[pos .. pos + truncated_size]` + /// contains valid IFFT result. + /// - `data[pos + truncated_size .. pos + size]` + /// contains valid IFFT result if this contained + /// only `0u8`:s and garbage otherwise. + fn ifft( + &self, + data: &mut ShardsRefMut, + pos: usize, + size: usize, + truncated_size: usize, + skew_delta: usize, + ); + + /// `x[] *= log_m` + fn mul(&self, x: &mut [u8], log_m: GfElement); + + /// `x[] ^= y[]` + fn xor(x: &mut [u8], y: &[u8]); + + // ============================================================ + // PROVIDED + + /// Evaluate polynomial. + fn eval_poly(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) { + let log_walsh = tables::initialize_log_walsh::(); + + Self::fwht(erasures, truncated_size); + + for i in 0..GF_ORDER { + erasures[i] = (((erasures[i] as usize) * (log_walsh[i] as usize)) + % (GF_MODULUS as usize)) as GfElement; + } + + Self::fwht(erasures, GF_ORDER); + } + + /// FFT with `skew_delta = pos + size`. 
+ #[inline(always)] + fn fft_skew_end( + &self, + data: &mut ShardsRefMut, + pos: usize, + size: usize, + truncated_size: usize, + ) { + self.fft(data, pos, size, truncated_size, pos + size) + } + + /// Formal derivative. + fn formal_derivative(data: &mut ShardsRefMut) { + for i in 1..data.len() { + let width: usize = ((i ^ (i - 1)) + 1) >> 1; + Self::xor_within(data, i - width, i, width); + } + } + + /// IFFT with `skew_delta = pos + size`. + #[inline(always)] + fn ifft_skew_end( + &self, + data: &mut ShardsRefMut, + pos: usize, + size: usize, + truncated_size: usize, + ) { + self.ifft(data, pos, size, truncated_size, pos + size) + } + + /// `data[x .. x + count] ^= data[y .. y + count]` + /// + /// Ranges must not overlap. + #[inline(always)] + fn xor_within(data: &mut ShardsRefMut, x: usize, y: usize, count: usize) { + let (xs, ys) = data.flat2_mut(x, y, count); + Self::xor(xs, ys); + } +} + +// ====================================================================== +// TESTS + +// Engines are tested indirectly via roundtrip tests of HighRate and LowRate. 
+ +#[cfg(test)] +mod tests { + use super::*; + + // ============================================================ + // checked_next_multiple_of + + #[test] + fn test_checked_next_multiple_of() { + assert_eq!(checked_next_multiple_of(10, 0), None); + assert_eq!(checked_next_multiple_of(usize::MAX, 2), None); + + assert_eq!(checked_next_multiple_of(99, 20), Some(100)); + assert_eq!(checked_next_multiple_of(100, 20), Some(100)); + assert_eq!(checked_next_multiple_of(101, 20), Some(120)); + } +} diff --git a/src/engine/engine_naive.rs b/src/engine/engine_naive.rs new file mode 100644 index 0000000..46f0286 --- /dev/null +++ b/src/engine/engine_naive.rs @@ -0,0 +1,188 @@ +use crate::engine::{ + self, + tables::{self, Exp, Log, Skew}, + Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER, +}; + +// ====================================================================== +// Naive - PUBLIC + +/// Simple reference implementation of [`Engine`]. +/// +/// - [`Naive`] is meant for those who want to study +/// the source code to understand [`Engine`]. +/// - [`Naive`] also includes some debug assertions +/// which are not present in other implementations. +#[derive(Clone)] +pub struct Naive { + exp: &'static Exp, + log: &'static Log, + skew: &'static Skew, +} + +impl Naive { + /// Creates new [`Naive`], initializing all tables + /// needed for encoding or decoding. + /// + /// Currently only difference between encoding/decoding is + /// `log_walsh` (128 kiB) which is only needed for decoding. + pub fn new() -> Self { + let (exp, log) = tables::initialize_exp_log(); + let skew = tables::initialize_skew(); + + // This is used in `Engine::eval_poly`. 
+ tables::initialize_log_walsh::(); + + Self { exp, log, skew } + } +} + +impl Engine for Naive { + fn fft( + &self, + data: &mut ShardsRefMut, + pos: usize, + size: usize, + truncated_size: usize, + skew_delta: usize, + ) { + debug_assert!(size.is_power_of_two()); + debug_assert!(truncated_size <= size); + + let mut dist = size / 2; + while dist > 0 { + let mut r = 0; + while r < truncated_size { + let log_m = self.skew[r + dist + skew_delta - 1]; + for i in r..r + dist { + let (a, b) = data.dist2_mut(pos + i, dist); + + // FFT BUTTERFLY + + if log_m != GF_MODULUS { + self.mul_add(a, b, log_m); + } + Self::xor(b, a); + } + r += dist * 2; + } + dist /= 2; + } + } + + fn fwht(data: &mut [GfElement; GF_ORDER], truncated_size: usize) { + debug_assert!(truncated_size <= GF_ORDER); + + let mut dist = 1; + while dist < GF_ORDER { + let mut r = 0; + while r < truncated_size { + for i in r..r + dist { + let sum = engine::add_mod(data[i], data[i + dist]); + let dif = engine::sub_mod(data[i], data[i + dist]); + data[i] = sum; + data[i + dist] = dif; + } + r += dist * 2; + } + dist *= 2; + } + } + + fn ifft( + &self, + data: &mut ShardsRefMut, + pos: usize, + size: usize, + truncated_size: usize, + skew_delta: usize, + ) { + debug_assert!(size.is_power_of_two()); + debug_assert!(truncated_size <= size); + + let mut dist = 1; + while dist < size { + let mut r = 0; + while r < truncated_size { + let log_m = self.skew[r + dist + skew_delta - 1]; + for i in r..r + dist { + let (a, b) = data.dist2_mut(pos + i, dist); + + // IFFT BUTTERFLY + + Self::xor(b, a); + if log_m != GF_MODULUS { + self.mul_add(a, b, log_m); + } + } + r += dist * 2; + } + dist *= 2; + } + } + + fn mul(&self, x: &mut [u8], log_m: GfElement) { + let shard_bytes = x.len(); + debug_assert!(shard_bytes & 63 == 0); + + let mut pos = 0; + while pos < shard_bytes { + for i in 0..32 { + let lo = x[pos + i] as GfElement; + let hi = x[pos + i + 32] as GfElement; + let prod = tables::mul(lo | (hi << 8), log_m, self.exp, 
self.log); + x[pos + i] = prod as u8; + x[pos + i + 32] = (prod >> 8) as u8; + } + pos += 64; + } + } + + fn xor(x: &mut [u8], y: &[u8]) { + let shard_bytes = x.len(); + debug_assert!(shard_bytes & 63 == 0); + debug_assert_eq!(shard_bytes, y.len()); + + for i in 0..shard_bytes { + x[i] ^= y[i]; + } + } +} + +// ====================================================================== +// Naive - IMPL Default + +impl Default for Naive { + fn default() -> Self { + Self::new() + } +} + +// ====================================================================== +// Naive - PRIVATE + +impl Naive { + /// `x[] ^= y[] * log_m` + fn mul_add(&self, x: &mut [u8], y: &[u8], log_m: GfElement) { + let shard_bytes = x.len(); + debug_assert!(shard_bytes & 63 == 0); + debug_assert_eq!(shard_bytes, y.len()); + + let mut pos = 0; + while pos < shard_bytes { + for i in 0..32 { + let lo = y[pos + i] as GfElement; + let hi = y[pos + i + 32] as GfElement; + let prod = tables::mul(lo | (hi << 8), log_m, self.exp, self.log); + x[pos + i] ^= prod as u8; + x[pos + i + 32] ^= (prod >> 8) as u8; + } + pos += 64; + } + } +} + +// ====================================================================== +// TESTS + +// Engines are tested indirectly via roundtrip tests of HighRate and LowRate. diff --git a/src/engine/engine_nosimd.rs b/src/engine/engine_nosimd.rs new file mode 100644 index 0000000..32bfd75 --- /dev/null +++ b/src/engine/engine_nosimd.rs @@ -0,0 +1,388 @@ +use crate::engine::{ + self, + tables::{self, Mul16, Skew}, + Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER, +}; + +// ====================================================================== +// NoSimd - PUBLIC + +/// Optimized [`Engine`] without SIMD. +/// +/// [`NoSimd`] is a basic optimized engine which works on all CPUs. +#[derive(Clone)] +pub struct NoSimd { + mul16: &'static Mul16, + skew: &'static Skew, +} + +impl NoSimd { + /// Creates new [`NoSimd`], initializing all tables + /// needed for encoding or decoding. 
+ /// + /// Currently only difference between encoding/decoding is + /// `log_walsh` (128 kiB) which is only needed for decoding. + pub fn new() -> Self { + let mul16 = tables::initialize_mul16(); + let skew = tables::initialize_skew(); + + // This is used in `Engine::eval_poly`. + tables::initialize_log_walsh::(); + + Self { mul16, skew } + } +} + +impl Engine for NoSimd { + fn fft( + &self, + data: &mut ShardsRefMut, + pos: usize, + size: usize, + truncated_size: usize, + skew_delta: usize, + ) { + self.fft_private(data, pos, size, truncated_size, skew_delta); + } + + fn fwht(data: &mut [GfElement; GF_ORDER], truncated_size: usize) { + Self::fwht_private(data, truncated_size); + } + + fn ifft( + &self, + data: &mut ShardsRefMut, + pos: usize, + size: usize, + truncated_size: usize, + skew_delta: usize, + ) { + self.ifft_private(data, pos, size, truncated_size, skew_delta); + } + + fn mul(&self, x: &mut [u8], log_m: GfElement) { + let lut = &self.mul16[log_m as usize]; + + let mut pos = 0; + while pos < x.len() { + for i in 0..32 { + let lo = x[pos + i] as usize; + let hi = x[pos + i + 32] as usize; + let prod = lut[0][lo & 15] ^ lut[1][lo >> 4] ^ lut[2][hi & 15] ^ lut[3][hi >> 4]; + x[pos + i] = prod as u8; + x[pos + i + 32] = (prod >> 8) as u8; + } + pos += 64; + } + } + + fn xor(x: &mut [u8], y: &[u8]) { + let x64: &mut [u64] = bytemuck::cast_slice_mut(x); + let y64: &[u64] = bytemuck::cast_slice(y); + + for i in 0..x64.len() { + x64[i] ^= y64[i]; + } + } +} + +// ====================================================================== +// NoSimd - IMPL Default + +impl Default for NoSimd { + fn default() -> Self { + Self::new() + } +} + +// ====================================================================== +// NoSimd - PRIVATE + +impl NoSimd { + /// `x[] ^= y[] * log_m` + fn mul_add(&self, x: &mut [u8], y: &[u8], log_m: GfElement) { + let lut = &self.mul16[log_m as usize]; + + let mut pos = 0; + while pos < x.len() { + for i in 0..32 { + let lo = y[pos + i] 
as usize; + let hi = y[pos + i + 32] as usize; + let prod = lut[0][lo & 15] ^ lut[1][lo >> 4] ^ lut[2][hi & 15] ^ lut[3][hi >> 4]; + x[pos + i] ^= prod as u8; + x[pos + i + 32] ^= (prod >> 8) as u8; + } + pos += 64; + } + } +} + +// ====================================================================== +// NoSimd - PRIVATE - FWHT (fast Walsh-Hadamard transform) + +impl NoSimd { + #[inline(always)] + fn fwht_2(a: &mut GfElement, b: &mut GfElement) { + let sum = engine::add_mod(*a, *b); + let dif = engine::sub_mod(*a, *b); + *a = sum; + *b = dif; + } + + #[inline(always)] + fn fwht_4(data: &mut [GfElement], dist: usize) { + let mut t0 = data[0]; + let mut t1 = data[dist]; + let mut t2 = data[dist * 2]; + let mut t3 = data[dist * 3]; + + Self::fwht_2(&mut t0, &mut t1); + Self::fwht_2(&mut t2, &mut t3); + Self::fwht_2(&mut t0, &mut t2); + Self::fwht_2(&mut t1, &mut t3); + + data[0] = t0; + data[dist] = t1; + data[dist * 2] = t2; + data[dist * 3] = t3; + } + + #[inline(always)] + fn fwht_private(data: &mut [GfElement; GF_ORDER], truncated_size: usize) { + // TWO LAYERS AT TIME + + let mut dist = 1; + let mut dist4 = 4; + while dist4 <= GF_ORDER { + let mut r = 0; + while r < truncated_size { + for i in r..r + dist { + Self::fwht_4(&mut data[i..], dist) + } + r += dist4; + } + + dist = dist4; + dist4 <<= 2; + } + + // FINAL ODD LAYER + + if dist < GF_ORDER { + for i in 0..dist { + // inlined manually as Rust doesn't like + // `fwht_2(&mut data[i], &mut data[i + dist])` + let sum = engine::add_mod(data[i], data[i + dist]); + let dif = engine::sub_mod(data[i], data[i + dist]); + data[i] = sum; + data[i + dist] = dif; + } + } + } +} + +// ====================================================================== +// NoSimd - PRIVATE - FFT (fast Fourier transform) + +impl NoSimd { + // Partial butterfly, caller must do `GF_MODULUS` check with `xor`. 
    // Partial butterfly, caller must do `GF_MODULUS` check with `xor`.
    #[inline(always)]
    fn fft_butterfly_partial(&self, x: &mut [u8], y: &mut [u8], log_m: GfElement) {
        // x' = x + y * m ; y' = x' + y  (additions are XOR in GF(2^16)).
        self.mul_add(x, y, log_m);
        Self::xor(y, x);
    }

    // One 4-point butterfly: applies two FFT layers at once to the shards at
    // `pos`, `pos + dist`, `pos + dist * 2` and `pos + dist * 3`.
    #[inline(always)]
    fn fft_butterfly_two_layers(
        &self,
        data: &mut ShardsRefMut,
        pos: usize,
        dist: usize,
        log_m01: GfElement,
        log_m23: GfElement,
        log_m02: GfElement,
    ) {
        let (s0, s1, s2, s3) = data.dist4_mut(pos, dist);

        // FIRST LAYER

        // `GF_MODULUS` marks a skew for which the multiply part is skipped
        // and only the XOR half of the butterfly is done.
        if log_m02 == GF_MODULUS {
            Self::xor(s2, s0);
            Self::xor(s3, s1);
        } else {
            self.fft_butterfly_partial(s0, s2, log_m02);
            self.fft_butterfly_partial(s1, s3, log_m02);
        }

        // SECOND LAYER

        if log_m01 == GF_MODULUS {
            Self::xor(s1, s0);
        } else {
            self.fft_butterfly_partial(s0, s1, log_m01);
        }

        if log_m23 == GF_MODULUS {
            Self::xor(s3, s2);
        } else {
            self.fft_butterfly_partial(s2, s3, log_m23);
        }
    }

    // FFT over `size` shards starting at `pos`, processing only the first
    // `truncated_size` positions. `skew_delta` offsets the skew-table lookups.
    #[inline(always)]
    fn fft_private(
        &self,
        data: &mut ShardsRefMut,
        pos: usize,
        size: usize,
        truncated_size: usize,
        skew_delta: usize,
    ) {
        // TWO LAYERS AT TIME

        // Distance shrinks by a factor of 4 per round (two radix-2 layers).
        let mut dist4 = size;
        let mut dist = size >> 2;
        while dist != 0 {
            let mut r = 0;
            while r < truncated_size {
                let base = r + dist + skew_delta - 1;

                let log_m01 = self.skew[base];
                let log_m02 = self.skew[base + dist];
                let log_m23 = self.skew[base + dist * 2];

                for i in r..r + dist {
                    self.fft_butterfly_two_layers(data, pos + i, dist, log_m01, log_m23, log_m02)
                }

                r += dist4;
            }
            dist4 = dist;
            dist >>= 2;
        }

        // FINAL ODD LAYER

        // When `size` is an odd power of two one distance-1 layer remains.
        if dist4 == 2 {
            let mut r = 0;
            while r < truncated_size {
                let log_m = self.skew[r + skew_delta];

                let (x, y) = data.dist2_mut(pos + r, 1);

                if log_m == GF_MODULUS {
                    Self::xor(y, x);
                } else {
                    self.fft_butterfly_partial(x, y, log_m)
                }

                r += 2;
            }
        }
    }
}

// ======================================================================
// NoSimd - PRIVATE - IFFT (inverse fast Fourier transform)

impl NoSimd {
    // Partial butterfly, caller must do `GF_MODULUS` check with `xor`.
    #[inline(always)]
    fn ifft_butterfly_partial(&self, x: &mut [u8], y: &mut [u8], log_m: GfElement) {
        // Exact inverse of `fft_butterfly_partial`: XOR first, then mul_add.
        Self::xor(y, x);
        self.mul_add(x, y, log_m);
    }

    // Inverse 4-point butterfly; layers run in the opposite order
    // compared to `fft_butterfly_two_layers`.
    #[inline(always)]
    fn ifft_butterfly_two_layers(
        &self,
        data: &mut ShardsRefMut,
        pos: usize,
        dist: usize,
        log_m01: GfElement,
        log_m23: GfElement,
        log_m02: GfElement,
    ) {
        let (s0, s1, s2, s3) = data.dist4_mut(pos, dist);

        // FIRST LAYER

        if log_m01 == GF_MODULUS {
            Self::xor(s1, s0);
        } else {
            self.ifft_butterfly_partial(s0, s1, log_m01);
        }

        if log_m23 == GF_MODULUS {
            Self::xor(s3, s2);
        } else {
            self.ifft_butterfly_partial(s2, s3, log_m23);
        }

        // SECOND LAYER

        if log_m02 == GF_MODULUS {
            Self::xor(s2, s0);
            Self::xor(s3, s1);
        } else {
            self.ifft_butterfly_partial(s0, s2, log_m02);
            self.ifft_butterfly_partial(s1, s3, log_m02);
        }
    }

    // IFFT over `size` shards starting at `pos`, processing only the first
    // `truncated_size` positions. `skew_delta` offsets the skew-table lookups.
    #[inline(always)]
    fn ifft_private(
        &self,
        data: &mut ShardsRefMut,
        pos: usize,
        size: usize,
        truncated_size: usize,
        skew_delta: usize,
    ) {
        // TWO LAYERS AT TIME

        // Distance grows by a factor of 4 per round (mirror of `fft_private`).
        let mut dist = 1;
        let mut dist4 = 4;
        while dist4 <= size {
            let mut r = 0;
            while r < truncated_size {
                let base = r + dist + skew_delta - 1;

                let log_m01 = self.skew[base];
                let log_m02 = self.skew[base + dist];
                let log_m23 = self.skew[base + dist * 2];

                for i in r..r + dist {
                    self.ifft_butterfly_two_layers(data, pos + i, dist, log_m01, log_m23, log_m02)
                }

                r += dist4;
            }
            dist = dist4;
            dist4 <<= 2;
        }

        // FINAL ODD LAYER

        // When `size` is an odd power of two one large-distance layer remains.
        if dist < size {
            let log_m = self.skew[dist + skew_delta - 1];
            if log_m == GF_MODULUS {
                Self::xor_within(data, pos + dist, pos, dist);
            } else {
                // Split so the two halves of each butterfly can be borrowed
                // mutably at the same time.
                let (mut a, mut b) = data.split_at_mut(pos + dist);
                for i in 0..dist {
                    self.ifft_butterfly_partial(
                        &mut a[pos + i], // data[pos + i]
                        &mut b[i],       // data[pos + i + dist]
                        log_m,
                    );
                }
            }
        }
    }
}

// ======================================================================
// TESTS

// Engines are tested indirectly via roundtrip tests of HighRate and LowRate.
via roundtrip tests of HighRate and LowRate. diff --git a/src/engine/shards.rs b/src/engine/shards.rs new file mode 100644 index 0000000..4df3911 --- /dev/null +++ b/src/engine/shards.rs @@ -0,0 +1,221 @@ +use std::ops::{Bound, Index, IndexMut, RangeBounds}; + +// ====================================================================== +// Shards - CRATE + +pub(crate) struct Shards { + shard_count: usize, + shard_bytes: usize, + + // Flat array of `shard_count * shard_bytes` bytes. + data: Vec, +} + +impl Shards { + pub(crate) fn as_ref_mut(&mut self) -> ShardsRefMut { + ShardsRefMut::new(self.shard_count, self.shard_bytes, self.data.as_mut()) + } + + pub(crate) fn new() -> Self { + Self { + shard_count: 0, + shard_bytes: 0, + data: Vec::new(), + } + } + + pub(crate) fn resize(&mut self, shard_count: usize, shard_bytes: usize) { + assert!(shard_bytes > 0 && shard_bytes & 63 == 0); + + self.shard_count = shard_count; + self.shard_bytes = shard_bytes; + + self.data.resize(shard_count * shard_bytes, 0); + } +} + +// ====================================================================== +// Shards - IMPL Index + +impl Index for Shards { + type Output = [u8]; + fn index(&self, index: usize) -> &Self::Output { + &self.data[index * self.shard_bytes..(index + 1) * self.shard_bytes] + } +} + +// ====================================================================== +// Shards - IMPL IndexMut + +impl IndexMut for Shards { + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + &mut self.data[index * self.shard_bytes..(index + 1) * self.shard_bytes] + } +} + +// ====================================================================== +// ShardsRefMut - PUBLIC + +/// Mutable reference to shard array implemented as flat byte array. +pub struct ShardsRefMut<'a> { + shard_count: usize, + shard_bytes: usize, + + // Flat array of `shard_count * shard_bytes` bytes. 
+ data: &'a mut [u8], +} + +impl<'a> ShardsRefMut<'a> { + /// Returns mutable references to shards at `pos` and `pos + dist`. + /// + /// See source code of [`Naive::fft`] for an example. + /// + /// # Panics + /// + /// If `dist` is `0`. + /// + /// [`Naive::fft`]: crate::engine::Naive#method.fft + pub fn dist2_mut(&mut self, mut pos: usize, mut dist: usize) -> (&mut [u8], &mut [u8]) { + pos *= self.shard_bytes; + dist *= self.shard_bytes; + + let (a, b) = self.data[pos..].split_at_mut(dist); + (&mut a[..self.shard_bytes], &mut b[..self.shard_bytes]) + } + + /// Returns mutable references to shards at + /// `pos`, `pos + dist`, `pos + dist * 2` and `pos + dist * 3`. + /// + /// See source code of [`NoSimd::fft`] for an example + /// (specifically the private method `fft_butterfly_two_layers`). + /// + /// # Panics + /// + /// If `dist` is `0`. + /// + /// [`NoSimd::fft`]: crate::engine::NoSimd#method.fft + pub fn dist4_mut( + &mut self, + mut pos: usize, + mut dist: usize, + ) -> (&mut [u8], &mut [u8], &mut [u8], &mut [u8]) { + pos *= self.shard_bytes; + dist *= self.shard_bytes; + + let (ab, cd) = self.data[pos..].split_at_mut(dist * 2); + let (a, b) = ab.split_at_mut(dist); + let (c, d) = cd.split_at_mut(dist); + + ( + &mut a[..self.shard_bytes], + &mut b[..self.shard_bytes], + &mut c[..self.shard_bytes], + &mut d[..self.shard_bytes], + ) + } + + /// Returns `true` if this contains no shards. + pub fn is_empty(&self) -> bool { + self.shard_count == 0 + } + + /// Returns number of shards. + pub fn len(&self) -> usize { + self.shard_count + } + + /// Creates new [`ShardsRefMut`] that references given `data`. + /// + /// # Panics + /// + /// If `data` is smaller than `shard_count * shard_bytes` bytes. 
+ pub fn new(shard_count: usize, shard_bytes: usize, data: &'a mut [u8]) -> Self { + Self { + shard_count, + shard_bytes, + data: &mut data[..shard_count * shard_bytes], + } + } + + /// Splits this [`ShardsRefMut`] into two so that + /// first includes shards `0..mid` and second includes shards `mid..`. + pub fn split_at_mut(&mut self, mid: usize) -> (ShardsRefMut, ShardsRefMut) { + let (a, b) = self.data.split_at_mut(mid * self.shard_bytes); + ( + ShardsRefMut::new(mid, self.shard_bytes, a), + ShardsRefMut::new(self.shard_count - mid, self.shard_bytes, b), + ) + } + + /// Fills the given shard-range with `0u8`:s. + pub fn zero>(&mut self, range: R) { + let start = match range.start_bound() { + Bound::Included(start) => start * self.shard_bytes, + Bound::Excluded(start) => (start + 1) * self.shard_bytes, + Bound::Unbounded => 0, + }; + + let end = match range.end_bound() { + Bound::Included(end) => (end + 1) * self.shard_bytes, + Bound::Excluded(end) => end * self.shard_bytes, + Bound::Unbounded => self.shard_count * self.shard_bytes, + }; + + self.data[start..end].fill(0); + } +} + +// ====================================================================== +// ShardsRefMut - IMPL Index + +impl<'a> Index for ShardsRefMut<'a> { + type Output = [u8]; + fn index(&self, index: usize) -> &Self::Output { + &self.data[index * self.shard_bytes..(index + 1) * self.shard_bytes] + } +} + +// ====================================================================== +// ShardsRefMut - IMPL IndexMut + +impl<'a> IndexMut for ShardsRefMut<'a> { + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + &mut self.data[index * self.shard_bytes..(index + 1) * self.shard_bytes] + } +} + +// ====================================================================== +// ShardsRefMut - CRATE + +impl<'a> ShardsRefMut<'a> { + pub(crate) fn copy_within(&mut self, mut src: usize, mut dest: usize, mut count: usize) { + src *= self.shard_bytes; + dest *= self.shard_bytes; + count *= 
self.shard_bytes; + + self.data.copy_within(src..src + count, dest); + } + + // Returns mutable references to flat-arrays of shard-ranges + // `x .. x + count` and `y .. y + count`. + // + // Ranges must not overlap. + pub(crate) fn flat2_mut( + &mut self, + mut x: usize, + mut y: usize, + mut count: usize, + ) -> (&mut [u8], &mut [u8]) { + x *= self.shard_bytes; + y *= self.shard_bytes; + count *= self.shard_bytes; + + if x < y { + let (head, tail) = self.data.split_at_mut(y); + (&mut head[x..x + count], &mut tail[..count]) + } else { + let (head, tail) = self.data.split_at_mut(x); + (&mut tail[..count], &mut head[y..y + count]) + } + } +} diff --git a/src/engine/tables.rs b/src/engine/tables.rs new file mode 100644 index 0000000..4e2abc8 --- /dev/null +++ b/src/engine/tables.rs @@ -0,0 +1,205 @@ +//! Lookup-tables used by [`Engine`]:s. +//! +//! All tables are global and each is initialized at most once. +//! +//! # Tables +//! +//! | Table | Size | Used in encoding | Used in decoding | By engines | +//! | ------------ | ------- | ---------------- | ---------------- | ---------- | +//! | [`Exp`] | 128 kiB | yes | yes | all | +//! | [`Log`] | 128 kiB | yes | yes | all | +//! | [`LogWalsh`] | 128 kiB | - | yes | all | +//! | [`Mul16`] | 8 MiB | yes | yes | [`NoSimd`] | +//! | [`Skew`] | 128 kiB | yes | yes | all | +//! +//! [`NoSimd`]: crate::engine::NoSimd + +use once_cell::sync::OnceCell; + +use crate::engine::{ + self, Engine, GfElement, CANTOR_BASIS, GF_BITS, GF_MODULUS, GF_ORDER, GF_POLYNOMIAL, +}; + +// ====================================================================== +// TYPE ALIASES - PUBLIC + +/// Used by [`Naive`] engine for multiplications +/// and by all [`Engine`]:s to initialize other tables. +/// +/// [`Naive`]: crate::engine::Naive +pub type Exp = [GfElement; GF_ORDER]; + +/// Used by [`Naive`] engine for multiplications +/// and by all [`Engine`]:s to initialize other tables. 
+/// +/// [`Naive`]: crate::engine::Naive +pub type Log = [GfElement; GF_ORDER]; + +/// Used by all [`Engine`]:s in [`Engine::eval_poly`]. +pub type LogWalsh = [GfElement; GF_ORDER]; + +/// Used by [`NoSimd`] engine for multiplications. +/// +/// [`NoSimd`]: crate::engine::NoSimd +pub type Mul16 = [[[GfElement; 16]; 4]; GF_ORDER]; + +/// Used by all [`Engine`]:s for FFT and IFFT. +pub type Skew = [GfElement; GF_MODULUS as usize]; + +// ====================================================================== +// ExpLog - PRIVATE + +struct ExpLog { + exp: Box, + log: Box, +} + +// ====================================================================== +// STATIC - PRIVATE + +static EXP_LOG: OnceCell = OnceCell::new(); +static LOG_WALSH: OnceCell> = OnceCell::new(); +static MUL16: OnceCell> = OnceCell::new(); +static SKEW: OnceCell> = OnceCell::new(); + +// ====================================================================== +// FUNCTIONS - PUBLIC - math + +/// Calculates `x * log_m` using [`Exp`] and [`Log`] tables. +#[inline(always)] +pub fn mul(x: GfElement, log_m: GfElement, exp: &Exp, log: &Log) -> GfElement { + if x == 0 { + 0 + } else { + exp[engine::add_mod(log[x as usize], log_m) as usize] + } +} + +// ====================================================================== +// FUNCTIONS - PUBLIC - initialize tables + +/// Initializes and returns [`Exp`] and [`Log`] tables. 
+#[allow(clippy::needless_range_loop)] +pub fn initialize_exp_log() -> (&'static Exp, &'static Log) { + let exp_log = EXP_LOG.get_or_init(|| { + let mut exp = Box::new([0; GF_ORDER]); + let mut log = Box::new([0; GF_ORDER]); + + // GENERATE LFSR TABLE + + let mut state = 1; + for i in 0..GF_MODULUS { + exp[state] = i; + state <<= 1; + if state >= GF_ORDER { + state ^= GF_POLYNOMIAL; + } + } + exp[0] = GF_MODULUS; + + // CONVERT TO CANTOR BASIS + + log[0] = 0; + for i in 0..GF_BITS { + let width = 1usize << i; + for j in 0..width { + log[j + width] = log[j] ^ CANTOR_BASIS[i]; + } + } + + for i in 0..GF_ORDER { + log[i] = exp[log[i] as usize]; + } + + for i in 0..GF_ORDER { + exp[log[i] as usize] = i as GfElement; + } + + exp[GF_MODULUS as usize] = exp[0]; + + ExpLog { exp, log } + }); + + (&exp_log.exp, &exp_log.log) +} + +/// Initializes and returns [`LogWalsh`] table. +pub fn initialize_log_walsh() -> &'static LogWalsh { + LOG_WALSH.get_or_init(|| { + let (_, log) = initialize_exp_log(); + + let mut log_walsh: Box = Box::new([0; GF_ORDER]); + + log_walsh.copy_from_slice(log.as_ref()); + log_walsh[0] = 0; + E::fwht(log_walsh.as_mut(), GF_ORDER); + + log_walsh + }) +} + +/// Initializes and returns [`Mul16`] table. +pub fn initialize_mul16() -> &'static Mul16 { + MUL16.get_or_init(|| { + let (exp, log) = initialize_exp_log(); + + let mut mul16 = vec![[[0; 16]; 4]; GF_ORDER]; + + for log_m in 0..=GF_MODULUS { + let lut = &mut mul16[log_m as usize]; + for i in 0..16 { + lut[0][i] = mul(i as GfElement, log_m, exp, log); + lut[1][i] = mul((i << 4) as GfElement, log_m, exp, log); + lut[2][i] = mul((i << 8) as GfElement, log_m, exp, log); + lut[3][i] = mul((i << 12) as GfElement, log_m, exp, log); + } + } + + mul16.into_boxed_slice().try_into().unwrap() + }) +} + +/// Initializes and returns [`Skew`] table. 
+#[allow(clippy::needless_range_loop)] +pub fn initialize_skew() -> &'static Skew { + SKEW.get_or_init(|| { + let (exp, log) = initialize_exp_log(); + + let mut skew = Box::new([0; GF_MODULUS as usize]); + + let mut temp = [0; GF_BITS - 1]; + + for i in 1..GF_BITS { + temp[i - 1] = 1 << i; + } + + for m in 0..GF_BITS - 1 { + let step: usize = 1 << (m + 1); + + skew[(1 << m) - 1] = 0; + + for i in m..GF_BITS - 1 { + let s: usize = 1 << (i + 1); + let mut j = (1 << m) - 1; + while j < s { + skew[j + s] = skew[j] ^ temp[i]; + j += step; + } + } + + temp[m] = + GF_MODULUS - log[mul(temp[m], log[(temp[m] ^ 1) as usize], exp, log) as usize]; + + for i in m + 1..GF_BITS - 1 { + let sum = engine::add_mod(log[(temp[i] ^ 1) as usize], temp[m]); + temp[i] = mul(temp[i], sum, exp, log); + } + } + + for i in 0..GF_MODULUS as usize { + skew[i] = log[skew[i] as usize]; + } + + skew + }) +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..7ebd113 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,582 @@ +#![doc = include_str!(concat!(env!("OUT_DIR"), "/README-rustdocified.md"))] +#![deny(missing_docs)] + +use std::{collections::HashMap, fmt}; + +pub use crate::{ + decoder_result::{DecoderResult, RestoredOriginal}, + encoder_result::{EncoderResult, Recovery}, + reed_solomon::{ReedSolomonDecoder, ReedSolomonEncoder}, +}; + +#[cfg(test)] +#[macro_use] +mod test_util; + +mod decoder_result; +mod encoder_result; +mod reed_solomon; + +pub mod algorithm { + #![doc = include_str!("../algorithm.md")] +} +pub mod engine; +pub mod rate; + +// ====================================================================== +// Error - PUBLIC + +/// Represents all possible errors that can occur in this library. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Error { + /// Given shard has different size than given or inferred shard size. 
+ /// + /// - Shard size is given explicitly to encoders/decoders + /// and inferred for [`reed_solomon_16::encode`] + /// and [`reed_solomon_16::decode`]. + /// + /// [`reed_solomon_16::encode`]: crate::encode + /// [`reed_solomon_16::decode`]: crate::decode + DifferentShardSize { + /// Given or inferred shard size. + shard_bytes: usize, + /// Size of the given shard. + got: usize, + }, + + /// Decoder was given two original shards with same index. + DuplicateOriginalShardIndex { + /// Given duplicate index. + index: usize, + }, + + /// Decoder was given two recovery shards with same index. + DuplicateRecoveryShardIndex { + /// Given duplicate index. + index: usize, + }, + + /// Decoder was given original shard with invalid index, + /// i.e. `index >= original_count`. + InvalidOriginalShardIndex { + /// Configured number of original shards. + original_count: usize, + /// Given invalid index. + index: usize, + }, + + /// Decoder was given recovery shard with invalid index, + /// i.e. `index >= recovery_count`. + InvalidRecoveryShardIndex { + /// Configured number of recovery shards. + recovery_count: usize, + /// Given invalid index. + index: usize, + }, + + /// Given or inferred shard size is invalid: + /// Size must be non-zero and multiple of 64 bytes. + /// + /// - Shard size is given explicitly to encoders/decoders + /// and inferred for [`reed_solomon_16::encode`] + /// and [`reed_solomon_16::decode`]. + /// + /// [`reed_solomon_16::encode`]: crate::encode + /// [`reed_solomon_16::decode`]: crate::decode + InvalidShardSize { + /// Given or inferred shard size. + shard_bytes: usize, + }, + + /// Decoder was given too few shards. + /// + /// Decoding requires as many shards as there were original shards + /// in total, in any combination of original shards and recovery shards. + NotEnoughShards { + /// Configured number of original shards. + original_count: usize, + /// Number of original shards given to decoder. 
+ original_received_count: usize, + /// Number of recovery shards given to decoder. + recovery_received_count: usize, + }, + + /// Encoder was given less than `original_count` original shards. + TooFewOriginalShards { + /// Configured number of original shards. + original_count: usize, + /// Number of original shards given to encoder. + original_received_count: usize, + }, + + /// Encoder was given more than `original_count` original shards. + TooManyOriginalShards { + /// Configured number of original shards. + original_count: usize, + }, + + /// Given `original_count` / `recovery_count` combination is not supported. + UnsupportedShardCount { + /// Given number of original shards. + original_count: usize, + /// Given number of recovery shards. + recovery_count: usize, + }, +} + +// ====================================================================== +// Error - IMPL DISPLAY + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::DifferentShardSize { shard_bytes, got } => { + write!( + f, + "different shard size: expected {} bytes, got {} bytes", + shard_bytes, got + ) + } + + Error::DuplicateOriginalShardIndex { index } => { + write!(f, "duplicate original shard index: {}", index) + } + + Error::DuplicateRecoveryShardIndex { index } => { + write!(f, "duplicate recovery shard index: {}", index) + } + + Error::InvalidOriginalShardIndex { + original_count, + index, + } => { + write!( + f, + "invalid original shard index: {} >= original_count {}", + index, original_count, + ) + } + + Error::InvalidRecoveryShardIndex { + recovery_count, + index, + } => { + write!( + f, + "invalid recovery shard index: {} >= recovery_count {}", + index, recovery_count, + ) + } + + Error::InvalidShardSize { shard_bytes } => { + write!( + f, + "invalid shard size: {} bytes (must non-zero and multiple of 64)", + shard_bytes + ) + } + + Error::NotEnoughShards { + original_count, + original_received_count, + 
recovery_received_count, + } => { + write!( + f, + "not enough shards: {} original + {} recovery < {} original_count", + original_received_count, recovery_received_count, original_count, + ) + } + + Error::TooFewOriginalShards { + original_count, + original_received_count, + } => { + write!( + f, + "too few original shards: got {} shards while original_count is {}", + original_received_count, original_count + ) + } + + Error::TooManyOriginalShards { original_count } => { + write!( + f, + "too many original shards: got more than original_count ({}) shards", + original_count + ) + } + + Error::UnsupportedShardCount { + original_count, + recovery_count, + } => { + write!( + f, + "unsupported shard count: {} original shards with {} recovery shards", + original_count, recovery_count + ) + } + } + } +} + +// ====================================================================== +// Error - IMPL ERROR + +impl std::error::Error for Error {} + +// ====================================================================== +// FUNCTIONS - PUBLIC + +/// Encodes in one go using [`ReedSolomonEncoder`], +/// returning generated recovery shards. +/// +/// - Original shards have indexes `0..original_count` +/// corresponding to the order in which they are given. +/// - Recovery shards have indexes `0..recovery_count` +/// corresponding to their position in the returned `Vec`. +/// - These same indexes must be used when decoding. +/// +/// See [simple usage](crate#simple-usage) for an example. 
+pub fn encode( + original_count: usize, + recovery_count: usize, + original: T, +) -> Result>, Error> +where + T: IntoIterator, + T::Item: AsRef<[u8]>, +{ + if !ReedSolomonEncoder::supports(original_count, recovery_count) { + return Err(Error::UnsupportedShardCount { + original_count, + recovery_count, + }); + } + + let mut original = original.into_iter(); + + let (shard_bytes, first) = if let Some(first) = original.next() { + (first.as_ref().len(), first) + } else { + return Err(Error::TooFewOriginalShards { + original_count, + original_received_count: 0, + }); + }; + + let mut encoder = ReedSolomonEncoder::new(original_count, recovery_count, shard_bytes)?; + + encoder.add_original_shard(first)?; + for original in original { + encoder.add_original_shard(original)?; + } + + let result = encoder.encode()?; + + Ok(result.recovery_iter().map(|s| s.to_vec()).collect()) +} + +/// Decodes in one go using [`ReedSolomonDecoder`], +/// returning restored original shards with their indexes. +/// +/// - Given shard indexes must be the same that were used in encoding. +/// +/// See [simple usage](crate#simple-usage) for an example and more details. +pub fn decode( + original_count: usize, + recovery_count: usize, + original: O, + recovery: R, +) -> Result>, Error> +where + O: IntoIterator, + R: IntoIterator, + OT: AsRef<[u8]>, + RT: AsRef<[u8]>, +{ + if !ReedSolomonDecoder::supports(original_count, recovery_count) { + return Err(Error::UnsupportedShardCount { + original_count, + recovery_count, + }); + } + + let original = original.into_iter(); + let mut recovery = recovery.into_iter(); + + let (shard_bytes, first_recovery) = if let Some(first_recovery) = recovery.next() { + (first_recovery.1.as_ref().len(), first_recovery) + } else { + // NO RECOVERY SHARDS + + let original_received_count = original.count(); + if original_received_count == original_count { + // Nothing to do, original data is complete. 
+ return Ok(HashMap::new()); + } else { + return Err(Error::NotEnoughShards { + original_count, + original_received_count, + recovery_received_count: 0, + }); + } + }; + + let mut decoder = ReedSolomonDecoder::new(original_count, recovery_count, shard_bytes)?; + + for (index, original) in original { + decoder.add_original_shard(index, original)?; + } + + decoder.add_recovery_shard(first_recovery.0, first_recovery.1)?; + for (index, recovery) in recovery { + decoder.add_recovery_shard(index, recovery)?; + } + + let mut result = HashMap::new(); + for (index, original) in decoder.decode()?.restored_original_iter() { + result.insert(index, original.to_vec()); + } + + Ok(result) +} + +// ====================================================================== +// TESTS + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_util; + + // ============================================================ + // ROUNDTRIP + + #[test] + fn roundtrip() { + let original = test_util::generate_original(2, 1024, 123); + + let recovery = encode(2, 3, &original).unwrap(); + + test_util::assert_hash(&recovery, test_util::LOW_2_3); + + let restored = decode(2, 3, [(0, ""); 0], [(0, &recovery[0]), (1, &recovery[1])]).unwrap(); + + assert_eq!(restored.len(), 2); + assert_eq!(restored[&0], original[0]); + assert_eq!(restored[&1], original[1]); + } + + // ============================================================ + // encode + + mod encode { + use super::super::*; + use crate::Error; + + // ================================================== + // ERRORS + + #[test] + fn different_shard_size_with_different_original_shard_sizes() { + assert_eq!( + encode(2, 1, &[&[0u8; 64] as &[u8], &[0u8; 128]]), + Err(Error::DifferentShardSize { + shard_bytes: 64, + got: 128 + }) + ); + } + + #[test] + fn invalid_shard_size_with_empty_shard() { + assert_eq!( + encode(1, 1, &[&[0u8; 0]]), + Err(Error::InvalidShardSize { shard_bytes: 0 }) + ); + } + + #[test] + fn 
too_few_original_shards_with_zero_shards_given() { + assert_eq!( + encode(1, 1, &[] as &[&[u8]]), + Err(Error::TooFewOriginalShards { + original_count: 1, + original_received_count: 0, + }) + ); + } + + #[test] + fn too_many_original_shards() { + assert_eq!( + encode(1, 1, &[[0u8; 64], [0u8; 64]]), + Err(Error::TooManyOriginalShards { original_count: 1 }) + ); + } + + #[test] + fn unsupported_shard_count_with_zero_original_count() { + assert_eq!( + encode(0, 1, &[] as &[&[u8]]), + Err(Error::UnsupportedShardCount { + original_count: 0, + recovery_count: 1, + }) + ); + } + + #[test] + fn unsupported_shard_count_with_zero_recovery_count() { + assert_eq!( + encode(1, 0, &[[0u8; 64]]), + Err(Error::UnsupportedShardCount { + original_count: 1, + recovery_count: 0, + }) + ); + } + } + + // ============================================================ + // decode + + mod decode { + use super::super::*; + use crate::Error; + + #[test] + fn no_original_missing_with_no_recovery_given() { + let restored = decode(1, 1, [(0, &[0u8; 64])], [(0, ""); 0]).unwrap(); + assert!(restored.is_empty()); + } + + // ================================================== + // ERRORS + + #[test] + fn different_shard_size_with_different_original_shard_sizes() { + assert_eq!( + decode( + 2, + 1, + [(0, &[0u8; 64] as &[u8]), (1, &[0u8; 128])], + [(0, &[0u8; 64])], + ), + Err(Error::DifferentShardSize { + shard_bytes: 64, + got: 128 + }) + ); + } + + #[test] + fn different_shard_size_with_different_recovery_shard_sizes() { + assert_eq!( + decode( + 1, + 2, + [(0, &[0u8; 64])], + [(0, &[0u8; 64] as &[u8]), (1, &[0u8; 128])], + ), + Err(Error::DifferentShardSize { + shard_bytes: 64, + got: 128 + }) + ); + } + + #[test] + fn different_shard_size_with_empty_original_shard() { + assert_eq!( + decode(1, 1, [(0, &[0u8; 0])], [(0, &[0u8; 64])]), + Err(Error::DifferentShardSize { + shard_bytes: 64, + got: 0 + }) + ); + } + + #[test] + fn duplicate_original_shard_index() { + assert_eq!( + decode(2, 1, [(0, 
&[0u8; 64]), (0, &[0u8; 64])], [(0, &[0u8; 64])]), + Err(Error::DuplicateOriginalShardIndex { index: 0 }) + ); + } + + #[test] + fn duplicate_recovery_shard_index() { + assert_eq!( + decode(1, 2, [(0, &[0u8; 64])], [(0, &[0u8; 64]), (0, &[0u8; 64])]), + Err(Error::DuplicateRecoveryShardIndex { index: 0 }) + ); + } + + #[test] + fn invalid_original_shard_index() { + assert_eq!( + decode(1, 1, [(1, &[0u8; 64])], [(0, &[0u8; 64])]), + Err(Error::InvalidOriginalShardIndex { + original_count: 1, + index: 1, + }) + ); + } + + #[test] + fn invalid_recovery_shard_index() { + assert_eq!( + decode(1, 1, [(0, &[0u8; 64])], [(1, &[0u8; 64])]), + Err(Error::InvalidRecoveryShardIndex { + recovery_count: 1, + index: 1, + }) + ); + } + + #[test] + fn invalid_shard_size_with_empty_recovery_shard() { + assert_eq!( + decode(1, 1, [(0, &[0u8; 64])], [(0, &[0u8; 0])]), + Err(Error::InvalidShardSize { shard_bytes: 0 }) + ); + } + + #[test] + fn not_enough_shards() { + assert_eq!( + decode(1, 1, [(0, ""); 0], [(0, ""); 0]), + Err(Error::NotEnoughShards { + original_count: 1, + original_received_count: 0, + recovery_received_count: 0, + }) + ); + } + + #[test] + fn unsupported_shard_count_with_zero_original_count() { + assert_eq!( + decode(0, 1, [(0, ""); 0], [(0, ""); 0]), + Err(Error::UnsupportedShardCount { + original_count: 0, + recovery_count: 1, + }) + ); + } + + #[test] + fn unsupported_shard_count_with_zero_recovery_count() { + assert_eq!( + decode(1, 0, [(0, ""); 0], [(0, ""); 0]), + Err(Error::UnsupportedShardCount { + original_count: 1, + recovery_count: 0, + }) + ); + } + } +} diff --git a/src/rate.rs b/src/rate.rs new file mode 100644 index 0000000..222c6c5 --- /dev/null +++ b/src/rate.rs @@ -0,0 +1,250 @@ +//! Advanced encoding/decoding using chosen [`Engine`] and [`Rate`]. +//! +//! **This is an advanced module which is not needed for [simple usage] or [basic usage].** +//! +//! This module is relevant if you want to +//! 
- encode/decode using other [`Engine`] than [`DefaultEngine`]. +//! - re-use working space of one encoder/decoder in another. +//! - understand/benchmark/test high or low rate directly. +//! +//! # Rates +//! +//! See [algorithm > Rate] for details about high/low rate. +//! +//! - [`DefaultRate`], [`DefaultRateEncoder`], [`DefaultRateDecoder`] +//! - Encoding/decoding using high or low rate as appropriate. +//! - These are basically same as [`ReedSolomonEncoder`] +//! and [`ReedSolomonDecoder`] except with slightly different API +//! which allows specifying [`Engine`] and working space. +//! - [`HighRate`], [`HighRateEncoder`], [`HighRateDecoder`] +//! - Encoding/decoding using only high rate. +//! - [`LowRate`], [`LowRateEncoder`], [`LowRateDecoder`] +//! - Encoding/decoding using only low rate. +//! +//! [simple usage]: crate#simple-usage +//! [basic usage]: crate#basic-usage +//! [algorithm > Rate]: crate::algorithm#rate +//! [`ReedSolomonEncoder`]: crate::ReedSolomonEncoder +//! [`ReedSolomonDecoder`]: crate::ReedSolomonDecoder +//! [`DefaultEngine`]: crate::engine::DefaultEngine + +use crate::{engine::Engine, DecoderResult, EncoderResult, Error}; + +pub use self::{ + decoder_work::DecoderWork, + encoder_work::EncoderWork, + rate_default::{DefaultRate, DefaultRateDecoder, DefaultRateEncoder}, + rate_high::{HighRate, HighRateDecoder, HighRateEncoder}, + rate_low::{LowRate, LowRateDecoder, LowRateEncoder}, +}; + +mod decoder_work; +mod encoder_work; +mod rate_default; +mod rate_high; +mod rate_low; + +// ====================================================================== +// Rate - PUBLIC + +/// Reed-Solomon encoder/decoder generator using specific rate. +pub trait Rate { + // ============================================================ + // REQUIRED + + /// Encoder of this rate. + type RateEncoder: RateEncoder; + /// Decoder of this rate. 
+ type RateDecoder: RateDecoder; + + /// Returns `true` if given `original_count` / `recovery_count` + /// combination is supported. + fn supports(original_count: usize, recovery_count: usize) -> bool; + + // ============================================================ + // PROVIDED + + /// Creates new encoder. This is same as [`RateEncoder::new`]. + fn encoder( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + engine: E, + work: Option, + ) -> Result { + Self::RateEncoder::new(original_count, recovery_count, shard_bytes, engine, work) + } + + /// Creates new decoder. This is same as [`RateDecoder::new`]. + fn decoder( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + engine: E, + work: Option, + ) -> Result { + Self::RateDecoder::new(original_count, recovery_count, shard_bytes, engine, work) + } + + /// Returns `Ok(())` if given `original_count` / `recovery_count` + /// combination is supported and given `shard_bytes` is valid. + fn validate( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error> { + if !Self::supports(original_count, recovery_count) { + Err(Error::UnsupportedShardCount { + original_count, + recovery_count, + }) + } else if shard_bytes == 0 || shard_bytes & 63 != 0 { + Err(Error::InvalidShardSize { shard_bytes }) + } else { + Ok(()) + } + } +} + +// ====================================================================== +// RateEncoder - PUBLIC + +/// Reed-Solomon encoder using specific rate. +pub trait RateEncoder +where + Self: Sized, +{ + // ============================================================ + // REQUIRED + + /// Rate of this encoder. + type Rate: Rate; + + /// Like [`ReedSolomonEncoder::add_original_shard`](crate::ReedSolomonEncoder::add_original_shard). + fn add_original_shard>(&mut self, original_shard: T) -> Result<(), Error>; + + /// Like [`ReedSolomonEncoder::encode`](crate::ReedSolomonEncoder::encode). 
+ fn encode(&mut self) -> Result; + + /// Consumes this encoder returning its [`Engine`] and [`EncoderWork`] + /// so that they can be re-used by another encoder. + fn into_parts(self) -> (E, EncoderWork); + + /// Like [`ReedSolomonEncoder::new`](crate::ReedSolomonEncoder::new) + /// with [`Engine`] to use and optional working space to be re-used. + fn new( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + engine: E, + work: Option, + ) -> Result; + + /// Like [`ReedSolomonEncoder::reset`](crate::ReedSolomonEncoder::reset). + fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error>; + + // ============================================================ + // PROVIDED + + /// Returns `true` if given `original_count` / `recovery_count` + /// combination is supported. + /// + /// This is same as [`Rate::supports`]. + fn supports(original_count: usize, recovery_count: usize) -> bool { + Self::Rate::supports(original_count, recovery_count) + } + + /// Returns `Ok(())` if given `original_count` / `recovery_count` + /// combination is supported and given `shard_bytes` is valid. + /// + /// This is same as [`Rate::validate`]. + fn validate( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error> { + Self::Rate::validate(original_count, recovery_count, shard_bytes) + } +} + +// ====================================================================== +// RateDecoder - PUBLIC + +/// Reed-Solomon decoder using specific rate. +pub trait RateDecoder +where + Self: Sized, +{ + // ============================================================ + // REQUIRED + + /// Rate of this decoder. + type Rate: Rate; + + /// Like [`ReedSolomonDecoder::add_original_shard`](crate::ReedSolomonDecoder::add_original_shard). 
+ fn add_original_shard>( + &mut self, + index: usize, + original_shard: T, + ) -> Result<(), Error>; + + /// Like [`ReedSolomonDecoder::add_recovery_shard`](crate::ReedSolomonDecoder::add_recovery_shard). + fn add_recovery_shard>( + &mut self, + index: usize, + recovery_shard: T, + ) -> Result<(), Error>; + + /// Like [`ReedSolomonDecoder::decode`](crate::ReedSolomonDecoder::decode). + fn decode(&mut self) -> Result; + + /// Consumes this decoder returning its [`Engine`] and [`DecoderWork`] + /// so that they can be re-used by another decoder. + fn into_parts(self) -> (E, DecoderWork); + + /// Like [`ReedSolomonDecoder::new`](crate::ReedSolomonDecoder::new) + /// with [`Engine`] to use and optional working space to be re-used. + fn new( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + engine: E, + work: Option, + ) -> Result; + + /// Like [`ReedSolomonDecoder::reset`](crate::ReedSolomonDecoder::reset). + fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error>; + + // ============================================================ + // PROVIDED + + /// Returns `true` if given `original_count` / `recovery_count` + /// combination is supported. + /// + /// This is same as [`Rate::supports`]. + fn supports(original_count: usize, recovery_count: usize) -> bool { + Self::Rate::supports(original_count, recovery_count) + } + + /// Returns `Ok(())` if given `original_count` / `recovery_count` + /// combination is supported and given `shard_bytes` is valid. + /// + /// This is same as [`Rate::validate`]. 
+ fn validate( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error> { + Self::Rate::validate(original_count, recovery_count, shard_bytes) + } +} diff --git a/src/rate/decoder_work.rs b/src/rate/decoder_work.rs new file mode 100644 index 0000000..f5145ad --- /dev/null +++ b/src/rate/decoder_work.rs @@ -0,0 +1,194 @@ +use fixedbitset::FixedBitSet; + +use crate::{ + engine::{Shards, ShardsRefMut}, + Error, +}; + +// ====================================================================== +// DecoderWork - PUBLIC + +/// Working space for [`RateDecoder`]. +/// +/// [`RateDecoder`]: crate::rate::RateDecoder +pub struct DecoderWork { + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + + original_base_pos: usize, + recovery_base_pos: usize, + + original_received_count: usize, + recovery_received_count: usize, + // May contain extra zero bits. + received: FixedBitSet, + shards: Shards, +} + +impl DecoderWork { + /// Creates new [`DecoderWork`] which initially + /// has no working space allocated. 
+ pub fn new() -> Self { + Self { + original_count: 0, + recovery_count: 0, + shard_bytes: 0, + + original_base_pos: 0, + recovery_base_pos: 0, + + original_received_count: 0, + recovery_received_count: 0, + received: FixedBitSet::new(), + shards: Shards::new(), + } + } +} + +// ====================================================================== +// DecoderWork - IMPL Default + +impl Default for DecoderWork { + fn default() -> Self { + Self::new() + } +} + +// ====================================================================== +// DecoderWork - CRATE + +impl DecoderWork { + pub(crate) fn add_original_shard>( + &mut self, + index: usize, + original_shard: T, + ) -> Result<(), Error> { + let pos = self.original_base_pos + index; + let original_shard = original_shard.as_ref(); + + if index >= self.original_count { + Err(Error::InvalidOriginalShardIndex { + original_count: self.original_count, + index, + }) + } else if self.received[pos] { + Err(Error::DuplicateOriginalShardIndex { index }) + } else if original_shard.len() != self.shard_bytes { + Err(Error::DifferentShardSize { + shard_bytes: self.shard_bytes, + got: original_shard.len(), + }) + } else { + self.shards[pos].copy_from_slice(original_shard); + self.original_received_count += 1; + self.received.set(pos, true); + Ok(()) + } + } + + pub(crate) fn add_recovery_shard>( + &mut self, + index: usize, + recovery_shard: T, + ) -> Result<(), Error> { + let pos = self.recovery_base_pos + index; + let recovery_shard = recovery_shard.as_ref(); + + if index >= self.recovery_count { + Err(Error::InvalidRecoveryShardIndex { + recovery_count: self.recovery_count, + index, + }) + } else if self.received[pos] { + Err(Error::DuplicateRecoveryShardIndex { index }) + } else if recovery_shard.len() != self.shard_bytes { + Err(Error::DifferentShardSize { + shard_bytes: self.shard_bytes, + got: recovery_shard.len(), + }) + } else { + self.shards[pos].copy_from_slice(recovery_shard); + self.recovery_received_count += 1; + 
self.received.set(pos, true); + Ok(()) + } + } + + // Begin decode. + // - Returned `FixedBitSet` may contain extra zero bits. + pub(crate) fn decode_begin( + &mut self, + ) -> Result, Error> { + if self.original_received_count + self.recovery_received_count < self.original_count { + Err(Error::NotEnoughShards { + original_count: self.original_count, + original_received_count: self.original_received_count, + recovery_received_count: self.recovery_received_count, + }) + } else if self.original_received_count == self.original_count { + Ok(None) + } else { + Ok(Some(( + self.shards.as_ref_mut(), + self.original_count, + self.recovery_count, + &self.received, + ))) + } + } + + pub(crate) fn original_count(&self) -> usize { + self.original_count + } + + pub(crate) fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + + original_base_pos: usize, + recovery_base_pos: usize, + work_count: usize, + ) { + self.original_count = original_count; + self.recovery_count = recovery_count; + self.shard_bytes = shard_bytes; + + self.original_base_pos = original_base_pos; + self.recovery_base_pos = recovery_base_pos; + + self.original_received_count = 0; + self.recovery_received_count = 0; + + let max_received_pos = std::cmp::max( + original_base_pos + original_count, + recovery_base_pos + recovery_count, + ); + + self.received.clear(); + if self.received.len() < max_received_pos { + self.received.grow(max_received_pos); + } + + self.shards.resize(work_count, shard_bytes); + } + + pub(crate) fn reset_received(&mut self) { + self.original_received_count = 0; + self.recovery_received_count = 0; + self.received.clear(); + } + + // This must only be called by `DecoderResult`. 
+ pub(crate) fn restored_original(&self, index: usize) -> Option<&[u8]> { + let pos = self.original_base_pos + index; + + if index < self.original_count && !self.received[pos] { + Some(&self.shards[pos]) + } else { + None + } + } +} diff --git a/src/rate/encoder_work.rs b/src/rate/encoder_work.rs new file mode 100644 index 0000000..2276009 --- /dev/null +++ b/src/rate/encoder_work.rs @@ -0,0 +1,113 @@ +use crate::{ + engine::{Shards, ShardsRefMut}, + Error, +}; + +// ====================================================================== +// EncoderWork - PUBLIC + +/// Working space for [`RateEncoder`]. +/// +/// [`RateEncoder`]: crate::rate::RateEncoder +pub struct EncoderWork { + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + + original_received_count: usize, + shards: Shards, +} + +impl EncoderWork { + /// Creates new [`EncoderWork`] which initially + /// has no working space allocated. + pub fn new() -> Self { + Self { + original_count: 0, + recovery_count: 0, + shard_bytes: 0, + + original_received_count: 0, + shards: Shards::new(), + } + } +} + +// ====================================================================== +// EncoderWork - IMPL Default + +impl Default for EncoderWork { + fn default() -> Self { + Self::new() + } +} + +// ====================================================================== +// EncoderWork - CRATE + +impl EncoderWork { + pub(crate) fn add_original_shard>( + &mut self, + original_shard: T, + ) -> Result<(), Error> { + let original_shard = original_shard.as_ref(); + + if self.original_received_count == self.original_count { + Err(Error::TooManyOriginalShards { + original_count: self.original_count, + }) + } else if original_shard.len() != self.shard_bytes { + Err(Error::DifferentShardSize { + shard_bytes: self.shard_bytes, + got: original_shard.len(), + }) + } else { + self.shards[self.original_received_count].copy_from_slice(original_shard); + self.original_received_count += 1; + Ok(()) + } + } + + 
pub(crate) fn encode_begin(&mut self) -> Result<(ShardsRefMut, usize, usize), Error> { + if self.original_received_count != self.original_count { + Err(Error::TooFewOriginalShards { + original_count: self.original_count, + original_received_count: self.original_received_count, + }) + } else { + Ok(( + self.shards.as_ref_mut(), + self.original_count, + self.recovery_count, + )) + } + } + + // This must only be called by `EncoderResult`. + pub(crate) fn recovery(&self, index: usize) -> Option<&[u8]> { + if index < self.recovery_count { + Some(&self.shards[index]) + } else { + None + } + } + + pub(crate) fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + work_count: usize, + ) { + self.original_count = original_count; + self.recovery_count = recovery_count; + self.shard_bytes = shard_bytes; + + self.original_received_count = 0; + self.shards.resize(work_count, shard_bytes); + } + + pub(crate) fn reset_received(&mut self) { + self.original_received_count = 0; + } +} diff --git a/src/rate/rate_default.rs b/src/rate/rate_default.rs new file mode 100644 index 0000000..04bd081 --- /dev/null +++ b/src/rate/rate_default.rs @@ -0,0 +1,479 @@ +use std::{cmp::Ordering, marker::PhantomData}; + +use crate::{ + engine::{Engine, GF_ORDER}, + rate::{ + DecoderWork, EncoderWork, HighRateDecoder, HighRateEncoder, LowRateDecoder, LowRateEncoder, + Rate, RateDecoder, RateEncoder, + }, + DecoderResult, EncoderResult, Error, +}; + +// ====================================================================== +// FUNCTIONS - PRIVATE + +fn use_high_rate(original_count: usize, recovery_count: usize) -> Result { + if original_count > GF_ORDER || recovery_count > GF_ORDER { + return Err(Error::UnsupportedShardCount { + original_count, + recovery_count, + }); + } + + let original_count_pow2 = original_count.next_power_of_two(); + let recovery_count_pow2 = recovery_count.next_power_of_two(); + + let smaller_pow2 = std::cmp::min(original_count_pow2, 
recovery_count_pow2); + let larger = std::cmp::max(original_count, recovery_count); + + if original_count == 0 || recovery_count == 0 || smaller_pow2 + larger > GF_ORDER { + return Err(Error::UnsupportedShardCount { + original_count, + recovery_count, + }); + } + + match original_count_pow2.cmp(&recovery_count_pow2) { + Ordering::Less => { + // The "correct" rate is generally faster here, + // and also must be used if `recovery_count > 32768`. + + Ok(false) + } + + Ordering::Greater => { + // The "correct" rate is generally faster here, + // and also must be used if `original_count > 32768`. + + Ok(true) + } + + Ordering::Equal => { + // Here counter-intuitively the "wrong" rate is generally faster + // in decoding if `original_count` and `recovery_count` differ a lot. + + if original_count <= recovery_count { + // Using the "wrong" rate on purpose. + Ok(true) + } else { + // Using the "wrong" rate on purpose. + Ok(false) + } + } + } +} + +// ====================================================================== +// DefaultRate - PUBLIC + +/// Reed-Solomon encoder/decoder generator using high or low rate as appropriate. +pub struct DefaultRate(PhantomData); + +impl Rate for DefaultRate { + type RateEncoder = DefaultRateEncoder; + type RateDecoder = DefaultRateDecoder; + + fn supports(original_count: usize, recovery_count: usize) -> bool { + use_high_rate(original_count, recovery_count).is_ok() + } +} + +// ====================================================================== +// InnerEncoder - PRIVATE + +enum InnerEncoder { + High(HighRateEncoder), + Low(LowRateEncoder), + + // This is only used temporarily during `reset`, never anywhere else. + None, +} + +impl Default for InnerEncoder { + fn default() -> Self { + InnerEncoder::None + } +} + +// ====================================================================== +// DefaultRateEncoder - PUBLIC + +/// Reed-Solomon encoder using high or low rate as appropriate. 
+/// +/// This is basically same as [`ReedSolomonEncoder`] +/// except with slightly different API which allows +/// specifying [`Engine`] and [`EncoderWork`]. +/// +/// [`ReedSolomonEncoder`]: crate::ReedSolomonEncoder +pub struct DefaultRateEncoder(InnerEncoder); + +impl RateEncoder for DefaultRateEncoder { + type Rate = DefaultRate; + + fn add_original_shard>(&mut self, original_shard: T) -> Result<(), Error> { + match &mut self.0 { + InnerEncoder::High(high) => high.add_original_shard(original_shard), + InnerEncoder::Low(low) => low.add_original_shard(original_shard), + InnerEncoder::None => unreachable!(), + } + } + + fn encode(&mut self) -> Result { + match &mut self.0 { + InnerEncoder::High(high) => high.encode(), + InnerEncoder::Low(low) => low.encode(), + InnerEncoder::None => unreachable!(), + } + } + + fn into_parts(self) -> (E, EncoderWork) { + match self.0 { + InnerEncoder::High(high) => high.into_parts(), + InnerEncoder::Low(low) => low.into_parts(), + InnerEncoder::None => unreachable!(), + } + } + + fn new( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + engine: E, + work: Option, + ) -> Result { + let inner = if use_high_rate(original_count, recovery_count)? { + InnerEncoder::High(HighRateEncoder::new( + original_count, + recovery_count, + shard_bytes, + engine, + work, + )?) + } else { + InnerEncoder::Low(LowRateEncoder::new( + original_count, + recovery_count, + shard_bytes, + engine, + work, + )?) 
+ }; + + Ok(Self(inner)) + } + + fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error> { + let new_rate_is_high = use_high_rate(original_count, recovery_count)?; + + self.0 = match std::mem::take(&mut self.0) { + InnerEncoder::High(mut high) => { + if new_rate_is_high { + high.reset(original_count, recovery_count, shard_bytes)?; + InnerEncoder::High(high) + } else { + let (engine, work) = high.into_parts(); + InnerEncoder::Low(LowRateEncoder::new( + original_count, + recovery_count, + shard_bytes, + engine, + Some(work), + )?) + } + } + + InnerEncoder::Low(mut low) => { + if new_rate_is_high { + let (engine, work) = low.into_parts(); + InnerEncoder::High(HighRateEncoder::new( + original_count, + recovery_count, + shard_bytes, + engine, + Some(work), + )?) + } else { + low.reset(original_count, recovery_count, shard_bytes)?; + InnerEncoder::Low(low) + } + } + + InnerEncoder::None => unreachable!(), + }; + + Ok(()) + } +} + +// ====================================================================== +// InnerDecoder - PRIVATE + +enum InnerDecoder { + High(HighRateDecoder), + Low(LowRateDecoder), + + // This is only used temporarily during `reset`, never anywhere else. + None, +} + +impl Default for InnerDecoder { + fn default() -> Self { + InnerDecoder::None + } +} + +// ====================================================================== +// DefaultRateDecoder - PUBLIC + +/// Reed-Solomon decoder using high or low rate as appropriate. +/// +/// This is basically same as [`ReedSolomonDecoder`] +/// except with slightly different API which allows +/// specifying [`Engine`] and [`DecoderWork`]. 
+/// +/// [`ReedSolomonDecoder`]: crate::ReedSolomonDecoder +pub struct DefaultRateDecoder(InnerDecoder); + +impl RateDecoder for DefaultRateDecoder { + type Rate = DefaultRate; + + fn add_original_shard>( + &mut self, + index: usize, + original_shard: T, + ) -> Result<(), Error> { + match &mut self.0 { + InnerDecoder::High(high) => high.add_original_shard(index, original_shard), + InnerDecoder::Low(low) => low.add_original_shard(index, original_shard), + InnerDecoder::None => unreachable!(), + } + } + + fn add_recovery_shard>( + &mut self, + index: usize, + recovery_shard: T, + ) -> Result<(), Error> { + match &mut self.0 { + InnerDecoder::High(high) => high.add_recovery_shard(index, recovery_shard), + InnerDecoder::Low(low) => low.add_recovery_shard(index, recovery_shard), + InnerDecoder::None => unreachable!(), + } + } + + fn decode(&mut self) -> Result { + match &mut self.0 { + InnerDecoder::High(high) => high.decode(), + InnerDecoder::Low(low) => low.decode(), + InnerDecoder::None => unreachable!(), + } + } + + fn into_parts(self) -> (E, DecoderWork) { + match self.0 { + InnerDecoder::High(high) => high.into_parts(), + InnerDecoder::Low(low) => low.into_parts(), + InnerDecoder::None => unreachable!(), + } + } + + fn new( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + engine: E, + work: Option, + ) -> Result { + let inner = if use_high_rate(original_count, recovery_count)? { + InnerDecoder::High(HighRateDecoder::new( + original_count, + recovery_count, + shard_bytes, + engine, + work, + )?) + } else { + InnerDecoder::Low(LowRateDecoder::new( + original_count, + recovery_count, + shard_bytes, + engine, + work, + )?) 
+ }; + + Ok(Self(inner)) + } + + fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error> { + let new_rate_is_high = use_high_rate(original_count, recovery_count)?; + + self.0 = match std::mem::take(&mut self.0) { + InnerDecoder::High(mut high) => { + if new_rate_is_high { + high.reset(original_count, recovery_count, shard_bytes)?; + InnerDecoder::High(high) + } else { + let (engine, work) = high.into_parts(); + InnerDecoder::Low(LowRateDecoder::new( + original_count, + recovery_count, + shard_bytes, + engine, + Some(work), + )?) + } + } + + InnerDecoder::Low(mut low) => { + if new_rate_is_high { + let (engine, work) = low.into_parts(); + InnerDecoder::High(HighRateDecoder::new( + original_count, + recovery_count, + shard_bytes, + engine, + Some(work), + )?) + } else { + low.reset(original_count, recovery_count, shard_bytes)?; + InnerDecoder::Low(low) + } + } + + InnerDecoder::None => unreachable!(), + }; + + Ok(()) + } +} + +// ====================================================================== +// TESTS + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_util; + + // ============================================================ + // ROUNDTRIPS - SINGLE ROUND + + #[test] + fn roundtrips_tiny() { + for (original_count, recovery_count, seed, recovery_hash) in test_util::DEFAULT_TINY { + roundtrip_single!( + DefaultRate, + *original_count, + *recovery_count, + 1024, + recovery_hash, + &[*recovery_count..*original_count], + &[0..std::cmp::min(*original_count, *recovery_count)], + *seed, + ); + } + } + + // ============================================================ + // ROUNDTRIPS - TWO ROUNDS + + #[test] + fn two_rounds_implicit_reset() { + roundtrip_two_rounds!( + DefaultRate, + false, + (2, 3, 1024, test_util::LOW_2_3, &[], &[0, 2], 123), + (2, 3, 1024, test_util::LOW_2_3_223, &[0], &[1], 223), + ); + } + + #[test] + fn two_rounds_reset_high_to_high() { + roundtrip_two_rounds!( + 
DefaultRate, + true, + (3, 2, 1024, test_util::HIGH_3_2, &[1], &[0, 1], 132), + (5, 3, 1024, test_util::HIGH_5_3, &[1, 3], &[0, 1, 2], 153), + ); + } + + #[test] + fn two_rounds_reset_high_to_low() { + roundtrip_two_rounds!( + DefaultRate, + true, + (3, 2, 1024, test_util::HIGH_3_2, &[1], &[0, 1], 132), + (2, 3, 1024, test_util::LOW_2_3, &[], &[0, 2], 123), + ); + } + + #[test] + fn two_rounds_reset_low_to_high() { + roundtrip_two_rounds!( + DefaultRate, + true, + (2, 3, 1024, test_util::LOW_2_3, &[], &[0, 1], 123), + (3, 2, 1024, test_util::HIGH_3_2, &[1], &[0, 1], 132), + ); + } + + #[test] + fn two_rounds_reset_low_to_low() { + roundtrip_two_rounds!( + DefaultRate, + true, + (2, 3, 1024, test_util::LOW_2_3, &[], &[0, 2], 123), + (3, 5, 1024, test_util::LOW_3_5, &[], &[0, 2, 4], 135), + ); + } + + // ============================================================ + // use_high_rate + + #[test] + fn use_high_rate() { + fn err(original_count: usize, recovery_count: usize) -> Result { + Err(Error::UnsupportedShardCount { + original_count, + recovery_count, + }) + } + + for (original_count, recovery_count, expected) in [ + (0, 1, err(0, 1)), + (1, 0, err(1, 0)), + // CORRECT/WRONG RATE + (3, 3, Ok(true)), + (3, 4, Ok(true)), + (3, 5, Ok(false)), + (4, 3, Ok(false)), + (5, 3, Ok(true)), + // LOW RATE LIMIT + (4096, 61440, Ok(false)), + (4096, 61441, err(4096, 61441)), + (4097, 61440, err(4097, 61440)), + // HIGH RATE LIMIT + (61440, 4096, Ok(true)), + (61440, 4097, err(61440, 4097)), + (61441, 4096, err(61441, 4096)), + // OVERFLOW CHECK + (usize::MAX, usize::MAX, err(usize::MAX, usize::MAX)), + ] { + assert_eq!( + super::use_high_rate(original_count, recovery_count), + expected + ); + } + } +} diff --git a/src/rate/rate_high.rs b/src/rate/rate_high.rs new file mode 100644 index 0000000..f62603d --- /dev/null +++ b/src/rate/rate_high.rs @@ -0,0 +1,607 @@ +use std::marker::PhantomData; + +use crate::{ + engine::{self, Engine, GF_MODULUS, GF_ORDER}, + rate::{DecoderWork, 
EncoderWork, Rate, RateDecoder, RateEncoder}, + DecoderResult, EncoderResult, Error, +}; + +// ====================================================================== +// HighRate - PUBLIC + +/// Reed-Solomon encoder/decoder generator using only high rate. +pub struct HighRate(PhantomData); + +impl Rate for HighRate { + type RateEncoder = HighRateEncoder; + type RateDecoder = HighRateDecoder; + + fn supports(original_count: usize, recovery_count: usize) -> bool { + original_count > 0 + && recovery_count > 0 + && original_count < GF_ORDER + && recovery_count < GF_ORDER + && recovery_count.next_power_of_two() + original_count <= GF_ORDER + } +} + +// ====================================================================== +// HighRateEncoder - PUBLIC + +/// Reed-Solomon encoder using only high rate. +pub struct HighRateEncoder { + engine: E, + work: EncoderWork, +} + +impl RateEncoder for HighRateEncoder { + type Rate = HighRate; + + fn add_original_shard>(&mut self, original_shard: T) -> Result<(), Error> { + self.work.add_original_shard(original_shard) + } + + fn encode(&mut self) -> Result { + let (mut work, original_count, recovery_count) = self.work.encode_begin()?; + let chunk_size = recovery_count.next_power_of_two(); + let engine = &self.engine; + + // FIRST CHUNK + + let first_count = std::cmp::min(original_count, chunk_size); + + work.zero(first_count..chunk_size); + engine.ifft_skew_end(&mut work, 0, chunk_size, first_count); + + if original_count > chunk_size { + // FULL CHUNKS + + let mut chunk_start = chunk_size; + while chunk_start + chunk_size <= original_count { + engine.ifft_skew_end(&mut work, chunk_start, chunk_size, chunk_size); + E::xor_within(&mut work, 0, chunk_start, chunk_size); + chunk_start += chunk_size; + } + + // FINAL PARTIAL CHUNK + + let last_count = original_count % chunk_size; + if last_count > 0 { + work.zero(chunk_start + last_count..); + engine.ifft_skew_end(&mut work, chunk_start, chunk_size, last_count); + E::xor_within(&mut 
work, 0, chunk_start, chunk_size); + } + } + + // FFT + + engine.fft(&mut work, 0, chunk_size, recovery_count, 0); + + // DONE + + Ok(EncoderResult::new(&mut self.work)) + } + + fn into_parts(self) -> (E, EncoderWork) { + (self.engine, self.work) + } + + fn new( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + engine: E, + work: Option, + ) -> Result { + let mut work = work.unwrap_or_default(); + Self::reset_work(original_count, recovery_count, shard_bytes, &mut work)?; + Ok(Self { work, engine }) + } + + fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error> { + Self::reset_work(original_count, recovery_count, shard_bytes, &mut self.work) + } +} + +// ====================================================================== +// HighRateEncoder - PRIVATE + +impl HighRateEncoder { + fn reset_work( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + work: &mut EncoderWork, + ) -> Result<(), Error> { + Self::validate(original_count, recovery_count, shard_bytes)?; + work.reset( + original_count, + recovery_count, + shard_bytes, + Self::work_count(original_count, recovery_count), + ); + Ok(()) + } + + fn work_count(original_count: usize, recovery_count: usize) -> usize { + debug_assert!(Self::supports(original_count, recovery_count)); + + let chunk_size = recovery_count.next_power_of_two(); + + engine::checked_next_multiple_of(original_count, chunk_size).unwrap() + } +} + +// ====================================================================== +// HighRateDecoder - PUBLIC + +/// Reed-Solomon decoder using only high rate. 
+pub struct HighRateDecoder { + engine: E, + work: DecoderWork, +} + +impl RateDecoder for HighRateDecoder { + type Rate = HighRate; + + fn add_original_shard>( + &mut self, + index: usize, + original_shard: T, + ) -> Result<(), Error> { + self.work.add_original_shard(index, original_shard) + } + + fn add_recovery_shard>( + &mut self, + index: usize, + recovery_shard: T, + ) -> Result<(), Error> { + self.work.add_recovery_shard(index, recovery_shard) + } + + fn decode(&mut self) -> Result { + let (mut work, original_count, recovery_count, received) = + if let Some(stuff) = self.work.decode_begin()? { + stuff + } else { + // Nothing to do, original data is complete. + return Ok(DecoderResult::new(&mut self.work)); + }; + + let chunk_size = recovery_count.next_power_of_two(); + let original_end = chunk_size + original_count; + let work_count = work.len(); + + // ERASURE LOCATIONS + + let mut erasures = [0; GF_ORDER]; + + for i in 0..recovery_count { + if !received[i] { + erasures[i] = 1; + } + } + + erasures[recovery_count..chunk_size].fill(1); + + for i in chunk_size..original_end { + if !received[i] { + erasures[i] = 1; + } + } + + // EVALUATE POLYNOMIAL + + E::eval_poly(&mut erasures, original_end); + + // MULTIPLY SHARDS + + // work[ .. recovery_count] = recovery * erasures + // work[recovery_count .. chunk_size ] = 0 + // work[chunk_size .. original_end ] = original * erasures + // work[original_end .. 
] = 0 + + for i in 0..recovery_count { + if received[i] { + self.engine.mul(&mut work[i], erasures[i]); + } else { + work[i].fill(0); + } + } + + work.zero(recovery_count..chunk_size); + + for i in chunk_size..original_end { + if received[i] { + self.engine.mul(&mut work[i], erasures[i]); + } else { + work[i].fill(0); + } + } + + work.zero(original_end..); + + // IFFT / FORMAL DERIVATIVE / FFT + + self.engine.ifft(&mut work, 0, work_count, original_end, 0); + E::formal_derivative(&mut work); + self.engine.fft(&mut work, 0, work_count, original_end, 0); + + // REVEAL ERASURES + + for i in chunk_size..original_end { + if !received[i] { + self.engine.mul(&mut work[i], GF_MODULUS - erasures[i]); + } + } + + // DONE + + Ok(DecoderResult::new(&mut self.work)) + } + + fn into_parts(self) -> (E, DecoderWork) { + (self.engine, self.work) + } + + fn new( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + engine: E, + work: Option, + ) -> Result { + let mut work = work.unwrap_or_default(); + Self::reset_work(original_count, recovery_count, shard_bytes, &mut work)?; + Ok(Self { work, engine }) + } + + fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error> { + Self::reset_work(original_count, recovery_count, shard_bytes, &mut self.work) + } +} + +// ====================================================================== +// HighRateDecoder - PRIVATE + +impl HighRateDecoder { + fn reset_work( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + work: &mut DecoderWork, + ) -> Result<(), Error> { + Self::validate(original_count, recovery_count, shard_bytes)?; + + // work[..recovery_count ] = recovery + // work[recovery_count_pow2..] 
= original + work.reset( + original_count, + recovery_count, + shard_bytes, + recovery_count.next_power_of_two(), + 0, + Self::work_count(original_count, recovery_count), + ); + + Ok(()) + } + + fn work_count(original_count: usize, recovery_count: usize) -> usize { + debug_assert!(Self::supports(original_count, recovery_count)); + + (recovery_count.next_power_of_two() + original_count).next_power_of_two() + } +} + +// ====================================================================== +// TESTS + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_util; + + // ============================================================ + // ROUNDTRIPS - SINGLE ROUND + + #[test] + fn roundtrip_all_originals_missing() { + roundtrip_single!( + HighRate, + 3, + 3, + 1024, + test_util::EITHER_3_3, + &[], + &[0..3], + 133, + ); + } + + #[test] + fn roundtrip_no_originals_missing() { + roundtrip_single!(HighRate, 3, 2, 1024, test_util::HIGH_3_2, &[0..3], &[], 132); + } + + #[test] + fn roundtrips_tiny() { + for (original_count, recovery_count, seed, recovery_hash) in test_util::HIGH_TINY { + roundtrip_single!( + HighRate, + *original_count, + *recovery_count, + 1024, + recovery_hash, + &[*recovery_count..*original_count], + &[0..std::cmp::min(*original_count, *recovery_count)], + *seed, + ); + } + } + + #[test] + #[ignore] + fn roundtrip_3000_30000() { + roundtrip_single!( + HighRate, + 3000, + 30000, + 64, + test_util::HIGH_3000_30000_14, + &[], + &[0..3000], + 14, + ); + } + + #[test] + #[ignore] + fn roundtrip_32768_32768() { + roundtrip_single!( + HighRate, + 32768, + 32768, + 64, + test_util::EITHER_32768_32768_11, + &[], + &[0..32768], + 11, + ); + } + + #[test] + #[ignore] + fn roundtrip_60000_3000() { + roundtrip_single!( + HighRate, + 60000, + 3000, + 64, + test_util::HIGH_60000_3000_12, + &[3000..60000], + &[0..3000], + 12, + ); + } + + // ============================================================ + // ROUNDTRIPS - TWO ROUNDS + + #[test] + fn 
two_rounds_implicit_reset() { + roundtrip_two_rounds!( + HighRate, + false, + (3, 2, 1024, test_util::HIGH_3_2, &[1], &[0, 1], 132), + (3, 2, 1024, test_util::HIGH_3_2_232, &[0], &[0, 1], 232), + ); + } + + #[test] + fn two_rounds_explicit_reset() { + roundtrip_two_rounds!( + HighRate, + true, + (3, 2, 1024, test_util::HIGH_3_2, &[1], &[0, 1], 132), + (5, 2, 1024, test_util::HIGH_5_2, &[0, 2, 4], &[0, 1], 152), + ); + } + + // ============================================================ + // HighRate + + mod high_rate { + use crate::{ + engine::NoSimd, + rate::{HighRate, Rate}, + Error, + }; + + #[test] + fn decoder() { + assert_eq!( + HighRate::::decoder(4096, 61440, 64, NoSimd::new(), None).err(), + Some(Error::UnsupportedShardCount { + original_count: 4096, + recovery_count: 61440, + }) + ); + + assert!(HighRate::::decoder(61440, 4096, 64, NoSimd::new(), None).is_ok()); + } + + #[test] + fn encoder() { + assert_eq!( + HighRate::::encoder(4096, 61440, 64, NoSimd::new(), None).err(), + Some(Error::UnsupportedShardCount { + original_count: 4096, + recovery_count: 61440, + }) + ); + + assert!(HighRate::::encoder(61440, 4096, 64, NoSimd::new(), None).is_ok()); + } + + #[test] + fn supports() { + assert!(!HighRate::::supports(0, 1)); + assert!(!HighRate::::supports(1, 0)); + + assert!(!HighRate::::supports(4096, 61440)); + + assert!(HighRate::::supports(61440, 4096)); + assert!(!HighRate::::supports(61440, 4097)); + assert!(!HighRate::::supports(61441, 4096)); + + assert!(!HighRate::::supports(usize::MAX, usize::MAX)); + } + + #[test] + fn validate() { + assert_eq!( + HighRate::::validate(1, 1, 123).err(), + Some(Error::InvalidShardSize { shard_bytes: 123 }) + ); + + assert_eq!( + HighRate::::validate(4096, 61440, 64).err(), + Some(Error::UnsupportedShardCount { + original_count: 4096, + recovery_count: 61440, + }) + ); + + assert!(HighRate::::validate(61440, 4096, 64).is_ok()); + } + } + + // ============================================================ + // 
HighRateEncoder + + mod high_rate_encoder { + use crate::{ + engine::NoSimd, + rate::{HighRateEncoder, RateEncoder}, + Error, + }; + + // ================================================== + // ERRORS + + test_rate_encoder_errors! {HighRateEncoder} + + // ================================================== + // supports + + #[test] + fn supports() { + assert!(!HighRateEncoder::::supports(4096, 61440)); + assert!(HighRateEncoder::::supports(61440, 4096)); + } + + // ================================================== + // validate + + #[test] + fn validate() { + assert_eq!( + HighRateEncoder::::validate(1, 1, 123).err(), + Some(Error::InvalidShardSize { shard_bytes: 123 }) + ); + + assert_eq!( + HighRateEncoder::::validate(4096, 61440, 64).err(), + Some(Error::UnsupportedShardCount { + original_count: 4096, + recovery_count: 61440, + }) + ); + + assert!(HighRateEncoder::::validate(61440, 4096, 64).is_ok()); + } + + // ================================================== + // work_count + + #[test] + fn work_count() { + assert_eq!(HighRateEncoder::::work_count(1, 1), 1); + assert_eq!(HighRateEncoder::::work_count(4096, 1024), 4096); + assert_eq!(HighRateEncoder::::work_count(4097, 1024), 5120); + assert_eq!(HighRateEncoder::::work_count(4097, 1025), 6144); + assert_eq!(HighRateEncoder::::work_count(32768, 32768), 32768); + } + } + + // ============================================================ + // HighRateDecoder + + mod high_rate_decoder { + use crate::{ + engine::NoSimd, + rate::{HighRateDecoder, RateDecoder}, + Error, + }; + + // ================================================== + // ERRORS + + test_rate_decoder_errors! 
{HighRateDecoder} + + // ================================================== + // supports + + #[test] + fn supports() { + assert!(!HighRateDecoder::::supports(4096, 61440)); + assert!(HighRateDecoder::::supports(61440, 4096)); + } + + // ================================================== + // validate + + #[test] + fn validate() { + assert_eq!( + HighRateDecoder::::validate(1, 1, 123).err(), + Some(Error::InvalidShardSize { shard_bytes: 123 }) + ); + + assert_eq!( + HighRateDecoder::::validate(4096, 61440, 64).err(), + Some(Error::UnsupportedShardCount { + original_count: 4096, + recovery_count: 61440, + }) + ); + + assert!(HighRateDecoder::::validate(61440, 4096, 64).is_ok()); + } + + // ================================================== + // work_count + + #[test] + fn work_count() { + assert_eq!(HighRateDecoder::::work_count(1, 1), 2); + assert_eq!(HighRateDecoder::::work_count(2048, 1025), 4096); + assert_eq!(HighRateDecoder::::work_count(2049, 1025), 8192); + assert_eq!(HighRateDecoder::::work_count(3072, 1024), 4096); + assert_eq!(HighRateDecoder::::work_count(3073, 1024), 8192); + assert_eq!(HighRateDecoder::::work_count(32768, 32768), 65536); + } + } +} diff --git a/src/rate/rate_low.rs b/src/rate/rate_low.rs new file mode 100644 index 0000000..62d1d28 --- /dev/null +++ b/src/rate/rate_low.rs @@ -0,0 +1,607 @@ +use std::marker::PhantomData; + +use crate::{ + engine::{self, Engine, GF_MODULUS, GF_ORDER}, + rate::{DecoderWork, EncoderWork, Rate, RateDecoder, RateEncoder}, + DecoderResult, EncoderResult, Error, +}; + +// ====================================================================== +// LowRate - PUBLIC + +/// Reed-Solomon encoder/decoder generator using only low rate. 
+pub struct LowRate(PhantomData); + +impl Rate for LowRate { + type RateEncoder = LowRateEncoder; + type RateDecoder = LowRateDecoder; + + fn supports(original_count: usize, recovery_count: usize) -> bool { + original_count > 0 + && recovery_count > 0 + && original_count < GF_ORDER + && recovery_count < GF_ORDER + && original_count.next_power_of_two() + recovery_count <= GF_ORDER + } +} + +// ====================================================================== +// LowRateEncoder - PUBLIC + +/// Reed-Solomon encoder using only low rate. +pub struct LowRateEncoder { + engine: E, + work: EncoderWork, +} + +impl RateEncoder for LowRateEncoder { + type Rate = LowRate; + + fn add_original_shard>(&mut self, original_shard: T) -> Result<(), Error> { + self.work.add_original_shard(original_shard) + } + + fn encode(&mut self) -> Result { + let (mut work, original_count, recovery_count) = self.work.encode_begin()?; + let chunk_size = original_count.next_power_of_two(); + let engine = &self.engine; + + // ZEROPAD ORIGINAL + + work.zero(original_count..chunk_size); + + // IFFT - ORIGINAL + + engine.ifft(&mut work, 0, chunk_size, original_count, 0); + + // COPY IFFT RESULT TO OTHER CHUNKS + + let mut chunk_start = chunk_size; + while chunk_start < recovery_count { + work.copy_within(0, chunk_start, chunk_size); + chunk_start += chunk_size; + } + + // FFT - FULL CHUNKS + + let mut chunk_start = 0; + while chunk_start + chunk_size <= recovery_count { + engine.fft_skew_end(&mut work, chunk_start, chunk_size, chunk_size); + chunk_start += chunk_size; + } + + // FFT - FINAL PARTIAL CHUNK + + let last_count = recovery_count % chunk_size; + if last_count > 0 { + engine.fft_skew_end(&mut work, chunk_start, chunk_size, last_count); + } + + // DONE + + Ok(EncoderResult::new(&mut self.work)) + } + + fn into_parts(self) -> (E, EncoderWork) { + (self.engine, self.work) + } + + fn new( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + engine: E, + work: Option, + ) 
-> Result { + let mut work = work.unwrap_or_default(); + Self::reset_work(original_count, recovery_count, shard_bytes, &mut work)?; + Ok(Self { work, engine }) + } + + fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error> { + Self::reset_work(original_count, recovery_count, shard_bytes, &mut self.work) + } +} + +// ====================================================================== +// LowRateEncoder - PRIVATE + +impl LowRateEncoder { + fn reset_work( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + work: &mut EncoderWork, + ) -> Result<(), Error> { + Self::validate(original_count, recovery_count, shard_bytes)?; + work.reset( + original_count, + recovery_count, + shard_bytes, + Self::work_count(original_count, recovery_count), + ); + Ok(()) + } + + fn work_count(original_count: usize, recovery_count: usize) -> usize { + debug_assert!(Self::supports(original_count, recovery_count)); + + let chunk_size = original_count.next_power_of_two(); + + engine::checked_next_multiple_of(recovery_count, chunk_size).unwrap() + } +} + +// ====================================================================== +// LowRateDecoder - PUBLIC + +/// Reed-Solomon decoder using only low rate. +pub struct LowRateDecoder { + engine: E, + work: DecoderWork, +} + +impl RateDecoder for LowRateDecoder { + type Rate = LowRate; + + fn add_original_shard>( + &mut self, + index: usize, + original_shard: T, + ) -> Result<(), Error> { + self.work.add_original_shard(index, original_shard) + } + + fn add_recovery_shard>( + &mut self, + index: usize, + recovery_shard: T, + ) -> Result<(), Error> { + self.work.add_recovery_shard(index, recovery_shard) + } + + fn decode(&mut self) -> Result { + let (mut work, original_count, recovery_count, received) = + if let Some(stuff) = self.work.decode_begin()? { + stuff + } else { + // Nothing to do, original data is complete. 
+ return Ok(DecoderResult::new(&mut self.work)); + }; + + let chunk_size = original_count.next_power_of_two(); + let recovery_end = chunk_size + recovery_count; + let work_count = work.len(); + + // ERASURE LOCATIONS + + let mut erasures = [0; GF_ORDER]; + + for i in 0..original_count { + if !received[i] { + erasures[i] = 1; + } + } + + for i in chunk_size..recovery_end { + if !received[i] { + erasures[i] = 1; + } + } + + erasures[recovery_end..].fill(1); + + // EVALUATE POLYNOMIAL + + E::eval_poly(&mut erasures, GF_ORDER); + + // MULTIPLY SHARDS + + // work[ .. original_count] = original * erasures + // work[original_count .. chunk_size ] = 0 + // work[chunk_size .. original_end ] = recovery * erasures + // work[recovery_end .. ] = 0 + + for i in 0..original_count { + if received[i] { + self.engine.mul(&mut work[i], erasures[i]); + } else { + work[i].fill(0); + } + } + + work.zero(original_count..chunk_size); + + for i in chunk_size..recovery_end { + if received[i] { + self.engine.mul(&mut work[i], erasures[i]); + } else { + work[i].fill(0); + } + } + + work.zero(recovery_end..); + + // IFFT / FORMAL DERIVATIVE / FFT + + self.engine.ifft(&mut work, 0, work_count, recovery_end, 0); + E::formal_derivative(&mut work); + self.engine.fft(&mut work, 0, work_count, recovery_end, 0); + + // REVEAL ERASURES + + for i in 0..original_count { + if !received[i] { + self.engine.mul(&mut work[i], GF_MODULUS - erasures[i]); + } + } + + // DONE + + Ok(DecoderResult::new(&mut self.work)) + } + + fn into_parts(self) -> (E, DecoderWork) { + (self.engine, self.work) + } + + fn new( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + engine: E, + work: Option, + ) -> Result { + let mut work = work.unwrap_or_default(); + Self::reset_work(original_count, recovery_count, shard_bytes, &mut work)?; + Ok(Self { work, engine }) + } + + fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error> { + 
Self::reset_work(original_count, recovery_count, shard_bytes, &mut self.work) + } +} + +// ====================================================================== +// LowRateDecoder - PRIVATE + +impl LowRateDecoder { + fn reset_work( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + work: &mut DecoderWork, + ) -> Result<(), Error> { + Self::validate(original_count, recovery_count, shard_bytes)?; + + // work[..original_count ] = original + // work[original_count_pow2..] = recovery + work.reset( + original_count, + recovery_count, + shard_bytes, + 0, + original_count.next_power_of_two(), + Self::work_count(original_count, recovery_count), + ); + + Ok(()) + } + + fn work_count(original_count: usize, recovery_count: usize) -> usize { + debug_assert!(Self::supports(original_count, recovery_count)); + + (original_count.next_power_of_two() + recovery_count).next_power_of_two() + } +} + +// ====================================================================== +// TESTS + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_util; + + // ============================================================ + // ROUNDTRIPS - SINGLE ROUND + + #[test] + fn roundtrip_all_originals_missing() { + roundtrip_single!( + LowRate, + 3, + 3, + 1024, + test_util::EITHER_3_3, + &[], + &[0..3], + 133 + ); + } + + #[test] + fn roundtrip_no_originals_missing() { + roundtrip_single!(LowRate, 2, 3, 1024, test_util::LOW_2_3, &[0, 1], &[], 123); + } + + #[test] + fn roundtrips_tiny() { + for (original_count, recovery_count, seed, recovery_hash) in test_util::LOW_TINY { + roundtrip_single!( + LowRate, + *original_count, + *recovery_count, + 1024, + recovery_hash, + &[*recovery_count..*original_count], + &[0..std::cmp::min(*original_count, *recovery_count)], + *seed, + ); + } + } + + #[test] + #[ignore] + fn roundtrip_3000_60000() { + roundtrip_single!( + LowRate, + 3000, + 60000, + 64, + test_util::LOW_3000_60000_13, + &[], + &[0..3000], + 13, + ); + } + + #[test] + 
#[ignore] + fn roundtrip_30000_3000() { + roundtrip_single!( + LowRate, + 30000, + 3000, + 64, + test_util::LOW_30000_3000_15, + &[3000..30000], + &[0..3000], + 15, + ); + } + + #[test] + #[ignore] + fn roundtrip_32768_32768() { + roundtrip_single!( + LowRate, + 32768, + 32768, + 64, + test_util::EITHER_32768_32768_11, + &[], + &[0..32768], + 11, + ); + } + + // ============================================================ + // ROUNDTRIPS - TWO ROUNDS + + #[test] + fn two_rounds_implicit_reset() { + roundtrip_two_rounds!( + LowRate, + false, + (2, 3, 1024, test_util::LOW_2_3, &[], &[0, 2], 123), + (2, 3, 1024, test_util::LOW_2_3_223, &[], &[1, 2], 223), + ); + } + + #[test] + fn two_rounds_explicit_reset() { + roundtrip_two_rounds!( + LowRate, + true, + (2, 3, 1024, test_util::LOW_2_3, &[], &[0, 2], 123), + (2, 5, 1024, test_util::LOW_2_5, &[], &[0, 4], 125), + ); + } + + // ============================================================ + // LowRate + + mod low_rate { + use crate::{ + engine::NoSimd, + rate::{LowRate, Rate}, + Error, + }; + + #[test] + fn decoder() { + assert!(LowRate::::decoder(4096, 61440, 64, NoSimd::new(), None).is_ok()); + + assert_eq!( + LowRate::::decoder(61440, 4096, 64, NoSimd::new(), None).err(), + Some(Error::UnsupportedShardCount { + original_count: 61440, + recovery_count: 4096, + }) + ); + } + + #[test] + fn encoder() { + assert!(LowRate::::encoder(4096, 61440, 64, NoSimd::new(), None).is_ok()); + + assert_eq!( + LowRate::::encoder(61440, 4096, 64, NoSimd::new(), None).err(), + Some(Error::UnsupportedShardCount { + original_count: 61440, + recovery_count: 4096, + }) + ); + } + + #[test] + fn supports() { + assert!(!LowRate::::supports(0, 1)); + assert!(!LowRate::::supports(1, 0)); + + assert!(LowRate::::supports(4096, 61440)); + assert!(!LowRate::::supports(4096, 61441)); + assert!(!LowRate::::supports(4097, 61440)); + + assert!(!LowRate::::supports(61440, 4096)); + + assert!(!LowRate::::supports(usize::MAX, usize::MAX)); + } + + #[test] 
+ fn validate() { + assert_eq!( + LowRate::::validate(1, 1, 123).err(), + Some(Error::InvalidShardSize { shard_bytes: 123 }) + ); + + assert!(LowRate::::validate(4096, 61440, 64).is_ok()); + + assert_eq!( + LowRate::::validate(61440, 4096, 64).err(), + Some(Error::UnsupportedShardCount { + original_count: 61440, + recovery_count: 4096, + }) + ); + } + } + + // ============================================================ + // LowRateEncoder + + mod low_rate_encoder { + use crate::{ + engine::NoSimd, + rate::{LowRateEncoder, RateEncoder}, + Error, + }; + + // ================================================== + // ERRORS + + test_rate_encoder_errors! {LowRateEncoder} + + // ================================================== + // supports + + #[test] + fn supports() { + assert!(LowRateEncoder::::supports(4096, 61440)); + assert!(!LowRateEncoder::::supports(61440, 4096)); + } + + // ================================================== + // validate + + #[test] + fn validate() { + assert_eq!( + LowRateEncoder::::validate(1, 1, 123).err(), + Some(Error::InvalidShardSize { shard_bytes: 123 }) + ); + + assert!(LowRateEncoder::::validate(4096, 61440, 64).is_ok()); + + assert_eq!( + LowRateEncoder::::validate(61440, 4096, 64).err(), + Some(Error::UnsupportedShardCount { + original_count: 61440, + recovery_count: 4096, + }) + ); + } + + // ================================================== + // work_count + + #[test] + fn work_count() { + assert_eq!(LowRateEncoder::::work_count(1, 1), 1); + assert_eq!(LowRateEncoder::::work_count(1024, 4096), 4096); + assert_eq!(LowRateEncoder::::work_count(1024, 4097), 5120); + assert_eq!(LowRateEncoder::::work_count(1025, 4097), 6144); + assert_eq!(LowRateEncoder::::work_count(32768, 32768), 32768); + } + } + + // ============================================================ + // LowRateDecoder + + mod low_rate_decoder { + use crate::{ + engine::NoSimd, + rate::{LowRateDecoder, RateDecoder}, + Error, + }; + + // 
================================================== + // ERRORS + + test_rate_decoder_errors! {LowRateDecoder} + + // ================================================== + // supports + + #[test] + fn supports() { + assert!(LowRateDecoder::::supports(4096, 61440)); + assert!(!LowRateDecoder::::supports(61440, 4096)); + } + + // ================================================== + // validate + + #[test] + fn validate() { + assert_eq!( + LowRateDecoder::::validate(1, 1, 123).err(), + Some(Error::InvalidShardSize { shard_bytes: 123 }) + ); + + assert!(LowRateDecoder::::validate(4096, 61440, 64).is_ok()); + + assert_eq!( + LowRateDecoder::::validate(61440, 4096, 64).err(), + Some(Error::UnsupportedShardCount { + original_count: 61440, + recovery_count: 4096, + }) + ); + } + + // ================================================== + // work_count + + #[test] + fn work_count() { + assert_eq!(LowRateDecoder::::work_count(1, 1), 2); + assert_eq!(LowRateDecoder::::work_count(1024, 3072), 4096); + assert_eq!(LowRateDecoder::::work_count(1024, 3073), 8192); + assert_eq!(LowRateDecoder::::work_count(1025, 2048), 4096); + assert_eq!(LowRateDecoder::::work_count(1025, 2049), 8192); + assert_eq!(LowRateDecoder::::work_count(32768, 32768), 65536); + } + } +} diff --git a/src/reed_solomon.rs b/src/reed_solomon.rs new file mode 100644 index 0000000..582a2d4 --- /dev/null +++ b/src/reed_solomon.rs @@ -0,0 +1,284 @@ +use crate::{ + engine::DefaultEngine, + rate::{DefaultRate, DefaultRateDecoder, DefaultRateEncoder, Rate, RateDecoder, RateEncoder}, + DecoderResult, EncoderResult, Error, +}; + +// ====================================================================== +// ReedSolomonEncoder - PUBLIC + +/// Reed-Solomon encoder using [`DefaultEngine`] and [`DefaultRate`]. +/// +/// [`DefaultEngine`]: crate::engine::DefaultEngine +pub struct ReedSolomonEncoder(DefaultRateEncoder); + +impl ReedSolomonEncoder { + /// Adds one original shard to the encoder. 
+ /// + /// Original shards have indexes `0..original_count` corresponding to the order + /// in which they are added and these same indexes must be used when decoding. + /// + /// See [basic usage](crate#basic-usage) for an example. + pub fn add_original_shard>(&mut self, original_shard: T) -> Result<(), Error> { + self.0.add_original_shard(original_shard) + } + + /// Encodes the added original shards returning [`EncoderResult`] + /// which contains the generated recovery shards. + /// + /// When returned [`EncoderResult`] is dropped the encoder is + /// automatically [`reset`] and ready for new round of encoding. + /// + /// See [basic usage](crate#basic-usage) for an example. + /// + /// [`reset`]: ReedSolomonEncoder::reset + pub fn encode(&mut self) -> Result { + self.0.encode() + } + + /// Creates new encoder with given configuration + /// and allocates required working space. + /// + /// See [basic usage](crate#basic-usage) for an example. + pub fn new( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result { + Ok(Self(DefaultRateEncoder::new( + original_count, + recovery_count, + shard_bytes, + DefaultEngine::new(), + None, + )?)) + } + + /// Resets encoder to given configuration. + /// + /// - Added original shards are forgotten. + /// - Existing working space is re-used if it's large enough + /// or re-allocated otherwise. + pub fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error> { + self.0.reset(original_count, recovery_count, shard_bytes) + } + + /// Returns `true` if given `original_count` / `recovery_count` + /// combination is supported. 
+ /// + /// # Examples + /// + /// ```rust + /// use reed_solomon_16::ReedSolomonEncoder; + /// + /// assert_eq!(ReedSolomonEncoder::supports(60_000, 4_000), true); + /// assert_eq!(ReedSolomonEncoder::supports(60_000, 5_000), false); + /// ``` + pub fn supports(original_count: usize, recovery_count: usize) -> bool { + DefaultRate::::supports(original_count, recovery_count) + } +} + +// ====================================================================== +// ReedSolomonDecoder - PUBLIC + +/// Reed-Solomon decoder using [`DefaultEngine`] and [`DefaultRate`]. +/// +/// [`DefaultEngine`]: crate::engine::DefaultEngine +pub struct ReedSolomonDecoder(DefaultRateDecoder); + +impl ReedSolomonDecoder { + /// Adds one original shard to the decoder. + /// + /// - Shards can be added in any order. + /// - Index must be the same that was used in encoding. + /// + /// See [basic usage](crate#basic-usage) for an example. + pub fn add_original_shard>( + &mut self, + index: usize, + original_shard: T, + ) -> Result<(), Error> { + self.0.add_original_shard(index, original_shard) + } + + /// Adds one recovery shard to the decoder. + /// + /// - Shards can be added in any order. + /// - Index must be the same that was used in encoding. + /// + /// See [basic usage](crate#basic-usage) for an example. + pub fn add_recovery_shard>( + &mut self, + index: usize, + recovery_shard: T, + ) -> Result<(), Error> { + self.0.add_recovery_shard(index, recovery_shard) + } + + /// Decodes the added shards returning [`DecoderResult`] + /// which contains the restored original shards. + /// + /// When returned [`DecoderResult`] is dropped the decoder is + /// automatically [`reset`] and ready for new round of decoding. + /// + /// See [basic usage](crate#basic-usage) for an example. + /// + /// [`reset`]: ReedSolomonDecoder::reset + pub fn decode(&mut self) -> Result { + self.0.decode() + } + + /// Creates new decoder with given configuration + /// and allocates required working space. 
+ /// + /// See [basic usage](crate#basic-usage) for an example. + pub fn new( + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result { + Ok(Self(DefaultRateDecoder::new( + original_count, + recovery_count, + shard_bytes, + DefaultEngine::new(), + None, + )?)) + } + + /// Resets decoder to given configuration. + /// + /// - Added shards are forgotten. + /// - Existing working space is re-used if it's large enough + /// or re-allocated otherwise. + pub fn reset( + &mut self, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + ) -> Result<(), Error> { + self.0.reset(original_count, recovery_count, shard_bytes) + } + + /// Returns `true` if given `original_count` / `recovery_count` + /// combination is supported. + /// + /// # Examples + /// + /// ```rust + /// use reed_solomon_16::ReedSolomonDecoder; + /// + /// assert_eq!(ReedSolomonDecoder::supports(60_000, 4_000), true); + /// assert_eq!(ReedSolomonDecoder::supports(60_000, 5_000), false); + /// ``` + pub fn supports(original_count: usize, recovery_count: usize) -> bool { + DefaultRate::::supports(original_count, recovery_count) + } +} + +// ====================================================================== +// TESTS + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use fixedbitset::FixedBitSet; + + use super::*; + use crate::test_util; + + // ============================================================ + // HELPERS + + fn roundtrip( + encoder: &mut ReedSolomonEncoder, + decoder: &mut ReedSolomonDecoder, + original_count: usize, + recovery_hash: &str, + decoder_original: &[usize], + decoder_recovery: &[usize], + seed: u8, + ) { + let original = test_util::generate_original(original_count, 1024, seed); + + for original in &original { + encoder.add_original_shard(original).unwrap(); + } + + let result = encoder.encode().unwrap(); + let recovery: Vec<_> = result.recovery_iter().collect(); + + test_util::assert_hash(&recovery, recovery_hash); + + 
let mut original_received = FixedBitSet::with_capacity(original_count); + + for i in decoder_original { + decoder.add_original_shard(*i, &original[*i]).unwrap(); + original_received.set(*i, true); + } + + for i in decoder_recovery { + decoder.add_recovery_shard(*i, recovery[*i]).unwrap(); + } + + let result = decoder.decode().unwrap(); + let restored: HashMap<_, _> = result.restored_original_iter().collect(); + + for i in 0..original_count { + if !original_received[i] { + assert_eq!(restored[&i], original[i]); + } + } + } + + // ============================================================ + // ROUNDTRIP - TWO ROUNDS + + #[test] + fn roundtrip_two_rounds_reset_low_to_high() { + let mut encoder = ReedSolomonEncoder::new(2, 3, 1024).unwrap(); + let mut decoder = ReedSolomonDecoder::new(2, 3, 1024).unwrap(); + + roundtrip( + &mut encoder, + &mut decoder, + 2, + test_util::LOW_2_3, + &[], + &[0, 1], + 123, + ); + + encoder.reset(3, 2, 1024).unwrap(); + decoder.reset(3, 2, 1024).unwrap(); + + roundtrip( + &mut encoder, + &mut decoder, + 3, + test_util::HIGH_3_2, + &[1], + &[0, 1], + 132, + ); + } + + // ================================================== + // supports + + #[test] + fn supports() { + assert!(ReedSolomonEncoder::supports(4096, 61440)); + assert!(ReedSolomonEncoder::supports(61440, 4096)); + + assert!(ReedSolomonDecoder::supports(4096, 61440)); + assert!(ReedSolomonDecoder::supports(61440, 4096)); + } +} diff --git a/src/test_util.rs b/src/test_util.rs new file mode 100644 index 0000000..d0103d3 --- /dev/null +++ b/src/test_util.rs @@ -0,0 +1,837 @@ +use std::{collections::HashMap, ops::Range}; + +use fixedbitset::FixedBitSet; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; +use sha2::{Digest, Sha256}; + +use crate::{ + engine::Engine, + rate::{Rate, RateDecoder, RateEncoder}, +}; + +// ====================================================================== +// IntOrRange - CRATE + +pub(crate) trait IntOrRange { + fn contains(&self, item: usize) 
-> bool; + // incluside + fn min(&self) -> usize; + // exclusive + fn max(&self) -> usize; +} + +impl IntOrRange for usize { + fn contains(&self, item: usize) -> bool { + *self == item + } + + fn min(&self) -> usize { + *self + } + + fn max(&self) -> usize { + self + 1 + } +} + +impl IntOrRange for Range { + fn contains(&self, item: usize) -> bool { + self.contains(&item) + } + + fn min(&self) -> usize { + self.start + } + + fn max(&self) -> usize { + self.end + } +} + +// ====================================================================== +// FUNCTIONS - CRATE + +pub(crate) fn assert_hash(shards: T, expected: &str) +where + T: IntoIterator, + T::Item: AsRef<[u8]>, +{ + let mut sha = Sha256::new(); + for shard in shards { + sha.update(shard); + } + let got = sha.finalize(); + + if &got[..] != hex::decode(expected).unwrap() { + print!("GOT : "); + for x in got { + print!("{:02x}", x); + } + println!(); + println!("EXPECTED: {}", expected); + panic!("recovery shards hash doesn't match"); + } +} + +pub(crate) fn generate_original( + original_count: usize, + shard_bytes: usize, + seed: u8, +) -> Vec> { + let mut rng = ChaCha8Rng::from_seed([seed; 32]); + let mut original = vec![vec![0u8; shard_bytes]; original_count]; + for original in &mut original { + rng.fill::<[u8]>(original); + } + original +} + +// ====================================================================== +// RATE ENCODER/DECODER - TEST SINGLE-ROUND ROUNDTRIP + +pub(crate) fn roundtrip, E: Engine, T: IntOrRange>( + encoder: &mut R::RateEncoder, + decoder: &mut R::RateDecoder, + original_count: usize, + shard_bytes: usize, + recovery_hash: &str, + decoder_original: &[T], + decoder_recovery: &[T], + seed: u8, +) { + let original = generate_original(original_count, shard_bytes, seed); + + for original in &original { + encoder.add_original_shard(original).unwrap(); + } + + let result = encoder.encode().unwrap(); + let recovery: Vec<_> = result.recovery_iter().collect(); + + assert_hash(&recovery, 
recovery_hash); + + let mut original_received = FixedBitSet::with_capacity(original_count); + + for x in decoder_original { + for i in x.min()..x.max() { + decoder.add_original_shard(i, &original[i]).unwrap(); + original_received.set(i, true); + } + } + + for x in decoder_recovery { + for i in x.min()..x.max() { + decoder.add_recovery_shard(i, recovery[i]).unwrap(); + } + } + + let result = decoder.decode().unwrap(); + let restored: HashMap<_, _> = result.restored_original_iter().collect(); + + for i in 0..original_count { + if !original_received[i] { + assert_eq!(restored[&i], original[i]); + } + } +} + +pub(crate) fn roundtrip_single, E: Engine, T: IntOrRange>( + engine: E, + original_count: usize, + recovery_count: usize, + shard_bytes: usize, + recovery_hash: &str, + decoder_original: &[T], + decoder_recovery: &[T], + seed: u8, +) { + let mut encoder = R::encoder( + original_count, + recovery_count, + shard_bytes, + engine.clone(), + None, + ) + .unwrap(); + + let mut decoder = + R::decoder(original_count, recovery_count, shard_bytes, engine, None).unwrap(); + + roundtrip::( + &mut encoder, + &mut decoder, + original_count, + shard_bytes, + recovery_hash, + decoder_original, + decoder_recovery, + seed, + ); +} + +macro_rules! roundtrip_single { + ($Rate: ident, + $original_count: expr, + $recovery_count: expr, + $shard_bytes: expr, + $recovery_hash: expr, + $decoder_original: expr, + $decoder_recovery: expr, + $seed: expr $(,)? 
+ ) => { + crate::test_util::roundtrip_single::<$Rate<_>, _, _>( + crate::engine::Naive::new(), + $original_count, + $recovery_count, + $shard_bytes, + $recovery_hash, + $decoder_original, + $decoder_recovery, + $seed, + ); + + crate::test_util::roundtrip_single::<$Rate<_>, _, _>( + crate::engine::NoSimd::new(), + $original_count, + $recovery_count, + $shard_bytes, + $recovery_hash, + $decoder_original, + $decoder_recovery, + $seed, + ); + }; +} + +// ====================================================================== +// RATE ENCODER/DECODER - TEST TWO-ROUND ROUNDTRIP + +macro_rules! roundtrip_two_rounds { + ( + $Rate: ident, + $explicit_reset: expr, + ( + $original_count_a: expr, + $recovery_count_a: expr, + $shard_bytes_a: expr, + $recovery_hash_a: expr, + $decoder_original_a: expr, + $decoder_recovery_a: expr, + $seed_a: expr $(,)? + ), + ( + $original_count_b: expr, + $recovery_count_b: expr, + $shard_bytes_b: expr, + $recovery_hash_b: expr, + $decoder_original_b: expr, + $decoder_recovery_b: expr, + $seed_b: expr $(,)? + ) $(,)? + ) => { + use crate::engine::{Naive, NoSimd}; + + roundtrip_two_rounds_inner!( + $Rate, + Naive, + $explicit_reset, + ( + $original_count_a, + $recovery_count_a, + $shard_bytes_a, + $recovery_hash_a, + $decoder_original_a, + $decoder_recovery_a, + $seed_a, + ), + ( + $original_count_b, + $recovery_count_b, + $shard_bytes_b, + $recovery_hash_b, + $decoder_original_b, + $decoder_recovery_b, + $seed_b, + ), + ); + + roundtrip_two_rounds_inner!( + $Rate, + NoSimd, + $explicit_reset, + ( + $original_count_a, + $recovery_count_a, + $shard_bytes_a, + $recovery_hash_a, + $decoder_original_a, + $decoder_recovery_a, + $seed_a, + ), + ( + $original_count_b, + $recovery_count_b, + $shard_bytes_b, + $recovery_hash_b, + $decoder_original_b, + $decoder_recovery_b, + $seed_b, + ), + ); + }; +} + +macro_rules! 
roundtrip_two_rounds_inner { + ( + $Rate: ident, + $Engine: ident, + $explicit_reset: expr, + ( + $original_count_a: expr, + $recovery_count_a: expr, + $shard_bytes_a: expr, + $recovery_hash_a: expr, + $decoder_original_a: expr, + $decoder_recovery_a: expr, + $seed_a: expr $(,)? + ), + ( + $original_count_b: expr, + $recovery_count_b: expr, + $shard_bytes_b: expr, + $recovery_hash_b: expr, + $decoder_original_b: expr, + $decoder_recovery_b: expr, + $seed_b: expr $(,)? + ) $(,)? + ) => { + let mut encoder = $Rate::encoder( + $original_count_a, + $recovery_count_a, + $shard_bytes_a, + $Engine::new(), + None, + ) + .unwrap(); + + let mut decoder = $Rate::decoder( + $original_count_a, + $recovery_count_a, + $shard_bytes_a, + $Engine::new(), + None, + ) + .unwrap(); + + test_util::roundtrip::<$Rate<_>, _, _>( + &mut encoder, + &mut decoder, + $original_count_a, + $shard_bytes_a, + $recovery_hash_a, + $decoder_original_a, + $decoder_recovery_a, + $seed_a, + ); + + if $explicit_reset { + encoder + .reset($original_count_b, $recovery_count_b, $shard_bytes_b) + .unwrap(); + + decoder + .reset($original_count_b, $recovery_count_b, $shard_bytes_b) + .unwrap(); + } + + test_util::roundtrip::<$Rate<_>, _, _>( + &mut encoder, + &mut decoder, + $original_count_b, + $shard_bytes_b, + $recovery_hash_b, + $decoder_original_b, + $decoder_recovery_b, + $seed_b, + ); + }; +} + +// ====================================================================== +// RATE ENCODER - TEST ERRORS + +macro_rules! 
test_rate_encoder_errors { + ($Encoder:ident) => { + #[test] + fn different_shard_size_in_add_original_shard() { + let mut encoder = $Encoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + assert_eq!( + encoder.add_original_shard([0; 128]), + Err(Error::DifferentShardSize { + shard_bytes: 64, + got: 128 + }), + ); + } + + #[test] + fn invalid_shard_size_in_new() { + assert_eq!( + $Encoder::new(1, 1, 123, NoSimd::new(), None).err(), + Some(Error::InvalidShardSize { shard_bytes: 123 }), + ); + } + + #[test] + fn invalid_shard_size_in_reset() { + let mut encoder = $Encoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + assert_eq!( + encoder.reset(1, 1, 123), + Err(Error::InvalidShardSize { shard_bytes: 123 }), + ); + } + + #[test] + fn too_few_original_shards() { + let mut encoder = $Encoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + assert_eq!( + encoder.encode().err(), + Some(Error::TooFewOriginalShards { + original_count: 1, + original_received_count: 0 + }), + ); + } + + #[test] + fn too_many_original_shards() { + let mut encoder = $Encoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + encoder.add_original_shard([0; 64]).unwrap(); + assert_eq!( + encoder.add_original_shard([0; 64]), + Err(Error::TooManyOriginalShards { original_count: 1 }), + ); + } + + #[test] + fn unsupported_shard_count_in_new() { + assert_eq!( + $Encoder::new(0, 1, 64, NoSimd::new(), None).err(), + Some(Error::UnsupportedShardCount { + original_count: 0, + recovery_count: 1, + }), + ); + } + + #[test] + fn unsupported_shard_count_in_reset() { + let mut encoder = $Encoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + assert_eq!( + encoder.reset(0, 1, 64), + Err(Error::UnsupportedShardCount { + original_count: 0, + recovery_count: 1, + }), + ); + } + }; +} + +// ====================================================================== +// RATE DECODER - TEST ERRORS + +macro_rules! 
test_rate_decoder_errors { + ($Decoder:ident) => { + #[test] + fn different_shard_size_in_add_original_shard() { + let mut decoder = $Decoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + assert_eq!( + decoder.add_original_shard(0, [0; 128]), + Err(Error::DifferentShardSize { + shard_bytes: 64, + got: 128 + }), + ); + } + + #[test] + fn different_shard_size_in_add_recovery_shard() { + let mut decoder = $Decoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + assert_eq!( + decoder.add_recovery_shard(0, [0; 128]), + Err(Error::DifferentShardSize { + shard_bytes: 64, + got: 128 + }), + ); + } + + #[test] + fn duplicate_shard_index_in_add_original_shard() { + let mut decoder = $Decoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + decoder.add_original_shard(0, [0; 64]).unwrap(); + assert_eq!( + decoder.add_original_shard(0, [0; 64]), + Err(Error::DuplicateOriginalShardIndex { index: 0 }), + ); + } + + #[test] + fn duplicate_shard_index_in_add_recovert_shard() { + let mut decoder = $Decoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + decoder.add_recovery_shard(0, [0; 64]).unwrap(); + assert_eq!( + decoder.add_recovery_shard(0, [0; 64]), + Err(Error::DuplicateRecoveryShardIndex { index: 0 }), + ); + } + + #[test] + fn invalid_original_shard_index() { + let mut decoder = $Decoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + assert_eq!( + decoder.add_original_shard(1, [0; 64]), + Err(Error::InvalidOriginalShardIndex { + original_count: 1, + index: 1, + }), + ); + } + + #[test] + fn invalid_recovery_shard_index() { + let mut decoder = $Decoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + assert_eq!( + decoder.add_recovery_shard(1, [0; 64]), + Err(Error::InvalidRecoveryShardIndex { + recovery_count: 1, + index: 1, + }), + ); + } + + #[test] + fn invalid_shard_size_in_new() { + assert_eq!( + $Decoder::new(1, 1, 123, NoSimd::new(), None).err(), + Some(Error::InvalidShardSize { shard_bytes: 123 }), + ); + } + + #[test] + fn invalid_shard_size_in_reset() { + let 
mut decoder = $Decoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + assert_eq!( + decoder.reset(1, 1, 123), + Err(Error::InvalidShardSize { shard_bytes: 123 }), + ); + } + + #[test] + fn not_enough_shards() { + let mut decoder = $Decoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + assert_eq!( + decoder.decode().err(), + Some(Error::NotEnoughShards { + original_count: 1, + original_received_count: 0, + recovery_received_count: 0, + }), + ); + } + + #[test] + fn unsupported_shard_count_in_new() { + assert_eq!( + $Decoder::new(0, 1, 64, NoSimd::new(), None).err(), + Some(Error::UnsupportedShardCount { + original_count: 0, + recovery_count: 1, + }), + ); + } + + #[test] + fn unsupported_shard_count_in_reset() { + let mut decoder = $Decoder::new(1, 1, 64, NoSimd::new(), None).unwrap(); + assert_eq!( + decoder.reset(0, 1, 64), + Err(Error::UnsupportedShardCount { + original_count: 0, + recovery_count: 1, + }), + ); + } + }; +} + +// ============================================================ +// RECOVERY HASHES + +// SHA256 hashes of some recovery shards. +// - shard_bytes = 1024 (or 64 if mentioned explicitly) +// - Original shards are from `generate_original`. + +// ================================================== +// TINY + +// (original_count, recovery_count, seed, hash) + +#[rustfmt::skip] +pub(crate) const DEFAULT_TINY: &[(usize, usize, u8, &str)] = &[ + // single original/recovery + (1, 1, 111, "17e3108283196d04f027f01c23577076a1db3c4caeed6269995733ffef6d3398"), // EITHER + (1, 2, 112, "cabef22cfe49d9167b4cd40a6a6437b52496af28ff1dcfb6e207c9c337d5affa"), // LOW + (1, 3, 113, "fda3b35bb91a71b0ba7b6ea437fbf74648ea6e94a4ce2be885b0cd14f0d8005b"), // LOW + (2, 1, 121, "7fc8ed9211851121e4a80cf995b113f498c20646e18dc312db7d27efd6cd60d2"), // HIGH + (3, 1, 131, "1f118cce8f4c528a4f68c9215d6996e982bce81ba7c0132193a65961f777943a"), // HIGH + + // 2 .. 
8 + (2, 2, 122, "7d53725125394f5913300b40f09055bb75e6335a936305070da3707c9211dd26"), // EITHER + (2, 3, 123, LOW_2_3), // LOW + (2, 4, 124, "3ce3eab3625dae68e164daee1e2bd3304ac7cdcf1ffdd8f81560c2def733e567"), // LOW + (2, 5, 125, LOW_2_5), // LOW + (2, 6, 126, "f7d65a6334421428930e8223962f5e280a6ed75a252cb82b9ae6a27314708013"), // LOW + (2, 7, 127, "cd75f744cf44cf7036758b3bc096192317b962cf2f32039bd67a535ae8b5d251"), // LOW + (2, 8, 128, "07964065a913b631645d6e251908650fc4eba4a8b5844cdaab43d76d5f4f3a79"), // LOW + (3, 2, 132, HIGH_3_2), // HIGH + (3, 3, 133, EITHER_3_3), // EITHER + (3, 4, 134, EITHER_3_4), // EITHER + (3, 5, 135, LOW_3_5), // LOW + (3, 6, 136, "531b4db2b2148c609fe1b3d6ab4e6a012193f28647c0eb1ed13344a94057c6fe"), // LOW + (3, 7, 137, "053434cf04886f7f3bef43743700046f57d2e38cb5682ceaeaccf893c5120c78"), // LOW + (3, 8, 138, "848b7bc12174a1a74a30aaeccf875fe2be82d4cc8f9b992f04e45607839cd4ff"), // LOW + (4, 2, 142, "e0c05cb0f4e699694907ce9a5c16034e5b1d8b4eee51942ba87854149036d8f1"), // HIGH + (4, 3, 143, EITHER_4_3), // EITHER + (4, 4, 144, "df2c520f15464bfe3448ebbbfbb6bfc2f64237a7a20cfa65bc6f1046e97470d2"), // EITHER + (4, 5, 145, "e7709cc3f00e377e15e624df78a7a0a76b49ed5e4c0bc9035dda9e846935746a"), // LOW + (4, 6, 146, "8852c9526508d934315a3e07dd90f9389f5a6639ed7f3aaee74b066cccbcf033"), // LOW + (4, 7, 147, "4475531153c9ea65743a64e4f661746dc5cd4c7a70bdc06812f1b73d00d65f36"), // LOW + (4, 8, 148, "b682387ee7e5e6a42ff5c8b8050c301225f84f98961ba5aee739f3f20d3cae02"), // LOW + (5, 2, 152, HIGH_5_2), // HIGH + (5, 3, 153, HIGH_5_3), // HIGH + (5, 4, 154, "3eb67a0993903f688d767928d2d35d5762f25fdb196a5f6a0e49b36f9a5a229b"), // HIGH + (5, 5, 155, "41b83349a18ec3c20fb19879e0e513512c60078e57b4ff98f57cae0d93effc7c"), // EITHER + (5, 6, 156, "67766507a7cedaa663f798354f274829703143cd068f68075f6380976a65c99a"), // EITHER + (5, 7, 157, "a47d23ed58eec1c809799b1c63bcfe75e527489985cf91c0f42f7ae10c9e8abe"), // EITHER + (5, 8, 158, 
"ff33eb1539f0573faaf0993c63507ed61d809527505fd26e8e2aa2511e3622c5"), // EITHER + (6, 2, 162, "6e45e014adf6201172f45c23e2918e2b628c55bc60d9e88c359337758ca63e27"), // HIGH + (6, 3, 163, "b2295f7f0f055476f9385cdfbba27512d3fef0aee872b9794193a457132af7d4"), // HIGH + (6, 4, 164, "0242981363ddab69e3f3f7bac4e0aeb8d64ed040eb1925d0d63fbba864a7aebc"), // HIGH + (6, 5, 165, "0619cf8025f6c6f25b2c4c3609f71224de518108b4d6f577762c5160f2753733"), // EITHER + (6, 6, 166, "27472dea67ef5470579f8f2fcab5f9370334a91af49382780a6ccf0df6027a98"), // EITHER + (6, 7, 167, "afffabb84e4987e15af741ac0f919fa73af954fe44c0da223cb67bdcfd3415c2"), // EITHER + (6, 8, 168, "129b44878eef071c0b2e92b17cdb15139d2d0744f8f5306fa6a4c100396a1e3c"), // EITHER + (7, 2, 172, "b07a9064742825258206c4c4ab041305ad6d3646380740bb54b938962630df6c"), // HIGH + (7, 3, 173, "64061b0af048381c22e8b08c19a1148de6859a7bcc26ddee348bdf6006554578"), // HIGH + (7, 4, 174, "4cdab47a556582096b8195a5bf30f63d3effbb1f9ad9e25a48b41ba260739247"), // HIGH + (7, 5, 175, "feb342a8e0b9c33d120983c3f4df95ca19fded3e0ed3484a0d02f5ec27961d4b"), // EITHER + (7, 6, 176, "7f127b5c827854f721c7592faecb11a239894c653ac6efb95cfcf54e1348c326"), // EITHER + (7, 7, 177, "b03e8b01d887050f762c40cce37042a8b5a8afb601a2476eb138f65b9234efe7"), // EITHER + (7, 8, 178, "eacf451d3112d43be2619b01bbc40915a109d387e21f7b3c083f00fa7abcdf68"), // EITHER + (8, 2, 182, "dcf2306c7f9aab2dd0590708864d68ba1a6484632c3a7a4b1c1c56a3d6b0bb50"), // HIGH + (8, 3, 183, "83c2cdcc981c627f778f061c7eadc6be49e7665c4ed591a0884cfa4adc3a20cf"), // HIGH + (8, 4, 184, "356d75c370e3ed29c7d458a9d5f5b48798119d0d32dc8e742a423f94647eb085"), // HIGH + (8, 5, 185, "4b0a3bd10e64f8db57abeddb028ce7c93b89d84b59c2e4805eecf1ef43aef858"), // EITHER + (8, 6, 186, "44ffaeac7c1585d8b8c3afd813ea388b3dcceeebe3ef46bab4219df554ef057f"), // EITHER + (8, 7, 187, "2627846d37793df3ddeb1922892c2723a5fefe36b6d244506fa810c11fb70df7"), // EITHER + (8, 8, 188, 
"b8da62e75f305a59128b2257162605e541fd252aca8f74ceb2a91fb2a3276d6e"), // EITHER +]; + +#[rustfmt::skip] +pub(crate) const HIGH_TINY: &[(usize, usize, u8, &str)] = &[ + // single original/recovery + (1, 1, 111, "17e3108283196d04f027f01c23577076a1db3c4caeed6269995733ffef6d3398"), // EITHER + (1, 2, 112, "a5bdc2eb1cd88327a675d2fa1df587ea3e7fa42e74975fd8577c5c248ab51824"), + (1, 3, 113, "ea7c19a1de8308599d84334059c6ca6c1e574ea3cfbe680f749754af986a0b18"), + (2, 1, 121, "7fc8ed9211851121e4a80cf995b113f498c20646e18dc312db7d27efd6cd60d2"), + (3, 1, 131, "1f118cce8f4c528a4f68c9215d6996e982bce81ba7c0132193a65961f777943a"), + + // 2 .. 8 + (2, 2, 122, "7d53725125394f5913300b40f09055bb75e6335a936305070da3707c9211dd26"), // EITHER + (2, 3, 123, "19fb5ce2d7a3db95f819017cf49050eb8cd4b3c626cedf5ca13f6d2ab4eb43c4"), + (2, 4, 124, "ed0d8db29d770cbafc4fa2ebe5ab991b3a0ee2dd8089f82cbb35de4670ccee50"), + (2, 5, 125, "9b2818b4442619aed74f277ea7a97aa9d0a92f1c1413fea97091fcd2e696f03a"), + (2, 6, 126, "cac3955636c60dfa82d0a8383949bbdf0a7c5bbb89422fa764cccea0a927d5d7"), + (2, 7, 127, "42f34812f503a419fc6ddaee8f3947afc1fc533e9c8b29eae746addceebc1748"), + (2, 8, 128, "1212dc3e1f8e8743996c303a05a0401d03c72b67dfefc1aaaa2cc07c31f47710"), + (3, 2, 132, HIGH_3_2), + (3, 3, 133, EITHER_3_3), + (3, 4, 134, EITHER_3_4), + (3, 5, 135, "eb5dc236bdd7aa7d8a927524118161f2dd8e51526653cd31194ee8ff007a8062"), + (3, 6, 136, "2338d6073e4e5103483f748312f5872141f51dc2fa510695837ea99e3508892c"), + (3, 7, 137, "6559a2478ce0f362e08934dbec840f3be6a42e3fa9591824548b15811717cf49"), + (3, 8, 138, "afe6ecd8baf01b3514787a593c73276f1e24d29b4bd909ee0a26d16ea3d07844"), + (4, 2, 142, "e0c05cb0f4e699694907ce9a5c16034e5b1d8b4eee51942ba87854149036d8f1"), + (4, 3, 143, EITHER_4_3), + (4, 4, 144, "df2c520f15464bfe3448ebbbfbb6bfc2f64237a7a20cfa65bc6f1046e97470d2"), // EITHER + (4, 5, 145, "57e72af02f975404d6d3905394782da034581c137c08c5ebe73acb2d071b38bb"), + (4, 6, 146, 
"d07ad54dc275f3c16d68a86fb4893c4e7a2dda9edd4dcf5c90d09ee5c647993a"), + (4, 7, 147, "32266a50e6f97a901f8eae8d633fcf98d27a2c9e71c8369fbe17acc290d5f817"), + (4, 8, 148, "0f157da98d800fe60dbb381f3473e122e15549d418bc2cb5f3e57e32fad033b8"), + (5, 2, 152, HIGH_5_2), + (5, 3, 153, HIGH_5_3), + (5, 4, 154, "3eb67a0993903f688d767928d2d35d5762f25fdb196a5f6a0e49b36f9a5a229b"), + (5, 5, 155, "41b83349a18ec3c20fb19879e0e513512c60078e57b4ff98f57cae0d93effc7c"), // EITHER + (5, 6, 156, "67766507a7cedaa663f798354f274829703143cd068f68075f6380976a65c99a"), // EITHER + (5, 7, 157, "a47d23ed58eec1c809799b1c63bcfe75e527489985cf91c0f42f7ae10c9e8abe"), // EITHER + (5, 8, 158, "ff33eb1539f0573faaf0993c63507ed61d809527505fd26e8e2aa2511e3622c5"), // EITHER + (6, 2, 162, "6e45e014adf6201172f45c23e2918e2b628c55bc60d9e88c359337758ca63e27"), + (6, 3, 163, "b2295f7f0f055476f9385cdfbba27512d3fef0aee872b9794193a457132af7d4"), + (6, 4, 164, "0242981363ddab69e3f3f7bac4e0aeb8d64ed040eb1925d0d63fbba864a7aebc"), + (6, 5, 165, "0619cf8025f6c6f25b2c4c3609f71224de518108b4d6f577762c5160f2753733"), // EITHER + (6, 6, 166, "27472dea67ef5470579f8f2fcab5f9370334a91af49382780a6ccf0df6027a98"), // EITHER + (6, 7, 167, "afffabb84e4987e15af741ac0f919fa73af954fe44c0da223cb67bdcfd3415c2"), // EITHER + (6, 8, 168, "129b44878eef071c0b2e92b17cdb15139d2d0744f8f5306fa6a4c100396a1e3c"), // EITHER + (7, 2, 172, "b07a9064742825258206c4c4ab041305ad6d3646380740bb54b938962630df6c"), + (7, 3, 173, "64061b0af048381c22e8b08c19a1148de6859a7bcc26ddee348bdf6006554578"), + (7, 4, 174, "4cdab47a556582096b8195a5bf30f63d3effbb1f9ad9e25a48b41ba260739247"), + (7, 5, 175, "feb342a8e0b9c33d120983c3f4df95ca19fded3e0ed3484a0d02f5ec27961d4b"), // EITHER + (7, 6, 176, "7f127b5c827854f721c7592faecb11a239894c653ac6efb95cfcf54e1348c326"), // EITHER + (7, 7, 177, "b03e8b01d887050f762c40cce37042a8b5a8afb601a2476eb138f65b9234efe7"), // EITHER + (7, 8, 178, "eacf451d3112d43be2619b01bbc40915a109d387e21f7b3c083f00fa7abcdf68"), // EITHER + (8, 2, 182, 
"dcf2306c7f9aab2dd0590708864d68ba1a6484632c3a7a4b1c1c56a3d6b0bb50"), + (8, 3, 183, "83c2cdcc981c627f778f061c7eadc6be49e7665c4ed591a0884cfa4adc3a20cf"), + (8, 4, 184, "356d75c370e3ed29c7d458a9d5f5b48798119d0d32dc8e742a423f94647eb085"), + (8, 5, 185, "4b0a3bd10e64f8db57abeddb028ce7c93b89d84b59c2e4805eecf1ef43aef858"), // EITHER + (8, 6, 186, "44ffaeac7c1585d8b8c3afd813ea388b3dcceeebe3ef46bab4219df554ef057f"), // EITHER + (8, 7, 187, "2627846d37793df3ddeb1922892c2723a5fefe36b6d244506fa810c11fb70df7"), // EITHER + (8, 8, 188, "b8da62e75f305a59128b2257162605e541fd252aca8f74ceb2a91fb2a3276d6e"), // EITHER +]; + +#[rustfmt::skip] +pub(crate) const LOW_TINY: &[(usize, usize, u8, &str)] = &[ + // single original/recovery + (1, 1, 111, "17e3108283196d04f027f01c23577076a1db3c4caeed6269995733ffef6d3398"), // EITHER + (1, 2, 112, "cabef22cfe49d9167b4cd40a6a6437b52496af28ff1dcfb6e207c9c337d5affa"), + (1, 3, 113, "fda3b35bb91a71b0ba7b6ea437fbf74648ea6e94a4ce2be885b0cd14f0d8005b"), + (2, 1, 121, "446657e70765196f11c9df04fcacc74ef915cdb634633e0d5755c1ca6e46e323"), + (3, 1, 131, "b93350bf3318af823674c954d274f51ed1bef1a49a5240338d31440aebbf8af5"), + + // 2 .. 
8 + (2, 2, 122, "7d53725125394f5913300b40f09055bb75e6335a936305070da3707c9211dd26"), // EITHER + (2, 3, 123, LOW_2_3), + (2, 4, 124, "3ce3eab3625dae68e164daee1e2bd3304ac7cdcf1ffdd8f81560c2def733e567"), + (2, 5, 125, LOW_2_5), + (2, 6, 126, "f7d65a6334421428930e8223962f5e280a6ed75a252cb82b9ae6a27314708013"), + (2, 7, 127, "cd75f744cf44cf7036758b3bc096192317b962cf2f32039bd67a535ae8b5d251"), + (2, 8, 128, "07964065a913b631645d6e251908650fc4eba4a8b5844cdaab43d76d5f4f3a79"), + (3, 2, 132, "1e4d449a4d59f974258ff2fb8dfde7ea6554bd1b5a7d524d801cc9e0503c0f0a"), + (3, 3, 133, EITHER_3_3), + (3, 4, 134, EITHER_3_4), + (3, 5, 135, LOW_3_5), + (3, 6, 136, "531b4db2b2148c609fe1b3d6ab4e6a012193f28647c0eb1ed13344a94057c6fe"), + (3, 7, 137, "053434cf04886f7f3bef43743700046f57d2e38cb5682ceaeaccf893c5120c78"), + (3, 8, 138, "848b7bc12174a1a74a30aaeccf875fe2be82d4cc8f9b992f04e45607839cd4ff"), + (4, 2, 142, "35a5d572f75bbf8b2a850d503bf988a10dc2f30f15ff5cde611f73ea6cc44d55"), + (4, 3, 143, EITHER_4_3), + (4, 4, 144, "df2c520f15464bfe3448ebbbfbb6bfc2f64237a7a20cfa65bc6f1046e97470d2"), // EITHER + (4, 5, 145, "e7709cc3f00e377e15e624df78a7a0a76b49ed5e4c0bc9035dda9e846935746a"), + (4, 6, 146, "8852c9526508d934315a3e07dd90f9389f5a6639ed7f3aaee74b066cccbcf033"), + (4, 7, 147, "4475531153c9ea65743a64e4f661746dc5cd4c7a70bdc06812f1b73d00d65f36"), + (4, 8, 148, "b682387ee7e5e6a42ff5c8b8050c301225f84f98961ba5aee739f3f20d3cae02"), + (5, 2, 152, "6728e606f2f9dd9559b0370b495685444519c04ffdcfa5120398a0516858a83f"), + (5, 3, 153, "b458c5b07fbacfebb9a836251548505b43d5cbca872eecfad098f2bdda111824"), + (5, 4, 154, "e82d6583b78c42479c98311daa5aa620b64979259bf49ff13c75daf889d3bf22"), + (5, 5, 155, "41b83349a18ec3c20fb19879e0e513512c60078e57b4ff98f57cae0d93effc7c"), // EITHER + (5, 6, 156, "67766507a7cedaa663f798354f274829703143cd068f68075f6380976a65c99a"), // EITHER + (5, 7, 157, "a47d23ed58eec1c809799b1c63bcfe75e527489985cf91c0f42f7ae10c9e8abe"), // EITHER + (5, 8, 158, 
"ff33eb1539f0573faaf0993c63507ed61d809527505fd26e8e2aa2511e3622c5"), // EITHER + (6, 2, 162, "218e25db4678002119fe557c7fc7c6d80fd43c1a9cfc779623ce35455dc8ff75"), + (6, 3, 163, "ac7d0eeb90253d1e846b2e741557320b80bcf2ae0a8901a18c2d137230e8994b"), + (6, 4, 164, "c42c4deb89c2c3f19856628e887cc7db72165e5d836e584ac4fdbfac0a356b56"), + (6, 5, 165, "0619cf8025f6c6f25b2c4c3609f71224de518108b4d6f577762c5160f2753733"), // EITHER + (6, 6, 166, "27472dea67ef5470579f8f2fcab5f9370334a91af49382780a6ccf0df6027a98"), // EITHER + (6, 7, 167, "afffabb84e4987e15af741ac0f919fa73af954fe44c0da223cb67bdcfd3415c2"), // EITHER + (6, 8, 168, "129b44878eef071c0b2e92b17cdb15139d2d0744f8f5306fa6a4c100396a1e3c"), // EITHER + (7, 2, 172, "1a435f1723561eead67bf9a37bda196814afe2c7b77cd82c3c438600ef616e61"), + (7, 3, 173, "86ab51f58f9a0f24deeb1ab83cff451983cf679ab9df81ef1a4daf9c3405495a"), + (7, 4, 174, "192979d61b5dbe112839bc0c4051945568a9ac7c4dc4c1d8e7cc6c4c27213bb9"), + (7, 5, 175, "feb342a8e0b9c33d120983c3f4df95ca19fded3e0ed3484a0d02f5ec27961d4b"), // EITHER + (7, 6, 176, "7f127b5c827854f721c7592faecb11a239894c653ac6efb95cfcf54e1348c326"), // EITHER + (7, 7, 177, "b03e8b01d887050f762c40cce37042a8b5a8afb601a2476eb138f65b9234efe7"), // EITHER + (7, 8, 178, "eacf451d3112d43be2619b01bbc40915a109d387e21f7b3c083f00fa7abcdf68"), // EITHER + (8, 2, 182, "ed7c5de1bd38abf2aeda70670ecc61caac6a133d742fe56e52c69e464ba2e9f5"), + (8, 3, 183, "98e3bbaf60b13e1b11d7a1ed3cc11686e10177ecfab8c7bfecf83c3f011ab353"), + (8, 4, 184, "dee6491a8007d007db853485dc55b013d2243b7ed9f3a62cd2d3fc77f0fd0899"), + (8, 5, 185, "4b0a3bd10e64f8db57abeddb028ce7c93b89d84b59c2e4805eecf1ef43aef858"), // EITHER + (8, 6, 186, "44ffaeac7c1585d8b8c3afd813ea388b3dcceeebe3ef46bab4219df554ef057f"), // EITHER + (8, 7, 187, "2627846d37793df3ddeb1922892c2723a5fefe36b6d244506fa810c11fb70df7"), // EITHER + (8, 8, 188, "b8da62e75f305a59128b2257162605e541fd252aca8f74ceb2a91fb2a3276d6e"), // EITHER +]; + +// 
================================================== +// EITHER RATE + +// 3 original ; 3 recovery ; 133 seed +pub(crate) const EITHER_3_3: &str = + "9502b325f6f50a25e6816144603f1b0cda09e00b4949965babbaf8266ff81e84"; + +// 3 original ; 4 recovery ; 134 seed +pub(crate) const EITHER_3_4: &str = + "e534a7260f1e8aca3c2983503138f158d8977b82f1d3c09b2cedb66d01c01e0b"; + +// 4 original ; 3 recovery ; 143 seed +pub(crate) const EITHER_4_3: &str = + "e43d0903b619f4b17c5389ce869317ce549e3f6d2fe3aa2805ef4d4fb7adce74"; + +// 32768 original ; 32768 recovery ; 11 seed ; shard_bytes = 64 +pub(crate) const EITHER_32768_32768_11: &str = + "432025ead0e3f432f74e30500076a8c2b5554f5dfb7767b62fc3a8126eef7389"; + +// ================================================== +// HIGH RATE + +// 3 original ; 2 recovery ; 132 seed +pub(crate) const HIGH_3_2: &str = + "afd47751b63fb0a62671e0e4a124a8ba51eb6d4b55f79c3dd54a60c28583634f"; + +// 3 original ; 2 recovery ; 232 seed +pub(crate) const HIGH_3_2_232: &str = + "2ee88d495ae1fff216f2865dbbdda2e1a051c5d98c7117a2a0b2ebcdfb57cd33"; + +// 5 original ; 2 recovery ; 152 seed +pub(crate) const HIGH_5_2: &str = + "5387208d6756e3e79558a9b9ddebe0439eb3b08eec2393d4acafce6fc5332683"; + +// 5 original ; 3 recovery ; 153 seed +pub(crate) const HIGH_5_3: &str = + "6f53d5175900d70b4821d1d0c947d0c47a802add0d620bfa72d57dd983dfc156"; + +// 3000 original ; 30000 recovery ; 14 seed ; shard_bytes = 64 +// NOTE: Chunk size is 4096, with partial chunk at end. +pub(crate) const HIGH_3000_30000_14: &str = + "2d7d97fd92be0721b4fcfac8814fe0dd9ad07959eb40558c6ed9af09943fed4e"; + +// 60000 original ; 3000 recovery ; 12 seed ; shard_bytes = 64 +// NOTE: Chunk size is 4096, with partial chunk at end. 
+pub(crate) const HIGH_60000_3000_12: &str = + "88e68e1d86a0fc168a549e195845d20b49ff85734db20d560c36ff2e14f78676"; + +// ================================================== +// LOW RATE + +// 2 original ; 3 recovery ; 123 seed +pub(crate) const LOW_2_3: &str = "f682a6c87c2bcd3e0feddbeff5c34f9d14026b78c44e5fdb5cf3cf71ec15e1f4"; + +// 2 original ; 3 recovery ; 223 seed +pub(crate) const LOW_2_3_223: &str = + "2dc25a5dc42b2d1f94a80489e9f357a48f011f931cdac3ed7c85e2abb07063a2"; + +// 2 original ; 5 recovery ; 125 seed +pub(crate) const LOW_2_5: &str = "24449ae058f54a33b3b7ee568761e68e36bd7171ee2a3271a0fbd2f07ac65a7c"; + +// 3 original ; 5 recovery ; 135 seed +pub(crate) const LOW_3_5: &str = "c23920347f00328dceca9cb6012d797d97f366617cf27aae5c45b4f0b8491552"; + +// 3000 original ; 60000 recovery ; 13 seed ; shard_bytes = 64 +// NOTE: Chunk size is 4096, with partial chunk at end. +pub(crate) const LOW_3000_60000_13: &str = + "d44f9c9ed9158f8aad140794e64a730577327f195753af21b810090966b4b4df"; + +// 30000 original ; 3000 recovery ; 15 seed ; shard_bytes = 64 +// NOTE: Chunk size is 4096, with partial chunk at end. +pub(crate) const LOW_30000_3000_15: &str = + "202f99a2ade121d2404e967d5c04ff390f7a147070a2dcbe71dcf3baeafdf93a"; diff --git a/util/run-all-tests b/util/run-all-tests new file mode 100755 index 0000000..8eff266 --- /dev/null +++ b/util/run-all-tests @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +set -e + +cargo test +cargo test -- --ignored