diff --git a/CMakeLists.txt b/CMakeLists.txt
index 590f34609..71f1fc1b0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required (VERSION 3.9)
-project (Qrack VERSION 9.11.11 DESCRIPTION "High Performance Quantum Bit Simulation" LANGUAGES CXX)
+project (Qrack VERSION 9.11.12 DESCRIPTION "High Performance Quantum Bit Simulation" LANGUAGES CXX)
 
 # Installation commands
 include (GNUInstallDirs)
diff --git a/include/qengine_cpu.hpp b/include/qengine_cpu.hpp
index 1ffd34d83..16b23aa29 100644
--- a/include/qengine_cpu.hpp
+++ b/include/qengine_cpu.hpp
@@ -41,14 +41,12 @@ class QEngineCPU : public QEngine {
     DispatchQueue dispatchQueue;
 #endif
 
-    StateVectorSparsePtr CastStateVecSparse() { return std::dynamic_pointer_cast<StateVectorSparse>(stateVec); }
-
 public:
     QEngineCPU(bitLenInt qBitCount, const bitCapInt& initState, qrack_rand_gen_ptr rgp = nullptr,
         const complex& phaseFac = CMPLX_DEFAULT_ARG, bool doNorm = false, bool randomGlobalPhase = true,
-        bool ignored = false, int64_t ignored2 = -1, bool useHardwareRNG = true, bool useSparseStateVec = false,
-        real1_f norm_thresh = REAL1_EPSILON, std::vector<int64_t> ignored3 = {}, bitLenInt ignored4 = 0U,
-        real1_f ignored5 = _qrack_qunit_sep_thresh);
+        bool ignored = false, int64_t ignored2 = -1, bool useHardwareRNG = true, bool ignored3 = false,
+        real1_f norm_thresh = REAL1_EPSILON, std::vector<int64_t> ignored4 = {}, bitLenInt ignored5 = 0U,
+        real1_f ignored6 = _qrack_qunit_sep_thresh);
 
     ~QEngineCPU() { Dump(); }
 
diff --git a/include/statevector.hpp b/include/statevector.hpp
index 54fc631ea..4961ce017 100644
--- a/include/statevector.hpp
+++ b/include/statevector.hpp
@@ -22,9 +22,6 @@
 #include <future>
 #endif
 
-#include <unordered_map>
-#define SparseStateVecMap std::unordered_map<bitCapIntOcl, complex>
-
 #if ENABLE_COMPLEX_X2
 #if FPPOW == 5
 #include "common/complex8x2simd.hpp"
@@ -36,7 +33,6 @@
 namespace Qrack {
 
 class StateVectorArray;
-class StateVectorSparse;
 
 // This is a buffer struct that's capable of representing controlled single bit gates and arithmetic, when subclassed.
 class StateVector : public ParallelFor {
@@ -74,7 +70,6 @@ class StateVector : public ParallelFor {
     virtual void copy(StateVectorPtr toCopy) = 0;
     virtual void shuffle(StateVectorPtr svp) = 0;
     virtual void get_probs(real1* outArray) = 0;
-    virtual bool is_sparse() = 0;
 };
 
 class StateVectorArray : public StateVector {
@@ -217,332 +212,5 @@ class StateVectorArray : public StateVector {
         par_for(
             0, capacity, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { outArray[lcv] = norm(amplitudes[lcv]); });
     }
-
-    bool is_sparse() { return false; }
-};
-
-class StateVectorSparse : public StateVector {
-protected:
-    SparseStateVecMap amplitudes;
-    std::mutex mtx;
-
-    complex readUnlocked(const bitCapIntOcl& i)
-    {
-        auto it = amplitudes.find(i);
-        return (it == amplitudes.end()) ? ZERO_CMPLX : it->second;
-    }
-
-    complex readLocked(const bitCapIntOcl& i)
-    {
-        std::lock_guard<std::mutex> lock(mtx);
-        return readUnlocked(i);
-    }
-
-public:
-    StateVectorSparse(bitCapIntOcl cap)
-        : StateVector(cap)
-        , amplitudes()
-    {
-    }
-
-    complex read(const bitCapIntOcl& i) { return isReadLocked ? readLocked(i) : readUnlocked(i); }
-
-#if ENABLE_COMPLEX_X2
-    complex2 read2(const bitCapIntOcl& i1, const bitCapIntOcl& i2)
-    {
-        if (isReadLocked) {
-            return complex2(readLocked(i1), readLocked(i2));
-        }
-        return complex2(readUnlocked(i1), readUnlocked(i2));
-    }
-#endif
-
-    void write(const bitCapIntOcl& i, const complex& c)
-    {
-        const bool isCSet = abs(c) > REAL1_EPSILON;
-        if (isCSet) {
-            std::lock_guard<std::mutex> lock(mtx);
-            amplitudes[i] = c;
-        } else {
-            std::lock_guard<std::mutex> lock(mtx);
-            amplitudes.erase(i);
-        }
-    }
-
-    void write2(const bitCapIntOcl& i1, const complex& c1, const bitCapIntOcl& i2, const complex& c2)
-    {
-        const bool isC1Set = abs(c1) > REAL1_EPSILON;
-        const bool isC2Set = abs(c2) > REAL1_EPSILON;
-        if (!isC1Set && !isC2Set) {
-            std::lock_guard<std::mutex> lock(mtx);
-            amplitudes.erase(i1);
-            amplitudes.erase(i2);
-        } else if (isC1Set && isC2Set) {
-            std::lock_guard<std::mutex> lock(mtx);
-            amplitudes[i1] = c1;
-            amplitudes[i2] = c2;
-        } else if (isC1Set) {
-            std::lock_guard<std::mutex> lock(mtx);
-            amplitudes.erase(i2);
-            amplitudes[i1] = c1;
-        } else {
-            std::lock_guard<std::mutex> lock(mtx);
-            amplitudes.erase(i1);
-            amplitudes[i2] = c2;
-        }
-    }
-
-    void clear()
-    {
-        std::lock_guard<std::mutex> lock(mtx);
-        amplitudes.clear();
-    }
-
-    void copy_in(const complex* copyIn)
-    {
-        if (!copyIn) {
-            clear();
-            return;
-        }
-
-        std::lock_guard<std::mutex> lock(mtx);
-        for (bitCapIntOcl i = 0U; i < capacity; ++i) {
-            if (abs(copyIn[i]) <= REAL1_EPSILON) {
-                amplitudes.erase(i);
-            } else {
-                amplitudes[i] = copyIn[i];
-            }
-        }
-    }
-
-    void copy_in(const complex* copyIn, const bitCapIntOcl offset, const bitCapIntOcl length)
-    {
-        if (!copyIn) {
-            std::lock_guard<std::mutex> lock(mtx);
-            for (bitCapIntOcl i = 0U; i < length; ++i) {
-                amplitudes.erase(i);
-            }
-
-            return;
-        }
-
-        std::lock_guard<std::mutex> lock(mtx);
-        for (bitCapIntOcl i = 0U; i < length; ++i) {
-            if (abs(copyIn[i]) <= REAL1_EPSILON) {
-                amplitudes.erase(i);
-            } else {
-                amplitudes[i + offset] = copyIn[i];
-            }
-        }
-    }
-
-    void copy_in(
-        StateVectorPtr copyInSv, const bitCapIntOcl srcOffset, const bitCapIntOcl dstOffset, const bitCapIntOcl length)
-    {
-        StateVectorSparsePtr copyIn = std::dynamic_pointer_cast<StateVectorSparse>(copyInSv);
-
-        if (!copyIn) {
-            std::lock_guard<std::mutex> lock(mtx);
-            for (bitCapIntOcl i = 0U; i < length; ++i) {
-                amplitudes.erase(i + srcOffset);
-            }
-
-            return;
-        }
-
-        std::lock_guard<std::mutex> lock(mtx);
-        for (bitCapIntOcl i = 0U; i < length; ++i) {
-            complex amp = copyIn->read(i + srcOffset);
-            if (abs(amp) <= REAL1_EPSILON) {
-                amplitudes.erase(i + srcOffset);
-            } else {
-                amplitudes[i + dstOffset] = amp;
-            }
-        }
-    }
-
-    void copy_out(complex* copyOut)
-    {
-        for (bitCapIntOcl i = 0U; i < capacity; ++i) {
-            copyOut[i] = read(i);
-        }
-    }
-
-    void copy_out(complex* copyOut, const bitCapIntOcl offset, const bitCapIntOcl length)
-    {
-        for (bitCapIntOcl i = 0U; i < length; ++i) {
-            copyOut[i] = read(i + offset);
-        }
-    }
-
-    void copy(const StateVectorPtr toCopy) { copy(std::dynamic_pointer_cast<StateVectorSparse>(toCopy)); }
-
-    void copy(StateVectorSparsePtr toCopy)
-    {
-        std::lock_guard<std::mutex> lock(mtx);
-        amplitudes = toCopy->amplitudes;
-    }
-
-    void shuffle(StateVectorPtr svp) { shuffle(std::dynamic_pointer_cast<StateVectorSparse>(svp)); }
-
-    void shuffle(StateVectorSparsePtr svp)
-    {
-        const size_t halfCap = (size_t)(capacity >> 1U);
-        std::lock_guard<std::mutex> lock(mtx);
-        for (bitCapIntOcl i = 0U; i < halfCap; ++i) {
-            complex amp = svp->read(i);
-            svp->write(i, read(i + halfCap));
-            write(i + halfCap, amp);
-        }
-    }
-
-    void get_probs(real1* outArray)
-    {
-        for (bitCapIntOcl i = 0U; i < capacity; ++i) {
-            outArray[i] = norm(read(i));
-        }
-    }
-
-    bool is_sparse() { return (amplitudes.size() < (size_t)(capacity >> 1U)); }
-
-    std::vector<bitCapIntOcl> iterable()
-    {
-        std::vector<std::vector<bitCapIntOcl>> toRet(GetConcurrencyLevel());
-        std::vector<std::vector<bitCapIntOcl>>::iterator toRetIt;
-
-        // For lock_guard scope
-        if (true) {
-            std::lock_guard<std::mutex> lock(mtx);
-
-            par_for(0U, amplitudes.size(), [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
-                auto it = amplitudes.begin();
-                std::advance(it, lcv);
-                toRet[cpu].push_back(it->first);
-            });
-        }
-
-        for (int64_t i = (int64_t)(toRet.size() - 1U); i >= 0; i--) {
-            if (toRet[i].empty()) {
-                toRetIt = toRet.begin();
-                std::advance(toRetIt, i);
-                toRet.erase(toRetIt);
-            }
-        }
-
-        if (toRet.empty()) {
-            return {};
-        }
-
-        while (toRet.size() > 1U) {
-            // Work odd unit into collapse sequence:
-            if (toRet.size() & 1U) {
-                toRet[toRet.size() - 2U].insert(
-                    toRet[toRet.size() - 2U].end(), toRet[toRet.size() - 1U].begin(), toRet[toRet.size() - 1U].end());
-                toRet.pop_back();
-            }
-
-            const int64_t combineCount = (int64_t)(toRet.size() >> 1U);
-#if ENABLE_PTHREAD
-            std::vector<std::future<void>> futures(combineCount);
-            for (int64_t i = (combineCount - 1); i >= 0; i--) {
-                futures[i] = std::async(std::launch::async, [i, combineCount, &toRet]() {
-                    toRet[i].insert(toRet[i].end(), toRet[i + combineCount].begin(), toRet[i + combineCount].end());
-                    toRet[i + combineCount].clear();
-                });
-            }
-            for (int64_t i = (combineCount - 1); i >= 0; i--) {
-                futures[i].get();
-                toRet.pop_back();
-            }
-#else
-            for (int64_t i = (combineCount - 1); i >= 0; i--) {
-                toRet[i].insert(toRet[i].end(), toRet[i + combineCount].begin(), toRet[i + combineCount].end());
-                toRet.pop_back();
-            }
-#endif
-        }
-
-        return toRet[0U];
-    }
-
-    /// Returns empty if iteration should be over full set, otherwise just the iterable elements:
-    std::set<bitCapIntOcl> iterable(
-        const bitCapIntOcl& setMask, const bitCapIntOcl& filterMask = 0, const bitCapIntOcl& filterValues = 0)
-    {
-        if (!filterMask && filterValues) {
-            return {};
-        }
-
-        const bitCapIntOcl unsetMask = ~setMask;
-
-        std::vector<std::set<bitCapIntOcl>> toRet(GetConcurrencyLevel());
-        std::vector<std::set<bitCapIntOcl>>::iterator toRetIt;
-
-        // For lock_guard scope
-        if (true) {
-            std::lock_guard<std::mutex> lock(mtx);
-
-            if (!filterMask && !filterValues) {
-                par_for(0U, amplitudes.size(), [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
-                    auto it = amplitudes.begin();
-                    std::advance(it, lcv);
-                    toRet[cpu].insert(it->first & unsetMask);
-                });
-            } else {
-                const bitCapIntOcl unfilterMask = ~filterMask;
-                par_for(0U, amplitudes.size(), [&](const bitCapIntOcl lcv, const unsigned& cpu) {
-                    auto it = amplitudes.begin();
-                    std::advance(it, lcv);
-                    if ((it->first & filterMask) == filterValues) {
-                        toRet[cpu].insert(it->first & unsetMask & unfilterMask);
-                    }
-                });
-            }
-        }
-
-        for (int64_t i = (int64_t)(toRet.size() - 1U); i >= 0; i--) {
-            if (toRet[i].empty()) {
-                toRetIt = toRet.begin();
-                std::advance(toRetIt, i);
-                toRet.erase(toRetIt);
-            }
-        }
-
-        if (toRet.empty()) {
-            return {};
-        }
-
-        while (toRet.size() > 1U) {
-            // Work odd unit into collapse sequence:
-            if (toRet.size() & 1U) {
-                toRet[toRet.size() - 2U].insert(toRet[toRet.size() - 1U].begin(), toRet[toRet.size() - 1U].end());
-                toRet.pop_back();
-            }
-
-            const int64_t combineCount = (int64_t)(toRet.size() >> 1U);
-#if ENABLE_PTHREAD
-            std::vector<std::future<void>> futures(combineCount);
-            for (int64_t i = (combineCount - 1); i >= 0; i--) {
-                futures[i] = std::async(std::launch::async, [i, combineCount, &toRet]() {
-                    toRet[i].insert(toRet[i + combineCount].begin(), toRet[i + combineCount].end());
-                    toRet[i + combineCount].clear();
-                });
-            }
-
-            for (int64_t i = (combineCount - 1); i >= 0; i--) {
-                futures[i].get();
-                toRet.pop_back();
-            }
-#else
-            for (int64_t i = (combineCount - 1); i >= 0; i--) {
-                toRet[i].insert(toRet[i + combineCount].begin(), toRet[i + combineCount].end());
-                toRet.pop_back();
-            }
-#endif
-        }
-
-        return toRet[0U];
-    }
 };
-
 } // namespace Qrack
diff --git a/src/qengine/arithmetic.cpp b/src/qengine/arithmetic.cpp
index 3dd3b4625..3a05a7865 100644
--- a/src/qengine/arithmetic.cpp
+++ b/src/qengine/arithmetic.cpp
@@ -47,18 +47,12 @@ void QEngineCPU::ROL(bitLenInt shift, bitLenInt start, bitLenInt length)
     StateVectorPtr nStateVec = AllocStateVec(maxQPowerOcl);
     stateVec->isReadLocked = false;
 
-    ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
+    par_for(0, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
         const bitCapIntOcl otherRes = lcv & otherMask;
         const bitCapIntOcl regInt = (lcv & regMask) >> start;
         const bitCapIntOcl outInt = (regInt >> (length - shift)) | ((regInt << shift) & lengthMask);
         nStateVec->write((outInt << start) | otherRes, stateVec->read(lcv));
-    };
-
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(), fn);
-    } else {
-        par_for(0, maxQPowerOcl, fn);
-    }
+    });
 
     ResetStateVec(nStateVec);
 }
@@ -91,18 +85,12 @@ void QEngineCPU::INC(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt len
     StateVectorPtr nStateVec = AllocStateVec(maxQPowerOcl);
     stateVec->isReadLocked = false;
 
-    ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
+    par_for(0, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
         const bitCapIntOcl otherRes = lcv & otherMask;
         const bitCapIntOcl inOutInt = (lcv & inOutMask) >> inOutStart;
         const bitCapIntOcl outInt = (inOutInt + toAddOcl) & lengthMask;
         nStateVec->write((outInt << inOutStart) | otherRes, stateVec->read(lcv));
-    };
-
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(), fn);
-    } else {
-        par_for(0, maxQPowerOcl, fn);
-    }
+    });
 
     ResetStateVec(nStateVec);
 }
@@ -249,7 +237,7 @@ void QEngineCPU::INCS(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt le
     nStateVec->clear();
     stateVec->isReadLocked = false;
 
-    ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
+    par_for(0, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
         const bitCapIntOcl otherRes = lcv & otherMask;
         const bitCapIntOcl inOutInt = (lcv & inOutMask) >> inOutStart;
         const bitCapIntOcl outInt = inOutInt + toAddOcl;
@@ -261,13 +249,7 @@ void QEngineCPU::INCS(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt le
         } else {
             nStateVec->write(outRes, stateVec->read(lcv));
         }
-    };
-
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(), fn);
-    } else {
-        par_for(0, maxQPowerOcl, fn);
-    }
+    });
 
     ResetStateVec(nStateVec);
 }
@@ -778,7 +760,7 @@ void QEngineCPU::INCBCD(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt
     nStateVec->clear();
     stateVec->isReadLocked = false;
 
-    ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
+    par_for(0, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
         const bitCapIntOcl otherRes = lcv & otherMask;
         bitCapIntOcl partToAdd = toAddOcl;
         bitCapIntOcl inOutInt = (lcv & inOutMask) >> inOutStart;
@@ -809,13 +791,7 @@ void QEngineCPU::INCBCD(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt
         } else {
             nStateVec->write(lcv, stateVec->read(lcv));
         }
-    };
-
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(), fn);
-    } else {
-        par_for(0, maxQPowerOcl, fn);
-    }
+    });
 
     ResetStateVec(nStateVec);
 }
@@ -975,11 +951,7 @@ bitCapInt QEngineCPU::IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bi
         };
     }
 
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(0, skipPower, 0), fn);
-    } else {
-        par_for_skip(0, maxQPowerOcl, skipPower, valueLength, fn);
-    }
+    par_for_skip(0, maxQPowerOcl, skipPower, valueLength, fn);
 
     ResetStateVec(nStateVec);
 
@@ -1046,7 +1018,7 @@ bitCapInt QEngineCPU::IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bi
     const bitCapIntOcl otherMask = (maxQPowerOcl - 1U) & (~(inputMask | outputMask | carryMask));
     const bitCapIntOcl skipPower = pow2Ocl(carryIndex);
 
-    ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
+    par_for_skip(0, maxQPowerOcl, skipPower, 1, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
         // These are qubits that are not directly involved in the
         // operation. We iterate over all of their possibilities, but their
         // input value matches their output value:
@@ -1093,13 +1065,7 @@ bitCapInt QEngineCPU::IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bi
         outputRes = outputInt << valueStart;
 
         nStateVec->write(outputRes | inputRes | otherRes | carryRes, stateVec->read(lcv));
-    };
-
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(0, skipPower, 0), fn);
-    } else {
-        par_for_skip(0, maxQPowerOcl, skipPower, 1, fn);
-    }
+    });
 
     // We dealloc the old state vector and replace it with the one we
     // just calculated.
@@ -1168,7 +1134,7 @@ bitCapInt QEngineCPU::IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bi
     const bitCapIntOcl otherMask = (maxQPowerOcl - 1U) & (~(inputMask | outputMask | carryMask));
     const bitCapIntOcl skipPower = pow2Ocl(carryIndex);
 
-    ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
+    par_for_skip(0, maxQPowerOcl, skipPower, valueLength, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
         // These are qubits that are not directly involved in the
         // operation. We iterate over all of their possibilities, but their
         // input value matches their output value:
@@ -1219,13 +1185,7 @@ bitCapInt QEngineCPU::IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bi
         outputRes = outputInt << valueStart;
 
         nStateVec->write(outputRes | inputRes | otherRes | carryRes, stateVec->read(lcv));
-    };
-
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(0, skipPower, 0), fn);
-    } else {
-        par_for_skip(0, maxQPowerOcl, skipPower, valueLength, fn);
-    }
+    });
 
     // We dealloc the old state vector and replace it with the one we
     // just calculated.
@@ -1256,7 +1216,7 @@ void QEngineCPU::Hash(bitLenInt start, bitLenInt length, const unsigned char* va
     nStateVec->clear();
     stateVec->isReadLocked = false;
 
-    ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
+    par_for(0, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
         const bitCapIntOcl inputRes = lcv & inputMask;
         const bitCapIntOcl inputInt = inputRes >> start;
         bitCapIntOcl outputInt = 0;
@@ -1273,13 +1233,7 @@ void QEngineCPU::Hash(bitLenInt start, bitLenInt length, const unsigned char* va
         }
         bitCapIntOcl outputRes = outputInt << start;
         nStateVec->write(outputRes | (lcv & ~inputRes), stateVec->read(lcv));
-    };
-
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(), fn);
-    } else {
-        par_for(0, maxQPowerOcl, fn);
-    }
+    });
 
     ResetStateVec(nStateVec);
 }
diff --git a/src/qengine/state.cpp b/src/qengine/state.cpp
index 696f5c0ac..8c32ec14a 100644
--- a/src/qengine/state.cpp
+++ b/src/qengine/state.cpp
@@ -452,18 +452,7 @@ void QEngineCPU::Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const comp
                 }
             }
 
-            if (stateVec->is_sparse()) {
-                const bitCapIntOcl setMask = offset1 ^ offset2;
-                bitCapIntOcl filterMask = 0U;
-                for (bitLenInt i = 0U; i < bitCount; ++i) {
-                    filterMask |= qPowersSorted[i];
-                }
-                filterMask &= ~setMask;
-                const bitCapIntOcl filterValues = filterMask & offset1 & offset2;
-                par_for_set(CastStateVecSparse()->iterable(setMask, filterMask, filterValues), fn);
-            } else {
-                par_for_mask(0U, maxQPowerOcl, qPowersSorted, fn);
-            }
+            par_for_mask(0U, maxQPowerOcl, qPowersSorted, fn);
 
             if (doApplyNorm) {
                 runningNorm = ONE_R1;
@@ -635,18 +624,7 @@ void QEngineCPU::Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const comp
                 }
             }
 
-            if (stateVec->is_sparse()) {
-                const bitCapIntOcl setMask = offset1 ^ offset2;
-                bitCapIntOcl filterMask = 0U;
-                for (bitLenInt i = 0U; i < bitCount; ++i) {
-                    filterMask |= qPowersSorted[i];
-                }
-                filterMask &= ~setMask;
-                const bitCapIntOcl filterValues = filterMask & offset1 & offset2;
-                par_for_set(CastStateVecSparse()->iterable(setMask, filterMask, filterValues), fn);
-            } else {
-                par_for_mask(0U, maxQPowerOcl, qPowersSorted, fn);
-            }
+            par_for_mask(0U, maxQPowerOcl, qPowersSorted, fn);
 
             if (doApplyNorm) {
                 runningNorm = ONE_R1;
@@ -687,11 +665,6 @@ void QEngineCPU::XMask(const bitCapInt& mask)
         return;
     }
 
-    if (stateVec->is_sparse()) {
-        QInterface::XMask(mask);
-        return;
-    }
-
     Dispatch(maxQPowerOcl, [this, mask] {
         const bitCapIntOcl maskOcl = (bitCapIntOcl)mask;
         const bitCapIntOcl otherMask = (maxQPowerOcl - 1U) ^ maskOcl;
@@ -734,11 +707,6 @@ void QEngineCPU::PhaseParity(real1_f radians, const bitCapInt& mask)
         return;
     }
 
-    if (stateVec->is_sparse()) {
-        QInterface::PhaseParity(radians, mask);
-        return;
-    }
-
     Dispatch(maxQPowerOcl, [this, mask, radians] {
         const bitCapIntOcl parityStartSize = 4U * sizeof(bitCapIntOcl);
         const complex phaseFac = std::polar(ONE_R1, (real1)(radians / 2));
@@ -787,11 +755,6 @@ void QEngineCPU::PhaseRootNMask(bitLenInt n, const bitCapInt& mask)
         return;
     }
 
-    if (stateVec->is_sparse()) {
-        QInterface::PhaseRootNMask(n, mask);
-        return;
-    }
-
     Dispatch(maxQPowerOcl, [this, n, mask, radians] {
         const bitCapIntOcl maskOcl = (bitCapIntOcl)mask;
         const bitCapIntOcl nPhases = pow2Ocl(n);
@@ -935,7 +898,8 @@ void QEngineCPU::UniformParityRZ(const bitCapInt& mask, real1_f angle)
         const real1 sine = (real1)sin(angle);
         const complex phaseFac(cosine, sine);
         const complex phaseFacAdj(cosine, -sine);
-        ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
+
+        par_for(0U, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
             bitCapIntOcl perm = lcv & (bitCapIntOcl)mask;
             // From https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive
             // c accumulates the total bits set in v
@@ -945,13 +909,7 @@ void QEngineCPU::UniformParityRZ(const bitCapInt& mask, real1_f angle)
                 perm &= perm - 1U;
             }
             stateVec->write(lcv, stateVec->read(lcv) * ((c & 1U) ? phaseFac : phaseFacAdj));
-        };
-
-        if (stateVec->is_sparse()) {
-            par_for_set(CastStateVecSparse()->iterable(), fn);
-        } else {
-            par_for(0U, maxQPowerOcl, fn);
-        }
+        });
     });
 }
 
@@ -1057,21 +1015,14 @@ bitLenInt QEngineCPU::Compose(QEngineCPUPtr toCopy)
     StateVectorPtr nStateVec = AllocStateVec(nMaxQPower);
     stateVec->isReadLocked = false;
 
-    ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
-        nStateVec->write(lcv, stateVec->read(lcv & startMask) * toCopy->stateVec->read((lcv & endMask) >> qubitCount));
-    };
-
     if ((toCopy->doNormalize) && (toCopy->runningNorm != ONE_R1)) {
         toCopy->NormalizeState();
     }
     toCopy->Finish();
 
-    if (stateVec->is_sparse() || toCopy->stateVec->is_sparse()) {
-        par_for_sparse_compose(
-            CastStateVecSparse()->iterable(), toCopy->CastStateVecSparse()->iterable(), qubitCount, fn);
-    } else {
-        par_for(0U, nMaxQPower, fn);
-    }
+    par_for(0U, nMaxQPower, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
+        nStateVec->write(lcv, stateVec->read(lcv & startMask) * toCopy->stateVec->read((lcv & endMask) >> qubitCount));
+    });
 
     SetQubitCount(nQubitCount);
 
@@ -1457,19 +1408,15 @@ real1_f QEngineCPU::Prob(bitLenInt qubit)
     }
 
     stateVec->isReadLocked = false;
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(qPower, qPower, qPower), fn);
-    } else {
 #if ENABLE_COMPLEX_X2
-        if (qPower == 1U) {
-            par_for(0U, maxQPowerOcl >> 2U, fn);
-        } else {
-            par_for_skip(0U, maxQPowerOcl >> 1U, qPower >> 1U, 1U, fn);
-        }
+    if (qPower == 1U) {
+        par_for(0U, maxQPowerOcl >> 2U, fn);
+    } else {
+        par_for_skip(0U, maxQPowerOcl >> 1U, qPower >> 1U, 1U, fn);
+    }
 #else
-        par_for_skip(0U, maxQPowerOcl, qPower, 1U, fn);
+    par_for_skip(0U, maxQPowerOcl, qPower, 1U, fn);
 #endif
-    }
     stateVec->isReadLocked = true;
 
     real1 oneChance = ZERO_R1;
@@ -1517,11 +1464,7 @@ real1_f QEngineCPU::CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenI
     });
 
     stateVec->isReadLocked = false;
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(qPower, qPower, qPower), fn);
-    } else {
-        par_for_skip(0U, maxQPowerOcl, qPower, 1U, fn);
-    }
+    par_for_skip(0U, maxQPowerOcl, qPower, 1U, fn);
     stateVec->isReadLocked = true;
 
     real1 oneChance = ZERO_R1;
@@ -1554,11 +1497,7 @@ real1_f QEngineCPU::ProbReg(bitLenInt start, bitLenInt length, const bitCapInt&
     };
 
     stateVec->isReadLocked = false;
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(0, bitRegMaskOcl(start, length), perm), fn);
-    } else {
-        par_for_skip(0U, maxQPowerOcl, pow2Ocl(start), length, fn);
-    }
+    par_for_skip(0U, maxQPowerOcl, pow2Ocl(start), length, fn);
     stateVec->isReadLocked = true;
 
     real1 prob = ZERO_R1;
@@ -1632,7 +1571,9 @@ real1_f QEngineCPU::ProbParity(const bitCapInt& mask)
     std::unique_ptr<real1[]> oddChanceBuff(new real1[numCores]());
 
     const bitCapIntOcl maskOcl = (bitCapIntOcl)mask;
-    ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
+
+    stateVec->isReadLocked = false;
+    par_for(0U, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
         bool parity = false;
         bitCapIntOcl v = lcv & maskOcl;
         while (v) {
@@ -1643,14 +1584,7 @@ real1_f QEngineCPU::ProbParity(const bitCapInt& mask)
         if (parity) {
             oddChanceBuff[cpu] += norm(stateVec->read(lcv));
         }
-    };
-
-    stateVec->isReadLocked = false;
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(), fn);
-    } else {
-        par_for(0U, maxQPowerOcl, fn);
-    }
+    });
     stateVec->isReadLocked = true;
 
     for (unsigned i = 0U; i < numCores; ++i) {
@@ -1704,7 +1638,9 @@ bool QEngineCPU::ForceMParity(const bitCapInt& mask, bool result, bool doForce)
     std::unique_ptr<real1[]> oddChanceBuff(new real1[numCores]());
 
     const bitCapIntOcl maskOcl = (bitCapIntOcl)mask;
-    ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
+
+    stateVec->isReadLocked = false;
+    par_for(0U, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
         bool parity = false;
         bitCapIntOcl v = lcv & maskOcl;
         while (v) {
@@ -1717,14 +1653,7 @@ bool QEngineCPU::ForceMParity(const bitCapInt& mask, bool result, bool doForce)
         } else {
             stateVec->write(lcv, ZERO_CMPLX);
         }
-    };
-
-    stateVec->isReadLocked = false;
-    if (stateVec->is_sparse()) {
-        par_for_set(CastStateVecSparse()->iterable(), fn);
-    } else {
-        par_for(0U, maxQPowerOcl, fn);
-    }
+    });
     stateVec->isReadLocked = true;
 
     for (unsigned i = 0U; i < numCores; ++i) {
@@ -1811,19 +1740,14 @@ void QEngineCPU::ApplyM(const bitCapInt& regMask, const bitCapInt& result, const
     Dispatch(maxQPowerOcl, [this, regMask, result, nrm] {
         const bitCapIntOcl regMaskOcl = (bitCapIntOcl)regMask;
         const bitCapIntOcl resultOcl = (bitCapIntOcl)result;
-        ParallelFunc fn = [&](const bitCapIntOcl& i, const unsigned& cpu) {
+
+        par_for(0U, maxQPowerOcl, [&](const bitCapIntOcl& i, const unsigned& cpu) {
             if ((i & regMaskOcl) == resultOcl) {
                 stateVec->write(i, nrm * stateVec->read(i));
             } else {
                 stateVec->write(i, ZERO_CMPLX);
             }
-        };
-
-        if (stateVec->is_sparse()) {
-            par_for_set(CastStateVecSparse()->iterable(), fn);
-        } else {
-            par_for(0U, maxQPowerOcl, fn);
-        }
+        });
 
         runningNorm = ONE_R1;
     });
@@ -1899,10 +1823,6 @@ void QEngineCPU::UpdateRunningNorm(real1_f norm_thresh)
 
 StateVectorPtr QEngineCPU::AllocStateVec(bitCapIntOcl elemCount)
 {
-    if (isSparse) {
-        return std::make_shared<StateVectorSparse>(elemCount);
-    } else {
-        return std::make_shared<StateVectorArray>(elemCount);
-    }
+    return std::make_shared<StateVectorArray>(elemCount);
 }
 } // namespace Qrack
diff --git a/src/qunit.cpp b/src/qunit.cpp
index 8c43140aa..f5bb898fb 100644
--- a/src/qunit.cpp
+++ b/src/qunit.cpp
@@ -366,7 +366,7 @@ void QUnit::Detach(bitLenInt start, bitLenInt length, QUnitPtr dest)
             dest->shards[i] = shard;
         }
     }
-    
+
     // Find the rest of the qubits.
     for (auto&& shard : shards) {
         const auto subunit = subunits.find(shard.unit);
diff --git a/test/benchmarks.cpp b/test/benchmarks.cpp
index 5cda08596..8dafa373f 100644
--- a/test/benchmarks.cpp
+++ b/test/benchmarks.cpp
@@ -121,7 +121,7 @@ void benchmarkLoopVariable(std::function<void(QInterfacePtr, bitLenInt)> fn, bit
 
     for (bitLenInt numBits = mnQbts; numBits <= mxQbts; numBits++) {
         QInterfacePtr qftReg = CreateQuantumInterface(engineStack, numBits, ZERO_BCI, rng, CMPLX_DEFAULT_ARG,
-            enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList);
+            enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList);
         if (disable_t_injection) {
             qftReg->SetTInjection(false);
         }
@@ -182,7 +182,7 @@ void benchmarkLoopVariable(std::function<void(QInterfacePtr, bitLenInt)> fn, bit
                 // Re-alloc:
                 qftReg =
                     CreateQuantumInterface(engineStack, numBits, ZERO_BCI, rng, CMPLX_DEFAULT_ARG, enable_normalization,
-                        true, use_host_dma, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList);
+                        true, use_host_dma, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList);
                 if (disable_t_injection) {
                     qftReg->SetTInjection(false);
                 }
@@ -207,7 +207,7 @@ void benchmarkLoopVariable(std::function<void(QInterfacePtr, bitLenInt)> fn, bit
 
                             // Re-alloc:
                             qftReg = CreateQuantumInterface(engineStack, numBits, ZERO_BCI, rng, CMPLX_DEFAULT_ARG,
-                                enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, sparse,
+                                enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, false,
                                 REAL1_EPSILON, devList);
                             if (disable_t_injection) {
                                 qftReg->SetTInjection(false);
@@ -261,7 +261,7 @@ void benchmarkLoopVariable(std::function<void(QInterfacePtr, bitLenInt)> fn, bit
                 // Re-alloc:
                 qftReg =
                     CreateQuantumInterface(engineStack, numBits, ZERO_BCI, rng, CMPLX_DEFAULT_ARG, enable_normalization,
-                        true, use_host_dma, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList);
+                        true, use_host_dma, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList);
                 if (disable_t_injection) {
                     qftReg->SetTInjection(false);
                 }
@@ -4749,7 +4749,7 @@ TEST_CASE("test_universal_circuit_digital_cross_entropy", "[supreme]")
     std::cout << "Gold standard vs. gold standard cross entropy (out of 1.0): " << crossEntropy << std::endl;
 
     QInterfacePtr testCase = CreateQuantumInterface({ testEngineType, testSubEngineType }, n, ZERO_BCI, rng, ONE_CMPLX,
-        enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, sparse);
+        enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, false);
     if (disable_t_injection) {
         testCase->SetTInjection(false);
     }
diff --git a/test/benchmarks_main.cpp b/test/benchmarks_main.cpp
index c7aa3c98c..622aec831 100644
--- a/test/benchmarks_main.cpp
+++ b/test/benchmarks_main.cpp
@@ -36,7 +36,6 @@ bool disable_terminal_measurement = false;
 bool use_host_dma = false;
 bool disable_hardware_rng = false;
 bool async_time = false;
-bool sparse = false;
 int device_id = -1;
 bitLenInt max_qubits = 24;
 bitLenInt min_qubits = 4;
@@ -160,8 +159,6 @@ int main(int argc, char* argv[])
                                                "type should be binary. (By default, it is "
                                                "human-readable.)") |
         Opt(single_qubit_run)["--single"]("Only run single (maximum) qubit count for tests") |
-        Opt(sparse)["--sparse"](
-            "(For QEngineCPU, under QUnit:) Use a state vector optimized for sparse representation and iteration.") |
         Opt(benchmarkSamples, "samples")["--samples"]("number of samples to collect (default: 100)") |
         Opt(benchmarkDepth, "depth")["--benchmark-depth"](
             "depth of randomly constructed circuits, when applicable, with 1 round of single qubit and 1 round of "
@@ -423,11 +420,7 @@ int main(int argc, char* argv[])
     if (num_failed == 0 && qunit) {
         testEngineType = QINTERFACE_QUNIT;
         if (num_failed == 0 && cpu) {
-            if (sparse) {
-                session.config().stream() << "############ QUnit -> QEngine -> CPU (Sparse) ############" << std::endl;
-            } else {
-                session.config().stream() << "############ QUnit -> QEngine -> CPU ############" << std::endl;
-            }
+            session.config().stream() << "############ QUnit -> QEngine -> CPU ############" << std::endl;
             testSubEngineType = QINTERFACE_CPU;
             num_failed = session.run();
         }
diff --git a/test/test_main.cpp b/test/test_main.cpp
index 517ecd6ef..5184dfa03 100644
--- a/test/test_main.cpp
+++ b/test/test_main.cpp
@@ -32,7 +32,6 @@ bool disable_reactive_separation = false;
 bool use_host_dma = false;
 bool disable_hardware_rng = false;
 bool async_time = false;
-bool sparse = false;
 int device_id = -1;
 bitLenInt max_qubits = 24;
 std::string mOutputFileName;
@@ -126,8 +125,6 @@ int main(int argc, char* argv[])
         Opt(isBinaryOutput)["--binary-output"]("If included, specifies that the --measure-output file "
                                                "type should be binary. (By default, it is "
                                                "human-readable.)") |
-        Opt(sparse)["--sparse"](
-            "(For QEngineCPU, under QUnit:) Use a state vector optimized for sparse representation and iteration.") |
         Opt(benchmarkSamples, "samples")["--samples"]("number of samples to collect (default: 100)") |
         Opt(benchmarkDepth, "depth")["--benchmark-depth"](
             "depth of randomly constructed circuits, when applicable, with 1 round of single qubit and 1 round of "
@@ -301,11 +298,7 @@ int main(int argc, char* argv[])
     if (num_failed == 0 && qunit) {
         testEngineType = QINTERFACE_QUNIT;
         if (num_failed == 0 && cpu) {
-            if (sparse) {
-                session.config().stream() << "############ QUnit -> QEngine -> CPU (Sparse) ############" << std::endl;
-            } else {
-                session.config().stream() << "############ QUnit -> QEngine -> CPU ############" << std::endl;
-            }
+            session.config().stream() << "############ QUnit -> QEngine -> CPU ############" << std::endl;
             testSubEngineType = QINTERFACE_CPU;
             num_failed = session.run();
         }
@@ -539,7 +532,7 @@ QInterfaceTestFixture::QInterfaceTestFixture()
 
     qftReg = CreateQuantumInterface(
         { testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, 20, ZERO_BCI, rng,
-        ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList);
+        ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList);
 
     if (disable_t_injection) {
         qftReg->SetTInjection(false);
diff --git a/test/tests.cpp b/test/tests.cpp
index 2938ecb7c..8064c1b37 100644
--- a/test/tests.cpp
+++ b/test/tests.cpp
@@ -65,7 +65,7 @@ QInterfacePtr MakeEngine(bitLenInt qubitCount)
 {
     QInterfacePtr toRet = CreateQuantumInterface(
         { testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, qubitCount, ZERO_BCI, rng,
-        ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList);
+        ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList);
 
     if (disable_t_injection) {
         toRet->SetTInjection(false);
@@ -659,7 +659,7 @@ TEST_CASE_METHOD(QInterfaceTestFixture, "test_cswap")
 
     QInterfacePtr qftReg2 =
         CreateQuantumInterface({ testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType },
-            20U, ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse,
+            20U, ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false,
             REAL1_DEFAULT_ARG, devList, 10);
 
     control[0] = 9;
@@ -1257,11 +1257,11 @@ TEST_CASE_METHOD(QInterfaceTestFixture, "test_approxcompare")
 {
     qftReg =
         CreateQuantumInterface({ testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, 3U,
-            ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse,
+            ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false,
             REAL1_DEFAULT_ARG, devList, 10);
     QInterfacePtr qftReg2 =
         CreateQuantumInterface({ testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, 3U,
-            ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse,
+            ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false,
             REAL1_DEFAULT_ARG, devList, 10);
 
     qftReg->X(0);
@@ -2881,7 +2881,7 @@ TEST_CASE_METHOD(QInterfaceTestFixture, "test_zero_phase_flip")
     REQUIRE_THAT(qftReg, HasProbability(0, 8, 0x03));
 
     QInterfacePtr qftReg2 = CreateQuantumInterface({ testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, 20U,
-        ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse,
+        ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false,
         REAL1_DEFAULT_ARG, devList, 10);
 
     qftReg2->SetPermutation(3U << 9U);
@@ -2958,7 +2958,7 @@ TEST_CASE_METHOD(QInterfaceTestFixture, "test_decompose", "[sd_xfail]")
     // Try across device/heap allocation case:
     qftReg2 = CreateQuantumInterface(
         { testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, 4, ZERO_BCI, rng,
-        ONE_CMPLX, enable_normalization, true, true, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList);
+        ONE_CMPLX, enable_normalization, true, true, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList);
 
     qftReg->SetPermutation(0x2b);
     qftReg->Decompose(0, qftReg2);
diff --git a/test/tests.hpp b/test/tests.hpp
index a09cf4d07..7511db2fc 100644
--- a/test/tests.hpp
+++ b/test/tests.hpp
@@ -38,7 +38,6 @@ extern bool disable_terminal_measurement;
 extern bool use_host_dma;
 extern bool disable_hardware_rng;
 extern bool async_time;
-extern bool sparse;
 extern int device_id;
 extern bitLenInt max_qubits;
 extern bitLenInt min_qubits;