diff --git a/CMakeLists.txt b/CMakeLists.txt index 590f34609..71f1fc1b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required (VERSION 3.9) -project (Qrack VERSION 9.11.11 DESCRIPTION "High Performance Quantum Bit Simulation" LANGUAGES CXX) +project (Qrack VERSION 9.11.12 DESCRIPTION "High Performance Quantum Bit Simulation" LANGUAGES CXX) # Installation commands include (GNUInstallDirs) diff --git a/include/qengine_cpu.hpp b/include/qengine_cpu.hpp index 1ffd34d83..16b23aa29 100644 --- a/include/qengine_cpu.hpp +++ b/include/qengine_cpu.hpp @@ -41,14 +41,12 @@ class QEngineCPU : public QEngine { DispatchQueue dispatchQueue; #endif - StateVectorSparsePtr CastStateVecSparse() { return std::dynamic_pointer_cast(stateVec); } - public: QEngineCPU(bitLenInt qBitCount, const bitCapInt& initState, qrack_rand_gen_ptr rgp = nullptr, const complex& phaseFac = CMPLX_DEFAULT_ARG, bool doNorm = false, bool randomGlobalPhase = true, - bool ignored = false, int64_t ignored2 = -1, bool useHardwareRNG = true, bool useSparseStateVec = false, - real1_f norm_thresh = REAL1_EPSILON, std::vector ignored3 = {}, bitLenInt ignored4 = 0U, - real1_f ignored5 = _qrack_qunit_sep_thresh); + bool ignored = false, int64_t ignored2 = -1, bool useHardwareRNG = true, bool ignored3 = false, + real1_f norm_thresh = REAL1_EPSILON, std::vector ignored4 = {}, bitLenInt ignored5 = 0U, + real1_f ignored6 = _qrack_qunit_sep_thresh); ~QEngineCPU() { Dump(); } diff --git a/include/statevector.hpp b/include/statevector.hpp index 54fc631ea..4961ce017 100644 --- a/include/statevector.hpp +++ b/include/statevector.hpp @@ -22,9 +22,6 @@ #include #endif -#include -#define SparseStateVecMap std::unordered_map - #if ENABLE_COMPLEX_X2 #if FPPOW == 5 #include "common/complex8x2simd.hpp" @@ -36,7 +33,6 @@ namespace Qrack { class StateVectorArray; -class StateVectorSparse; // This is a buffer struct that's capable of representing controlled single bit gates and arithmetic, when subclassed. class StateVector : public ParallelFor { @@ -74,7 +70,6 @@ class StateVector : public ParallelFor { virtual void copy(StateVectorPtr toCopy) = 0; virtual void shuffle(StateVectorPtr svp) = 0; virtual void get_probs(real1* outArray) = 0; - virtual bool is_sparse() = 0; }; class StateVectorArray : public StateVector { @@ -217,332 +212,5 @@ class StateVectorArray : public StateVector { par_for( 0, capacity, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { outArray[lcv] = norm(amplitudes[lcv]); }); } - - bool is_sparse() { return false; } -}; - -class StateVectorSparse : public StateVector { -protected: - SparseStateVecMap amplitudes; - std::mutex mtx; - - complex readUnlocked(const bitCapIntOcl& i) - { - auto it = amplitudes.find(i); - return (it == amplitudes.end()) ? ZERO_CMPLX : it->second; - } - - complex readLocked(const bitCapIntOcl& i) - { - std::lock_guard lock(mtx); - return readUnlocked(i); - } - -public: - StateVectorSparse(bitCapIntOcl cap) - : StateVector(cap) - , amplitudes() - { - } - - complex read(const bitCapIntOcl& i) { return isReadLocked ? readLocked(i) : readUnlocked(i); } - -#if ENABLE_COMPLEX_X2 - complex2 read2(const bitCapIntOcl& i1, const bitCapIntOcl& i2) - { - if (isReadLocked) { - return complex2(readLocked(i1), readLocked(i2)); - } - return complex2(readUnlocked(i1), readUnlocked(i2)); - } -#endif - - void write(const bitCapIntOcl& i, const complex& c) - { - const bool isCSet = abs(c) > REAL1_EPSILON; - if (isCSet) { - std::lock_guard lock(mtx); - amplitudes[i] = c; - } else { - std::lock_guard lock(mtx); - amplitudes.erase(i); - } - } - - void write2(const bitCapIntOcl& i1, const complex& c1, const bitCapIntOcl& i2, const complex& c2) - { - const bool isC1Set = abs(c1) > REAL1_EPSILON; - const bool isC2Set = abs(c2) > REAL1_EPSILON; - if (!isC1Set && !isC2Set) { - std::lock_guard lock(mtx); - amplitudes.erase(i1); - amplitudes.erase(i2); - } else if (isC1Set && isC2Set) { - std::lock_guard lock(mtx); - amplitudes[i1] = c1; - amplitudes[i2] = c2; - } else if (isC1Set) { - std::lock_guard lock(mtx); - amplitudes.erase(i2); - amplitudes[i1] = c1; - } else { - std::lock_guard lock(mtx); - amplitudes.erase(i1); - amplitudes[i2] = c2; - } - } - - void clear() - { - std::lock_guard lock(mtx); - amplitudes.clear(); - } - - void copy_in(const complex* copyIn) - { - if (!copyIn) { - clear(); - return; - } - - std::lock_guard lock(mtx); - for (bitCapIntOcl i = 0U; i < capacity; ++i) { - if (abs(copyIn[i]) <= REAL1_EPSILON) { - amplitudes.erase(i); - } else { - amplitudes[i] = copyIn[i]; - } - } - } - - void copy_in(const complex* copyIn, const bitCapIntOcl offset, const bitCapIntOcl length) - { - if (!copyIn) { - std::lock_guard lock(mtx); - for (bitCapIntOcl i = 0U; i < length; ++i) { - amplitudes.erase(i); - } - - return; - } - - std::lock_guard lock(mtx); - for (bitCapIntOcl i = 0U; i < length; ++i) { - if (abs(copyIn[i]) <= REAL1_EPSILON) { - amplitudes.erase(i); - } else { - amplitudes[i + offset] = copyIn[i]; - } - } - } - - void copy_in( - StateVectorPtr copyInSv, const bitCapIntOcl srcOffset, const bitCapIntOcl dstOffset, const bitCapIntOcl length) - { - StateVectorSparsePtr copyIn = std::dynamic_pointer_cast(copyInSv); - - if (!copyIn) { - std::lock_guard lock(mtx); - for (bitCapIntOcl i = 0U; i < length; ++i) { - amplitudes.erase(i + srcOffset); - } - - return; - } - - std::lock_guard lock(mtx); - for (bitCapIntOcl i = 0U; i < length; ++i) { - complex amp = copyIn->read(i + srcOffset); - if (abs(amp) <= REAL1_EPSILON) { - amplitudes.erase(i + srcOffset); - } else { - amplitudes[i + dstOffset] = amp; - } - } - } - - void copy_out(complex* copyOut) - { - for (bitCapIntOcl i = 0U; i < capacity; ++i) { - copyOut[i] = read(i); - } - } - - void copy_out(complex* copyOut, const bitCapIntOcl offset, const bitCapIntOcl length) - { - for (bitCapIntOcl i = 0U; i < length; ++i) { - copyOut[i] = read(i + offset); - } - } - - void copy(const StateVectorPtr toCopy) { copy(std::dynamic_pointer_cast(toCopy)); } - - void copy(StateVectorSparsePtr toCopy) - { - std::lock_guard lock(mtx); - amplitudes = toCopy->amplitudes; - } - - void shuffle(StateVectorPtr svp) { shuffle(std::dynamic_pointer_cast(svp)); } - - void shuffle(StateVectorSparsePtr svp) - { - const size_t halfCap = (size_t)(capacity >> 1U); - std::lock_guard lock(mtx); - for (bitCapIntOcl i = 0U; i < halfCap; ++i) { - complex amp = svp->read(i); - svp->write(i, read(i + halfCap)); - write(i + halfCap, amp); - } - } - - void get_probs(real1* outArray) - { - for (bitCapIntOcl i = 0U; i < capacity; ++i) { - outArray[i] = norm(read(i)); - } - } - - bool is_sparse() { return (amplitudes.size() < (size_t)(capacity >> 1U)); } - - std::vector iterable() - { - std::vector> toRet(GetConcurrencyLevel()); - std::vector>::iterator toRetIt; - - // For lock_guard scope - if (true) { - std::lock_guard lock(mtx); - - par_for(0U, amplitudes.size(), [&](const bitCapIntOcl& lcv, const unsigned& cpu) { - auto it = amplitudes.begin(); - std::advance(it, lcv); - toRet[cpu].push_back(it->first); - }); - } - - for (int64_t i = (int64_t)(toRet.size() - 1U); i >= 0; i--) { - if (toRet[i].empty()) { - toRetIt = toRet.begin(); - std::advance(toRetIt, i); - toRet.erase(toRetIt); - } - } - - if (toRet.empty()) { - return {}; - } - - while (toRet.size() > 1U) { - // Work odd unit into collapse sequence: - if (toRet.size() & 1U) { - toRet[toRet.size() - 2U].insert( - toRet[toRet.size() - 2U].end(), toRet[toRet.size() - 1U].begin(), toRet[toRet.size() - 1U].end()); - toRet.pop_back(); - } - - const int64_t combineCount = (int64_t)(toRet.size() >> 1U); -#if ENABLE_PTHREAD - std::vector> futures(combineCount); - for (int64_t i = (combineCount - 1); i >= 0; i--) { - futures[i] = std::async(std::launch::async, [i, combineCount, &toRet]() { - toRet[i].insert(toRet[i].end(), toRet[i + combineCount].begin(), toRet[i + combineCount].end()); - toRet[i + combineCount].clear(); - }); - } - for (int64_t i = (combineCount - 1); i >= 0; i--) { - futures[i].get(); - toRet.pop_back(); - } -#else - for (int64_t i = (combineCount - 1); i >= 0; i--) { - toRet[i].insert(toRet[i].end(), toRet[i + combineCount].begin(), toRet[i + combineCount].end()); - toRet.pop_back(); - } -#endif - } - - return toRet[0U]; - } - - /// Returns empty if iteration should be over full set, otherwise just the iterable elements: - std::set iterable( - const bitCapIntOcl& setMask, const bitCapIntOcl& filterMask = 0, const bitCapIntOcl& filterValues = 0) - { - if (!filterMask && filterValues) { - return {}; - } - - const bitCapIntOcl unsetMask = ~setMask; - - std::vector> toRet(GetConcurrencyLevel()); - std::vector>::iterator toRetIt; - - // For lock_guard scope - if (true) { - std::lock_guard lock(mtx); - - if (!filterMask && !filterValues) { - par_for(0U, amplitudes.size(), [&](const bitCapIntOcl& lcv, const unsigned& cpu) { - auto it = amplitudes.begin(); - std::advance(it, lcv); - toRet[cpu].insert(it->first & unsetMask); - }); - } else { - const bitCapIntOcl unfilterMask = ~filterMask; - par_for(0U, amplitudes.size(), [&](const bitCapIntOcl lcv, const unsigned& cpu) { - auto it = amplitudes.begin(); - std::advance(it, lcv); - if ((it->first & filterMask) == filterValues) { - toRet[cpu].insert(it->first & unsetMask & unfilterMask); - } - }); - } - } - - for (int64_t i = (int64_t)(toRet.size() - 1U); i >= 0; i--) { - if (toRet[i].empty()) { - toRetIt = toRet.begin(); - std::advance(toRetIt, i); - toRet.erase(toRetIt); - } - } - - if (toRet.empty()) { - return {}; - } - - while (toRet.size() > 1U) { - // Work odd unit into collapse sequence: - if (toRet.size() & 1U) { - toRet[toRet.size() - 2U].insert(toRet[toRet.size() - 1U].begin(), toRet[toRet.size() - 1U].end()); - toRet.pop_back(); - } - - const int64_t combineCount = (int64_t)(toRet.size() >> 1U); -#if ENABLE_PTHREAD - std::vector> futures(combineCount); - for (int64_t i = (combineCount - 1); i >= 0; i--) { - futures[i] = std::async(std::launch::async, [i, combineCount, &toRet]() { - toRet[i].insert(toRet[i + combineCount].begin(), toRet[i + combineCount].end()); - toRet[i + combineCount].clear(); - }); - } - - for (int64_t i = (combineCount - 1); i >= 0; i--) { - futures[i].get(); - toRet.pop_back(); - } -#else - for (int64_t i = (combineCount - 1); i >= 0; i--) { - toRet[i].insert(toRet[i + combineCount].begin(), toRet[i + combineCount].end()); - toRet.pop_back(); - } -#endif - } - - return toRet[0U]; - } }; - } // namespace Qrack diff --git a/src/qengine/arithmetic.cpp b/src/qengine/arithmetic.cpp index 3dd3b4625..3a05a7865 100644 --- a/src/qengine/arithmetic.cpp +++ b/src/qengine/arithmetic.cpp @@ -47,18 +47,12 @@ void QEngineCPU::ROL(bitLenInt shift, bitLenInt start, bitLenInt length) StateVectorPtr nStateVec = AllocStateVec(maxQPowerOcl); stateVec->isReadLocked = false; - ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) { + par_for(0, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { const bitCapIntOcl otherRes = lcv & otherMask; const bitCapIntOcl regInt = (lcv & regMask) >> start; const bitCapIntOcl outInt = (regInt >> (length - shift)) | ((regInt << shift) & lengthMask); nStateVec->write((outInt << start) | otherRes, stateVec->read(lcv)); - }; - - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(), fn); - } else { - par_for(0, maxQPowerOcl, fn); - } + }); ResetStateVec(nStateVec); } @@ -91,18 +85,12 @@ void QEngineCPU::INC(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt len StateVectorPtr nStateVec = AllocStateVec(maxQPowerOcl); stateVec->isReadLocked = false; - ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) { + par_for(0, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { const bitCapIntOcl otherRes = lcv & otherMask; const bitCapIntOcl inOutInt = (lcv & inOutMask) >> inOutStart; const bitCapIntOcl outInt = (inOutInt + toAddOcl) & lengthMask; nStateVec->write((outInt << inOutStart) | otherRes, stateVec->read(lcv)); - }; - - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(), fn); - } else { - par_for(0, maxQPowerOcl, fn); - } + }); ResetStateVec(nStateVec); } @@ -249,7 +237,7 @@ void QEngineCPU::INCS(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt le nStateVec->clear(); stateVec->isReadLocked = false; - ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) { + par_for(0, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { const bitCapIntOcl otherRes = lcv & otherMask; const bitCapIntOcl inOutInt = (lcv & inOutMask) >> inOutStart; const bitCapIntOcl outInt = inOutInt + toAddOcl; @@ -261,13 +249,7 @@ void QEngineCPU::INCS(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt le } else { nStateVec->write(outRes, stateVec->read(lcv)); } - }; - - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(), fn); - } else { - par_for(0, maxQPowerOcl, fn); - } + }); ResetStateVec(nStateVec); } @@ -778,7 +760,7 @@ void QEngineCPU::INCBCD(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt nStateVec->clear(); stateVec->isReadLocked = false; - ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) { + par_for(0, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { const bitCapIntOcl otherRes = lcv & otherMask; bitCapIntOcl partToAdd = toAddOcl; bitCapIntOcl inOutInt = (lcv & inOutMask) >> inOutStart; @@ -809,13 +791,7 @@ void QEngineCPU::INCBCD(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt } else { nStateVec->write(lcv, stateVec->read(lcv)); } - }; - - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(), fn); - } else { - par_for(0, maxQPowerOcl, fn); - } + }); ResetStateVec(nStateVec); } @@ -975,11 +951,7 @@ bitCapInt QEngineCPU::IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bi }; } - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(0, skipPower, 0), fn); - } else { - par_for_skip(0, maxQPowerOcl, skipPower, valueLength, fn); - } + par_for_skip(0, maxQPowerOcl, skipPower, valueLength, fn); ResetStateVec(nStateVec); @@ -1046,7 +1018,7 @@ bitCapInt QEngineCPU::IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bi const bitCapIntOcl otherMask = (maxQPowerOcl - 1U) & (~(inputMask | outputMask | carryMask)); const bitCapIntOcl skipPower = pow2Ocl(carryIndex); - ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) { + par_for_skip(0, maxQPowerOcl, skipPower, 1, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { // These are qubits that are not directly involved in the // operation. We iterate over all of their possibilities, but their // input value matches their output value: @@ -1093,13 +1065,7 @@ bitCapInt QEngineCPU::IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bi outputRes = outputInt << valueStart; nStateVec->write(outputRes | inputRes | otherRes | carryRes, stateVec->read(lcv)); - }; - - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(0, skipPower, 0), fn); - } else { - par_for_skip(0, maxQPowerOcl, skipPower, 1, fn); - } + }); // We dealloc the old state vector and replace it with the one we // just calculated. @@ -1168,7 +1134,7 @@ bitCapInt QEngineCPU::IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bi const bitCapIntOcl otherMask = (maxQPowerOcl - 1U) & (~(inputMask | outputMask | carryMask)); const bitCapIntOcl skipPower = pow2Ocl(carryIndex); - ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) { + par_for_skip(0, maxQPowerOcl, skipPower, valueLength, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { // These are qubits that are not directly involved in the // operation. We iterate over all of their possibilities, but their // input value matches their output value: @@ -1219,13 +1185,7 @@ bitCapInt QEngineCPU::IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bi outputRes = outputInt << valueStart; nStateVec->write(outputRes | inputRes | otherRes | carryRes, stateVec->read(lcv)); - }; - - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(0, skipPower, 0), fn); - } else { - par_for_skip(0, maxQPowerOcl, skipPower, valueLength, fn); - } + }); // We dealloc the old state vector and replace it with the one we // just calculated. @@ -1256,7 +1216,7 @@ void QEngineCPU::Hash(bitLenInt start, bitLenInt length, const unsigned char* va nStateVec->clear(); stateVec->isReadLocked = false; - ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) { + par_for(0, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { const bitCapIntOcl inputRes = lcv & inputMask; const bitCapIntOcl inputInt = inputRes >> start; bitCapIntOcl outputInt = 0; @@ -1273,13 +1233,7 @@ void QEngineCPU::Hash(bitLenInt start, bitLenInt length, const unsigned char* va } bitCapIntOcl outputRes = outputInt << start; nStateVec->write(outputRes | (lcv & ~inputRes), stateVec->read(lcv)); - }; - - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(), fn); - } else { - par_for(0, maxQPowerOcl, fn); - } + }); ResetStateVec(nStateVec); } diff --git a/src/qengine/state.cpp b/src/qengine/state.cpp index 696f5c0ac..8c32ec14a 100644 --- a/src/qengine/state.cpp +++ b/src/qengine/state.cpp @@ -452,18 +452,7 @@ void QEngineCPU::Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const comp } } - if (stateVec->is_sparse()) { - const bitCapIntOcl setMask = offset1 ^ offset2; - bitCapIntOcl filterMask = 0U; - for (bitLenInt i = 0U; i < bitCount; ++i) { - filterMask |= qPowersSorted[i]; - } - filterMask &= ~setMask; - const bitCapIntOcl filterValues = filterMask & offset1 & offset2; - par_for_set(CastStateVecSparse()->iterable(setMask, filterMask, filterValues), fn); - } else { - par_for_mask(0U, maxQPowerOcl, qPowersSorted, fn); - } + par_for_mask(0U, maxQPowerOcl, qPowersSorted, fn); if (doApplyNorm) { runningNorm = ONE_R1; @@ -635,18 +624,7 @@ void QEngineCPU::Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const comp } } - if (stateVec->is_sparse()) { - const bitCapIntOcl setMask = offset1 ^ offset2; - bitCapIntOcl filterMask = 0U; - for (bitLenInt i = 0U; i < bitCount; ++i) { - filterMask |= qPowersSorted[i]; - } - filterMask &= ~setMask; - const bitCapIntOcl filterValues = filterMask & offset1 & offset2; - par_for_set(CastStateVecSparse()->iterable(setMask, filterMask, filterValues), fn); - } else { - par_for_mask(0U, maxQPowerOcl, qPowersSorted, fn); - } + par_for_mask(0U, maxQPowerOcl, qPowersSorted, fn); if (doApplyNorm) { runningNorm = ONE_R1; @@ -687,11 +665,6 @@ void QEngineCPU::XMask(const bitCapInt& mask) return; } - if (stateVec->is_sparse()) { - QInterface::XMask(mask); - return; - } - Dispatch(maxQPowerOcl, [this, mask] { const bitCapIntOcl maskOcl = (bitCapIntOcl)mask; const bitCapIntOcl otherMask = (maxQPowerOcl - 1U) ^ maskOcl; @@ -734,11 +707,6 @@ void QEngineCPU::PhaseParity(real1_f radians, const bitCapInt& mask) return; } - if (stateVec->is_sparse()) { - QInterface::PhaseParity(radians, mask); - return; - } - Dispatch(maxQPowerOcl, [this, mask, radians] { const bitCapIntOcl parityStartSize = 4U * sizeof(bitCapIntOcl); const complex phaseFac = std::polar(ONE_R1, (real1)(radians / 2)); @@ -787,11 +755,6 @@ void QEngineCPU::PhaseRootNMask(bitLenInt n, const bitCapInt& mask) return; } - if (stateVec->is_sparse()) { - QInterface::PhaseRootNMask(n, mask); - return; - } - Dispatch(maxQPowerOcl, [this, n, mask, radians] { const bitCapIntOcl maskOcl = (bitCapIntOcl)mask; const bitCapIntOcl nPhases = pow2Ocl(n); @@ -935,7 +898,8 @@ void QEngineCPU::UniformParityRZ(const bitCapInt& mask, real1_f angle) const real1 sine = (real1)sin(angle); const complex phaseFac(cosine, sine); const complex phaseFacAdj(cosine, -sine); - ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) { + + par_for(0U, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { bitCapIntOcl perm = lcv & (bitCapIntOcl)mask; // From https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive // c accumulates the total bits set in v @@ -945,13 +909,7 @@ void QEngineCPU::UniformParityRZ(const bitCapInt& mask, real1_f angle) perm &= perm - 1U; } stateVec->write(lcv, stateVec->read(lcv) * ((c & 1U) ? phaseFac : phaseFacAdj)); - }; - - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(), fn); - } else { - par_for(0U, maxQPowerOcl, fn); - } + }); }); } @@ -1057,21 +1015,14 @@ bitLenInt QEngineCPU::Compose(QEngineCPUPtr toCopy) StateVectorPtr nStateVec = AllocStateVec(nMaxQPower); stateVec->isReadLocked = false; - ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) { - nStateVec->write(lcv, stateVec->read(lcv & startMask) * toCopy->stateVec->read((lcv & endMask) >> qubitCount)); - }; - if ((toCopy->doNormalize) && (toCopy->runningNorm != ONE_R1)) { toCopy->NormalizeState(); } toCopy->Finish(); - if (stateVec->is_sparse() || toCopy->stateVec->is_sparse()) { - par_for_sparse_compose( - CastStateVecSparse()->iterable(), toCopy->CastStateVecSparse()->iterable(), qubitCount, fn); - } else { - par_for(0U, nMaxQPower, fn); - } + par_for(0U, nMaxQPower, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { + nStateVec->write(lcv, stateVec->read(lcv & startMask) * toCopy->stateVec->read((lcv & endMask) >> qubitCount)); + }); SetQubitCount(nQubitCount); @@ -1457,19 +1408,15 @@ real1_f QEngineCPU::Prob(bitLenInt qubit) } stateVec->isReadLocked = false; - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(qPower, qPower, qPower), fn); - } else { #if ENABLE_COMPLEX_X2 - if (qPower == 1U) { - par_for(0U, maxQPowerOcl >> 2U, fn); - } else { - par_for_skip(0U, maxQPowerOcl >> 1U, qPower >> 1U, 1U, fn); - } + if (qPower == 1U) { + par_for(0U, maxQPowerOcl >> 2U, fn); + } else { + par_for_skip(0U, maxQPowerOcl >> 1U, qPower >> 1U, 1U, fn); + } #else - par_for_skip(0U, maxQPowerOcl, qPower, 1U, fn); + par_for_skip(0U, maxQPowerOcl, qPower, 1U, fn); #endif - } stateVec->isReadLocked = true; real1 oneChance = ZERO_R1; @@ -1517,11 +1464,7 @@ real1_f QEngineCPU::CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenI }); stateVec->isReadLocked = false; - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(qPower, qPower, qPower), fn); - } else { - par_for_skip(0U, maxQPowerOcl, qPower, 1U, fn); - } + par_for_skip(0U, maxQPowerOcl, qPower, 1U, fn); stateVec->isReadLocked = true; real1 oneChance = ZERO_R1; @@ -1554,11 +1497,7 @@ real1_f QEngineCPU::ProbReg(bitLenInt start, bitLenInt length, const bitCapInt& }; stateVec->isReadLocked = false; - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(0, bitRegMaskOcl(start, length), perm), fn); - } else { - par_for_skip(0U, maxQPowerOcl, pow2Ocl(start), length, fn); - } + par_for_skip(0U, maxQPowerOcl, pow2Ocl(start), length, fn); stateVec->isReadLocked = true; real1 prob = ZERO_R1; @@ -1632,7 +1571,9 @@ real1_f QEngineCPU::ProbParity(const bitCapInt& mask) std::unique_ptr oddChanceBuff(new real1[numCores]()); const bitCapIntOcl maskOcl = (bitCapIntOcl)mask; - ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) { + + stateVec->isReadLocked = false; + par_for(0U, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { bool parity = false; bitCapIntOcl v = lcv & maskOcl; while (v) { @@ -1643,14 +1584,7 @@ real1_f QEngineCPU::ProbParity(const bitCapInt& mask) if (parity) { oddChanceBuff[cpu] += norm(stateVec->read(lcv)); } - }; - - stateVec->isReadLocked = false; - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(), fn); - } else { - par_for(0U, maxQPowerOcl, fn); - } + }); stateVec->isReadLocked = true; for (unsigned i = 0U; i < numCores; ++i) { @@ -1704,7 +1638,9 @@ bool QEngineCPU::ForceMParity(const bitCapInt& mask, bool result, bool doForce) std::unique_ptr oddChanceBuff(new real1[numCores]()); const bitCapIntOcl maskOcl = (bitCapIntOcl)mask; - ParallelFunc fn = [&](const bitCapIntOcl& lcv, const unsigned& cpu) { + + stateVec->isReadLocked = false; + par_for(0U, maxQPowerOcl, [&](const bitCapIntOcl& lcv, const unsigned& cpu) { bool parity = false; bitCapIntOcl v = lcv & maskOcl; while (v) { @@ -1717,14 +1653,7 @@ bool QEngineCPU::ForceMParity(const bitCapInt& mask, bool result, bool doForce) } else { stateVec->write(lcv, ZERO_CMPLX); } - }; - - stateVec->isReadLocked = false; - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(), fn); - } else { - par_for(0U, maxQPowerOcl, fn); - } + }); stateVec->isReadLocked = true; for (unsigned i = 0U; i < numCores; ++i) { @@ -1811,19 +1740,14 @@ void QEngineCPU::ApplyM(const bitCapInt& regMask, const bitCapInt& result, const Dispatch(maxQPowerOcl, [this, regMask, result, nrm] { const bitCapIntOcl regMaskOcl = (bitCapIntOcl)regMask; const bitCapIntOcl resultOcl = (bitCapIntOcl)result; - ParallelFunc fn = [&](const bitCapIntOcl& i, const unsigned& cpu) { + + par_for(0U, maxQPowerOcl, [&](const bitCapIntOcl& i, const unsigned& cpu) { if ((i & regMaskOcl) == resultOcl) { stateVec->write(i, nrm * stateVec->read(i)); } else { stateVec->write(i, ZERO_CMPLX); } - }; - - if (stateVec->is_sparse()) { - par_for_set(CastStateVecSparse()->iterable(), fn); - } else { - par_for(0U, maxQPowerOcl, fn); - } + }); runningNorm = ONE_R1; }); @@ -1899,10 +1823,6 @@ void QEngineCPU::UpdateRunningNorm(real1_f norm_thresh) StateVectorPtr QEngineCPU::AllocStateVec(bitCapIntOcl elemCount) { - if (isSparse) { - return std::make_shared(elemCount); - } else { - return std::make_shared(elemCount); - } + return std::make_shared(elemCount); } } // namespace Qrack diff --git a/src/qunit.cpp b/src/qunit.cpp index 8c43140aa..f5bb898fb 100644 --- a/src/qunit.cpp +++ b/src/qunit.cpp @@ -366,7 +366,7 @@ void QUnit::Detach(bitLenInt start, bitLenInt length, QUnitPtr dest) dest->shards[i] = shard; } } - + // Find the rest of the qubits. for (auto&& shard : shards) { const auto subunit = subunits.find(shard.unit); diff --git a/test/benchmarks.cpp b/test/benchmarks.cpp index 5cda08596..8dafa373f 100644 --- a/test/benchmarks.cpp +++ b/test/benchmarks.cpp @@ -121,7 +121,7 @@ void benchmarkLoopVariable(std::function fn, bit for (bitLenInt numBits = mnQbts; numBits <= mxQbts; numBits++) { QInterfacePtr qftReg = CreateQuantumInterface(engineStack, numBits, ZERO_BCI, rng, CMPLX_DEFAULT_ARG, - enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList); + enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList); if (disable_t_injection) { qftReg->SetTInjection(false); } @@ -182,7 +182,7 @@ void benchmarkLoopVariable(std::function fn, bit // Re-alloc: qftReg = CreateQuantumInterface(engineStack, numBits, ZERO_BCI, rng, CMPLX_DEFAULT_ARG, enable_normalization, - true, use_host_dma, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList); + true, use_host_dma, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList); if (disable_t_injection) { qftReg->SetTInjection(false); } @@ -207,7 +207,7 @@ void benchmarkLoopVariable(std::function fn, bit // Re-alloc: qftReg = CreateQuantumInterface(engineStack, numBits, ZERO_BCI, rng, CMPLX_DEFAULT_ARG, - enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, sparse, + enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList); if (disable_t_injection) { qftReg->SetTInjection(false); @@ -261,7 +261,7 @@ void benchmarkLoopVariable(std::function fn, bit // Re-alloc: qftReg = CreateQuantumInterface(engineStack, numBits, ZERO_BCI, rng, CMPLX_DEFAULT_ARG, enable_normalization, - true, use_host_dma, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList); + true, use_host_dma, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList); if (disable_t_injection) { qftReg->SetTInjection(false); } @@ -4749,7 +4749,7 @@ TEST_CASE("test_universal_circuit_digital_cross_entropy", "[supreme]") std::cout << "Gold standard vs. gold standard cross entropy (out of 1.0): " << crossEntropy << std::endl; QInterfacePtr testCase = CreateQuantumInterface({ testEngineType, testSubEngineType }, n, ZERO_BCI, rng, ONE_CMPLX, - enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, sparse); + enable_normalization, true, use_host_dma, device_id, !disable_hardware_rng, false); if (disable_t_injection) { testCase->SetTInjection(false); } diff --git a/test/benchmarks_main.cpp b/test/benchmarks_main.cpp index c7aa3c98c..622aec831 100644 --- a/test/benchmarks_main.cpp +++ b/test/benchmarks_main.cpp @@ -36,7 +36,6 @@ bool disable_terminal_measurement = false; bool use_host_dma = false; bool disable_hardware_rng = false; bool async_time = false; -bool sparse = false; int device_id = -1; bitLenInt max_qubits = 24; bitLenInt min_qubits = 4; @@ -160,8 +159,6 @@ int main(int argc, char* argv[]) "type should be binary. (By default, it is " "human-readable.)") | Opt(single_qubit_run)["--single"]("Only run single (maximum) qubit count for tests") | - Opt(sparse)["--sparse"]( - "(For QEngineCPU, under QUnit:) Use a state vector optimized for sparse representation and iteration.") | Opt(benchmarkSamples, "samples")["--samples"]("number of samples to collect (default: 100)") | Opt(benchmarkDepth, "depth")["--benchmark-depth"]( "depth of randomly constructed circuits, when applicable, with 1 round of single qubit and 1 round of " @@ -423,11 +420,7 @@ int main(int argc, char* argv[]) if (num_failed == 0 && qunit) { testEngineType = QINTERFACE_QUNIT; if (num_failed == 0 && cpu) { - if (sparse) { - session.config().stream() << "############ QUnit -> QEngine -> CPU (Sparse) ############" << std::endl; - } else { - session.config().stream() << "############ QUnit -> QEngine -> CPU ############" << std::endl; - } + session.config().stream() << "############ QUnit -> QEngine -> CPU ############" << std::endl; testSubEngineType = QINTERFACE_CPU; num_failed = session.run(); } diff --git a/test/test_main.cpp b/test/test_main.cpp index 517ecd6ef..5184dfa03 100644 --- a/test/test_main.cpp +++ b/test/test_main.cpp @@ -32,7 +32,6 @@ bool disable_reactive_separation = false; bool use_host_dma = false; bool disable_hardware_rng = false; bool async_time = false; -bool sparse = false; int device_id = -1; bitLenInt max_qubits = 24; std::string mOutputFileName; @@ -126,8 +125,6 @@ int main(int argc, char* argv[]) Opt(isBinaryOutput)["--binary-output"]("If included, specifies that the --measure-output file " "type should be binary. (By default, it is " "human-readable.)") | - Opt(sparse)["--sparse"]( - "(For QEngineCPU, under QUnit:) Use a state vector optimized for sparse representation and iteration.") | Opt(benchmarkSamples, "samples")["--samples"]("number of samples to collect (default: 100)") | Opt(benchmarkDepth, "depth")["--benchmark-depth"]( "depth of randomly constructed circuits, when applicable, with 1 round of single qubit and 1 round of " @@ -301,11 +298,7 @@ int main(int argc, char* argv[]) if (num_failed == 0 && qunit) { testEngineType = QINTERFACE_QUNIT; if (num_failed == 0 && cpu) { - if (sparse) { - session.config().stream() << "############ QUnit -> QEngine -> CPU (Sparse) ############" << std::endl; - } else { - session.config().stream() << "############ QUnit -> QEngine -> CPU ############" << std::endl; - } + session.config().stream() << "############ QUnit -> QEngine -> CPU ############" << std::endl; testSubEngineType = QINTERFACE_CPU; num_failed = session.run(); } @@ -539,7 +532,7 @@ QInterfaceTestFixture::QInterfaceTestFixture() qftReg = CreateQuantumInterface( { testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, 20, ZERO_BCI, rng, - ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList); + ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList); if (disable_t_injection) { qftReg->SetTInjection(false); diff --git a/test/tests.cpp b/test/tests.cpp index 2938ecb7c..8064c1b37 100644 --- a/test/tests.cpp +++ b/test/tests.cpp @@ -65,7 +65,7 @@ QInterfacePtr MakeEngine(bitLenInt qubitCount) { QInterfacePtr toRet = CreateQuantumInterface( { testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, qubitCount, ZERO_BCI, rng, - ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList); + ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList); if (disable_t_injection) { toRet->SetTInjection(false); @@ -659,7 +659,7 @@ TEST_CASE_METHOD(QInterfaceTestFixture, "test_cswap") QInterfacePtr qftReg2 = CreateQuantumInterface({ testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, - 20U, ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse, + 20U, ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false, REAL1_DEFAULT_ARG, devList, 10); control[0] = 9; @@ -1257,11 +1257,11 @@ TEST_CASE_METHOD(QInterfaceTestFixture, "test_approxcompare") { qftReg = CreateQuantumInterface({ testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, 3U, - ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse, + ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false, REAL1_DEFAULT_ARG, devList, 10); QInterfacePtr qftReg2 = CreateQuantumInterface({ testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, 3U, - ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse, + ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false, REAL1_DEFAULT_ARG, devList, 10); qftReg->X(0); @@ -2881,7 +2881,7 @@ TEST_CASE_METHOD(QInterfaceTestFixture, "test_zero_phase_flip") REQUIRE_THAT(qftReg, HasProbability(0, 8, 0x03)); QInterfacePtr qftReg2 = CreateQuantumInterface({ testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, 20U, - ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, sparse, + ZERO_BCI, rng, ONE_CMPLX, enable_normalization, true, false, device_id, !disable_hardware_rng, false, REAL1_DEFAULT_ARG, devList, 10); qftReg2->SetPermutation(3U << 9U); @@ -2958,7 +2958,7 @@ TEST_CASE_METHOD(QInterfaceTestFixture, "test_decompose", "[sd_xfail]") // Try across device/heap allocation case: qftReg2 = CreateQuantumInterface( { testEngineType, testSubEngineType, testSubSubEngineType, testSubSubSubEngineType }, 4, ZERO_BCI, rng, - ONE_CMPLX, enable_normalization, true, true, device_id, !disable_hardware_rng, sparse, REAL1_EPSILON, devList); + ONE_CMPLX, enable_normalization, true, true, device_id, !disable_hardware_rng, false, REAL1_EPSILON, devList); qftReg->SetPermutation(0x2b); qftReg->Decompose(0, qftReg2); diff --git a/test/tests.hpp b/test/tests.hpp index a09cf4d07..7511db2fc 100644 --- a/test/tests.hpp +++ b/test/tests.hpp @@ -38,7 +38,6 @@ extern bool disable_terminal_measurement; extern bool use_host_dma; extern bool disable_hardware_rng; extern bool async_time; -extern bool sparse; extern int device_id; extern bitLenInt max_qubits; extern bitLenInt min_qubits;