Add Support for the QIR Adaptive Profile (#741)
* Allow OQC email and password to be undefined for local emulation
* Actual changes to support Adaptive Profile
* Add named blocks to work around PyQIR limitation
* Properly set qubitMeasurementFeedback for array-saved measurements
* Add new adaptive profile tests and update old ones
* Create __quantum__qis__read_result__body stub in NVQIR
* Add qir_test_cond_for_loop-6.cpp (XFAIL), which shows a compiler issue
* Comments and function rename
* Update PyQIR comment
* Use FileCheck instead of asserts in new tests
* Add reference to new issue for __quantum__qis__read_result__body
* Update tests for latest merge
* More test updates

---------

Co-authored-by: Eric Schweitz <[email protected]>
bmhowe23 and schweitzpgi authored Oct 16, 2023
1 parent 3d00067 commit bf84be4
Showing 28 changed files with 864 additions and 56 deletions.
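
For context, the Adaptive Profile exists to support kernels whose control flow depends on mid-circuit measurement results. The sketch below is illustrative only (it is not part of this commit) and assumes the standard CUDA Quantum C++ kernel API; teleport_like is a hypothetical name.

#include <cudaq.h>

// Hypothetical kernel: a mid-circuit measurement drives a classically
// controlled correction, i.e. the conditional-feedback pattern that the
// QIR Adaptive Profile (and the supportsConditionalFeedback change in
// RemoteRESTQPU.cpp below) enables.
struct teleport_like {
  void operator()() __qpu__ {
    cudaq::qubit a, b;
    h(a);
    x<cudaq::ctrl>(a, b);
    auto bit = mz(a); // mid-circuit measurement
    if (bit)          // classical feedback on the measured result
      x(b);
    mz(b);
  }
};

int main() {
  auto counts = cudaq::sample(1000, teleport_like{});
  counts.dump();
}
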
11 changes: 11 additions & 0 deletions include/cudaq/Optimizer/CodeGen/Peephole.h
@@ -55,6 +55,17 @@ inline mlir::Value createMeasureCall(mlir::PatternRewriter &builder,
return {};
}

inline mlir::Value createReadResultCall(mlir::PatternRewriter &builder,
                                        mlir::Location loc,
                                        mlir::OpResult result) {
  auto i1Ty = mlir::IntegerType::get(builder.getContext(), 1);
  return builder
      .create<mlir::LLVM::CallOp>(loc, mlir::TypeRange{i1Ty},
                                  cudaq::opt::QIRReadResultBody,
                                  mlir::ArrayRef<mlir::Value>{result})
      .getResult();
}

namespace {
#include "cudaq/Optimizer/CodeGen/Peephole.inc"
}
26 changes: 26 additions & 0 deletions include/cudaq/Optimizer/CodeGen/Peephole.td
@@ -148,4 +148,30 @@ def MeasureToRegisterCallConv : Pat<
(CreateMeasureCall $call, $args),
[(IsaMeasureToRegisterCall:$callee), (IsaIntToPtrOperand $args)]>;

//===----------------------------------------------------------------------===//

def HasI1PtrType : Constraint<CPred<
"$_self.getType() == cudaq::opt::factory::getPointerType("
" mlir::IntegerType::get($_self.getContext(), 1))">>;

def HasResultType : Constraint<CPred<
"$_self.getType() == cudaq::opt::getResultType($_self.getContext())">>;

def IsaIntAttr : Constraint<CPred<"$_self.isa<mlir::IntegerAttr>()">>;

def CreateReadResultCall : NativeCodeCall<
"createReadResultCall($_builder, $_loc, $0)">;

// %1 = llvm.constant 1
// %2 = llvm.inttoptr %1 : i64 -> Result*
// %3 = llvm.bitcast %2 : Result* -> i1*
// %4 = llvm.load %3
// ─────────────────────────────────────
// %4 = call @read_result %2
def LoadMeasureResult : Pat<
(LLVM_LoadOp:$load (LLVM_BitcastOp:$bitcast (LLVM_IntToPtrOp:$cast
(LLVM_ConstantOp $attr))), $_, $_, $_, $_, $_, $_),
(CreateReadResultCall $cast),
[(HasI1PtrType:$bitcast), (HasResultType:$cast), (IsaIntAttr:$attr)]>;

#endif
2 changes: 2 additions & 0 deletions include/cudaq/Optimizer/CodeGen/QIRFunctionNames.h
@@ -22,6 +22,8 @@ constexpr static const char QIRMeasureToRegister[] =

constexpr static const char QIRCnot[] = "__quantum__qis__cnot";
constexpr static const char QIRCphase[] = "__quantum__qis__cphase";
constexpr static const char QIRReadResultBody[] =
"__quantum__qis__read_result__body";

constexpr static const char NVQIRInvokeWithControlBits[] =
"invokeWithControlQubits";
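
The commit notes mention creating a __quantum__qis__read_result__body stub in NVQIR. That implementation is not part of this excerpt; the sketch below only illustrates the expected shape, assuming the i1(Result*) signature that QIRProfilePreparationPass declares later in this diff.

// Illustration only. Result is the opaque QIR result type; any concrete
// definition lives inside NVQIR.
using Result = void;

extern "C" bool __quantum__qis__read_result__body(Result *result) {
  // A real stub would return the measurement value previously recorded for
  // this result; returning false here is just a placeholder.
  return false;
}
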
48 changes: 23 additions & 25 deletions lib/Optimizer/CodeGen/LowerToQIRProfile.cpp
@@ -65,12 +65,11 @@ namespace {
struct FunctionAnalysisData {
std::size_t nQubits = 0;
std::size_t nResults = 0;
// Use std::map to keep these sorted in ascending order.
// map[qb] --> [result,regName]
std::map<std::size_t, std::pair<std::size_t, StringAttr>> resultPtrValues;
// Additionally store by result to prevent collisions on a single qubit having
// Store by result to prevent collisions on a single qubit having
// multiple measurements (Adaptive Profile)
// map[result] --> [qb,regName]
// Use std::map to keep these sorted in ascending order. While this isn't
// required, it makes viewing the QIR easier.
std::map<std::size_t, std::pair<std::size_t, std::string>> resultQubitVals;
DenseMap<Operation *, std::size_t> allocationOffsets;
};
@@ -164,26 +163,18 @@ struct FunctionProfileAnalysis {
}
if (optQb) {
auto qb = *optQb;
auto iter = data.resultPtrValues.find(qb);
auto *ctx = callOp.getContext();
auto intTy = IntegerType::get(ctx, 64);
if (iter == data.resultPtrValues.end()) {
auto resIdx = IntegerAttr::get(intTy, data.nResults);
callOp->setAttr(resultIndexName, resIdx);
auto regName = [&]() -> StringAttr {
if (auto nameAttr = callOp->getAttr("registerName")
.dyn_cast_or_null<StringAttr>())
return nameAttr;
return {};
}();
data.resultQubitVals.insert(std::make_pair(
data.nResults, std::make_pair(qb, regName.data())));
data.resultPtrValues.insert(
std::make_pair(qb, std::make_pair(data.nResults++, regName)));
} else {
auto resIdx = IntegerAttr::get(intTy, iter->second.first);
callOp->setAttr(resultIndexName, resIdx);
}
auto resIdx = IntegerAttr::get(intTy, data.nResults);
callOp->setAttr(resultIndexName, resIdx);
auto regName = [&]() -> StringAttr {
if (auto nameAttr = callOp->getAttr("registerName")
.dyn_cast_or_null<StringAttr>())
return nameAttr;
return {};
}();
data.resultQubitVals.insert(std::make_pair(
data.nResults++, std::make_pair(qb, regName.data())));
} else {
callOp.emitError("could not trace offset value");
}
@@ -237,13 +228,13 @@ struct AddFuncAttribute : public OpRewritePattern<LLVM::LLVMFuncOp> {
auto resultTy = cudaq::opt::getResultType(rewriter.getContext());
auto i64Ty = rewriter.getI64Type();
auto module = op->getParentOfType<ModuleOp>();
for (auto &iv : info.resultPtrValues) {
for (auto &iv : info.resultQubitVals) {
auto &rec = iv.second;
Value idx = builder.create<LLVM::ConstantOp>(loc, i64Ty, rec.first);
Value idx = builder.create<LLVM::ConstantOp>(loc, i64Ty, iv.first);
Value ptr = builder.create<LLVM::IntToPtrOp>(loc, resultTy, idx);
auto regName = [&]() -> Value {
auto charPtrTy = cudaq::opt::getCharPointerType(builder.getContext());
if (rec.second) {
if (!rec.second.empty()) {
// Note: it should be the case that this string literal has already
// been added to the IR, so this step does not actually update the
// module.
@@ -422,6 +413,8 @@ struct QIRToQIRProfileQIRPass
CalleeConv, EraseArrayAlloc, EraseArrayRelease,
EraseDeadArrayGEP, MeasureCallConv,
MeasureToRegisterCallConv, XCtrlOneTargetToCNot>(context);
if (convertTo.getValue() == "qir-adaptive")
patterns.insert<LoadMeasureResult>(context);
if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns))))
signalPassFailure();
LLVM_DEBUG(llvm::dbgs() << "After QIR profile:\n" << *op << '\n');
@@ -472,6 +465,10 @@ struct QIRProfilePreparationPass
{cudaq::opt::getQubitType(ctx), cudaq::opt::getResultType(ctx)},
module);

cudaq::opt::factory::createLLVMFunctionSymbol(
cudaq::opt::QIRReadResultBody, IntegerType::get(ctx, 1),
{cudaq::opt::getResultType(ctx)}, module);

// Add record functions for any
// measurements.
cudaq::opt::factory::createLLVMFunctionSymbol(
@@ -585,6 +582,7 @@ cudaq::opt::verifyQIRProfilePass(llvm::StringRef convertTo) {

void cudaq::opt::addQIRProfilePipeline(OpPassManager &pm,
llvm::StringRef convertTo) {
assert(convertTo == "qir-adaptive" || convertTo == "qir-base");
pm.addPass(createQIRProfilePreparationPass());
pm.addNestedPass<LLVM::LLVMFuncOp>(createConvertToQIRFuncPass(convertTo));
pm.addPass(createQIRToQIRProfilePass(convertTo));
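
A minimal usage sketch for the pipeline entry point above, assuming an already-parsed mlir::ModuleOp; the header path is an assumption and is not taken from this commit.

#include "cudaq/Optimizer/CodeGen/Passes.h" // assumed location of addQIRProfilePipeline
#include "mlir/Pass/PassManager.h"

// Illustration only: lower a parsed module to the adaptive profile.
mlir::LogicalResult lowerToAdaptiveProfile(mlir::ModuleOp module) {
  mlir::PassManager pm(module.getContext());
  cudaq::opt::addQIRProfilePipeline(pm, /*convertTo=*/"qir-adaptive");
  return pm.run(module);
}
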
31 changes: 31 additions & 0 deletions lib/Optimizer/Transforms/QuakeAddMetadata.cpp
@@ -98,6 +98,8 @@ struct QuakeFunctionAnalysis {
auto allocValue = storeOp.getOperand(1);
if (auto cp = allocValue.getDefiningOp<cudaq::cc::ComputePtrOp>())
allocValue = cp.getBase();
if (auto castOp = allocValue.getDefiningOp<cudaq::cc::CastOp>())
allocValue = castOp.getOperand();

if (auto allocaOp = allocValue.getDefiningOp<cudaq::cc::AllocaOp>()) {
// Get the alloca users
@@ -122,6 +124,35 @@ struct QuakeFunctionAnalysis {
return WalkResult::interrupt();
}
}

// Look for any subsequent cast/compute_ptr/load chain, and check whether
// that load feeds a conditional statement.
if (auto cast = dyn_cast<cudaq::cc::CastOp>(allocUser)) {
for (auto castUser : cast->getUsers()) {
if (auto cp = dyn_cast<cudaq::cc::ComputePtrOp>(castUser)) {
for (auto cpUser : cp->getUsers()) {
if (auto load = dyn_cast<cudaq::cc::LoadOp>(cpUser)) {
auto loadUser = *load->getUsers().begin();

// Loaded Val could be used directly or by an Arith
// boolean operation
while (loadUser->getDialect()->getNamespace() ==
"arith") {
auto res = loadUser->getResult(0);
loadUser = *res.getUsers().begin();
}

// At this point we should be able to check if we are
// being used by a conditional
if (isa<cudaq::cc::IfOp, cf::CondBranchOp>(loadUser)) {
data.hasConditionalsOnMeasure = true;
return WalkResult::interrupt();
}
}
}
}
}
}
}
}
}
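
The chain walked above (alloca, cast, compute_ptr, load, optional arith ops, then a conditional) is what a kernel produces when measurement results are saved into a container and branched on later. A hypothetical kernel of that shape, for illustration only:

#include <cudaq.h>
#include <vector>

// Hypothetical kernel: measurements are stored into a vector and a later
// load of one element feeds an if-statement, so the analysis above should
// mark hasConditionalsOnMeasure for it.
struct cond_from_saved_measure {
  void operator()(int n) __qpu__ {
    cudaq::qubit q0, q1;
    std::vector<bool> bits(n);
    for (int i = 0; i < n; i++) {
      h(q0);
      bits[i] = mz(q0);
    }
    if (bits[0])
      x(q1);
    mz(q1);
  }
};
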
68 changes: 60 additions & 8 deletions runtime/common/RuntimeMLIR.cpp
@@ -142,7 +142,8 @@ void applyWriteOnlyAttributes(llvm::Module *llvmModule) {
// overlap.
// Reference:
// https://github.com/qir-alliance/qir-spec/blob/main/specification/under_development/profiles/Base_Profile.md?plain=1#L237
mlir::LogicalResult verifyMeasurementOrdering(llvm::Module *llvmModule) {
mlir::LogicalResult
verifyBaseProfileMeasurementOrdering(llvm::Module *llvmModule) {
bool irreversibleSeenYet = false;
for (llvm::Function &func : *llvmModule)
for (llvm::BasicBlock &block : func)
@@ -321,14 +322,32 @@ mlir::LogicalResult verifyQubitAndResultRanges(llvm::Module *llvmModule) {
}

// Verify that only the allowed LLVM instructions are present
mlir::LogicalResult verifyLLVMInstructions(llvm::Module *llvmModule) {
mlir::LogicalResult verifyLLVMInstructions(llvm::Module *llvmModule,
bool isBaseProfile) {
bool isAdaptiveProfile = !isBaseProfile;
for (llvm::Function &func : *llvmModule)
for (llvm::BasicBlock &block : func)
for (llvm::Instruction &inst : block) {
// Only call, br, and ret instructions are allowed at the top level.
if (!llvm::isa<llvm::CallBase>(inst) &&
!llvm::isa<llvm::BranchInst>(inst) &&
!llvm::isa<llvm::ReturnInst>(inst)) {
// Only specific instructions are allowed at the top level, depending on
// the specific profile
bool isValidBaseProfileInstruction =
llvm::isa<llvm::CallBase>(inst) ||
llvm::isa<llvm::BranchInst>(inst) ||
llvm::isa<llvm::ReturnInst>(inst);
// Note: there is an outstanding question about the adaptive profile
// with respect to `switch` and `select` instructions. They are
// currently described as "optional" in the spec, but there is no way to
// specify their presence via module flags. So to be cautious, for now
// we will assume they are not allowed in cuda-quantum programs.
bool isValidAdaptiveProfileInstruction = isValidBaseProfileInstruction;
// bool isValidAdaptiveProfileInstruction =
// isValidBaseProfileInstruction ||
// llvm::isa<llvm::SwitchInst>(inst) ||
// llvm::isa<llvm::SelectInst>(inst);
if (isBaseProfile && !isValidBaseProfileInstruction) {
llvm::errs() << "error - invalid instruction found: " << inst << '\n';
return failure();
} else if (isAdaptiveProfile && !isValidAdaptiveProfileInstruction) {
llvm::errs() << "error - invalid instruction found: " << inst << '\n';
return failure();
}
@@ -366,6 +385,9 @@ qirProfileTranslationFunction(const char *qirProfile, Operation *op,
const uint32_t qir_major_version = 1;
const uint32_t qir_minor_version = 0;

const bool isAdaptiveProfile = std::string{qirProfile} == "qir-adaptive";
const bool isBaseProfile = !isAdaptiveProfile;

auto context = op->getContext();
PassManager pm(context);
if (printIntermediateMLIR)
@@ -398,26 +420,56 @@
"dynamic_qubit_management", falseValue);
llvmModule->addModuleFlag(llvm::Module::ModFlagBehavior::Error,
"dynamic_result_management", falseValue);
if (isAdaptiveProfile) {
auto trueValue =
llvm::ConstantInt::getTrue(llvm::Type::getInt1Ty(*llvmContext));
llvmModule->addModuleFlag(llvm::Module::ModFlagBehavior::Error,
"qubit_resetting", trueValue);
llvmModule->addModuleFlag(llvm::Module::ModFlagBehavior::Error,
"classical_ints", falseValue);
llvmModule->addModuleFlag(llvm::Module::ModFlagBehavior::Error,
"classical_floats", falseValue);
llvmModule->addModuleFlag(llvm::Module::ModFlagBehavior::Error,
"classical_fixed_points", falseValue);
llvmModule->addModuleFlag(llvm::Module::ModFlagBehavior::Error,
"user_functions", falseValue);
llvmModule->addModuleFlag(llvm::Module::ModFlagBehavior::Error,
"dynamic_float_args", falseValue);
llvmModule->addModuleFlag(llvm::Module::ModFlagBehavior::Error,
"extern_functions", falseValue);
llvmModule->addModuleFlag(llvm::Module::ModFlagBehavior::Error,
"backwards_branching", falseValue);
}

// Note: optimizeLLVM is the one that is setting nonnull attributes on
// the @__quantum__rt__result_record_output calls.
cudaq::optimizeLLVM(llvmModule.get());
if (!cudaq::setupTargetTriple(llvmModule.get()))
throw std::runtime_error("Failed to setup the llvm module target triple.");

// PyQIR currently requires named blocks. It's not clear if blocks can share
// names across functions, so we are being conservative by giving every block
// in the module a unique name for now.
int blockCounter = 0;
for (llvm::Function &func : *llvmModule)
for (llvm::BasicBlock &block : func)
if (!block.hasName())
block.setName(std::to_string(blockCounter++));

if (printIR)
llvm::errs() << *llvmModule;

if (failed(verifyOutputRecordingFunctions(llvmModule.get())))
return failure();

if (failed(verifyMeasurementOrdering(llvmModule.get())))
if (isBaseProfile &&
failed(verifyBaseProfileMeasurementOrdering(llvmModule.get())))
return failure();

if (failed(verifyQubitAndResultRanges(llvmModule.get())))
return failure();

if (failed(verifyLLVMInstructions(llvmModule.get())))
if (failed(verifyLLVMInstructions(llvmModule.get(), isBaseProfile)))
return failure();

// Map the LLVM Module to Bitcode that can be submitted
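
For reference, the adaptive-profile module flags written above can be read back with the standard LLVM module-flag API. A brief hedged example (the helper name is made up):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

// Illustration only: query one of the flags written by
// qirProfileTranslationFunction above.
inline bool advertisesQubitResetting(const llvm::Module &m) {
  if (auto *md = m.getModuleFlag("qubit_resetting"))
    if (auto *ci = llvm::mdconst::dyn_extract<llvm::ConstantInt>(md))
      return ci->isOne();
  return false;
}
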
48 changes: 43 additions & 5 deletions runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp
@@ -119,16 +119,20 @@ class RemoteRESTQPU : public cudaq::QPU {
/// of JIT engines for invoking the kernels.
std::vector<ExecutionEngine *> jitEngines;

/// @brief Invoke the kernel in the JIT engine and then delete the JIT engine.
void invokeJITKernelAndRelease(ExecutionEngine *jit,
const std::string &kernelName) {
/// @brief Invoke the kernel in the JIT engine
void invokeJITKernel(ExecutionEngine *jit, const std::string &kernelName) {
auto funcPtr = jit->lookup(std::string("__nvqpp__mlirgen__") + kernelName);
if (!funcPtr) {
throw std::runtime_error(
"cudaq::builder failed to get kernelReg function.");
}
reinterpret_cast<void (*)()>(*funcPtr)();
// We're done, delete the pointer.
}

/// @brief Invoke the kernel in the JIT engine and then delete the JIT engine.
void invokeJITKernelAndRelease(ExecutionEngine *jit,
const std::string &kernelName) {
invokeJITKernel(jit, kernelName);
delete jit;
}

@@ -165,7 +169,9 @@ class RemoteRESTQPU : public cudaq::QPU {
bool isSimulator() override { return emulate; }

/// @brief Return true if the current backend supports conditional feedback
bool supportsConditionalFeedback() override { return false; }
bool supportsConditionalFeedback() override {
return codegenTranslation == "qir-adaptive";
}

/// Provide the number of shots
void setShots(int _nShots) override {
@@ -491,6 +497,38 @@ class RemoteRESTQPU : public cudaq::QPU {
if (seed > 0)
cudaq::set_random_seed(seed);

bool hasConditionals =
cudaq::kernelHasConditionalFeedback(kernelName);
if (hasConditionals && codes.size() > 1)
throw std::runtime_error("error: spin_ops not yet supported with "
"kernels containing conditionals");
if (hasConditionals) {
executor->setShots(1); // run one shot at a time

// If this is adaptive profile and the kernel has conditionals,
// then you have to run the code localShots times instead of
// running the kernel once and sampling the state localShots
// times.
if (hasConditionals) {
// Populate `counts` one shot at a time
cudaq::sample_result counts;
for (std::size_t shot = 0; shot < localShots; shot++) {
cudaq::ExecutionContext context("sample", 1);
context.hasConditionalsOnMeasureResults = true;
cudaq::getExecutionManager()->setExecutionContext(&context);
invokeJITKernel(localJIT[0], kernelName);
cudaq::getExecutionManager()->resetExecutionContext();
counts += context.result;
}
// Process `counts` and store into `results`
for (auto &regName : counts.register_names()) {
results.emplace_back(counts.to_map(regName), regName);
results.back().sequentialData =
counts.sequential_data(regName);
}
}
}

for (std::size_t i = 0; i < codes.size(); i++) {
cudaq::ExecutionContext context("sample", localShots);
cudaq::getExecutionManager()->setExecutionContext(&context);
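
On the host side, a program can check the capability exposed by supportsConditionalFeedback before launching a feedback-dependent kernel. A brief sketch, assuming the quantum_platform::supports_conditional_feedback accessor that forwards to the QPU method above:

#include <cudaq.h>
#include <stdexcept>

// Illustration only: guard a kernel that relies on measurement feedback.
void require_conditional_feedback() {
  auto &platform = cudaq::get_platform();
  if (!platform.supports_conditional_feedback())
    throw std::runtime_error(
        "selected target does not support conditional feedback");
}
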
(The remaining changed files in this commit are not shown in this excerpt.)
