Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/SYCLomatic' into blas_q1
Browse files Browse the repository at this point in the history
  • Loading branch information
zhiweij1 committed Mar 1, 2024
2 parents 22a520d + dccbbf4 commit fd9a511
Show file tree
Hide file tree
Showing 42 changed files with 595 additions and 3,904 deletions.
13 changes: 10 additions & 3 deletions clang/include/clang/DPCT/DPCTOptions.inc
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,13 @@ DPCT_NON_ENUM_OPTION(DPCT_OPT_TYPE(static llvm::cl::opt<bool, true>), CheckUnico
"bi-directional formatting codes and homoglyphs in identifiers. Default: off.\n"),
llvm::cl::cat(DPCTCat), llvm::cl::location(CheckUnicodeSecurityFlag))

DPCT_NON_ENUM_OPTION(DPCT_OPT_TYPE(static llvm::cl::opt<bool, true>), EnablepProfiling, "enable-profiling",
llvm::cl::desc("Enable SYCL queue profiling in helper functions. Default: off.\n"),
llvm::cl::cat(DPCTCat), llvm::cl::location(EnablepProfilingFlag))
DPCT_NON_ENUM_OPTION(
DPCT_OPT_TYPE(static llvm::cl::opt<bool, true>), EnablepProfiling,
"enable-profiling",
llvm::cl::desc("Enable SYCL queue profiling in helper functions. default: "
"auto (enable-profiling if\nthe tool deduces that "
"profiling is required during migration, else not).\n"),
llvm::cl::cat(DPCTCat), llvm::cl::location(EnablepProfilingFlag))

DPCT_NON_ENUM_OPTION(DPCT_OPT_TYPE(static llvm::cl::opt<bool, true>), SyclNamedLambda, "sycl-named-lambda",
llvm::cl::desc("Generates kernels with the kernel name. Default: off.\n"),
Expand Down Expand Up @@ -409,6 +413,9 @@ DPCT_ENUM_OPTION(DPCT_OPT_TYPE(static llvm::cl::bits<ExperimentalFeatures>), Exp
false),
DPCT_OPT_ENUM("bindless_images", int(ExperimentalFeatures::Exp_BindlessImages),
"Experimental extension that allows use of bindless images APIs.\n",
false),
DPCT_OPT_ENUM("non-uniform-groups", int(ExperimentalFeatures::Exp_NonUniformGroups),
"Experimental extension that allows use of non-uniform groups.\n",
false)
),
llvm::cl::desc("Comma-separated list of experimental features to be used in migrated "
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/DPCT/APINamesTemplateType.inc
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,11 @@ TYPE_REWRITE_ENTRY(
"thrust::modulus",
TYPE_FACTORY(STR("std::modulus"), TEMPLATE_ARG(0)))

//thrust::reverse_iterator
TYPE_REWRITE_ENTRY(
"thrust::reverse_iterator",
TYPE_FACTORY(STR("oneapi::dpl::reverse_iterator"), TEMPLATE_ARG(0)))

TYPE_REWRITE_ENTRY(
"cub::DoubleBuffer",
TYPE_CONDITIONAL_FACTORY(
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/DPCT/APINames_ASM.inc
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ ENTRY("alloca", "alloca", false, NO_FLAG, P1, "Comment")
ENTRY("and", "and", true, NO_FLAG, P1, "Successful")
ENTRY("applypriority", "applypriority", false, NO_FLAG, P1, "Comment")
ENTRY("atom", "atom", false, NO_FLAG, P1, "Comment")
ENTRY("bar", "bar", false, NO_FLAG, P1, "Comment")
ENTRY("bar", "bar", true, NO_FLAG, P1, "Partial")
ENTRY("barrier", "barrier", false, NO_FLAG, P1, "Comment")
ENTRY("bfe", "bfe", true, NO_FLAG, P1, "Successful")
ENTRY("bfi", "bfi", true, NO_FLAG, P1, "Successful")
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/DPCT/APINames_CUB.inc
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ ENTRY_MEMBER_FUNCTION(cub::DeviceHistogram, cub::DeviceHistogram, HistogramEven,
ENTRY_MEMBER_FUNCTION(cub::DeviceHistogram, cub::DeviceHistogram, MultiHistogramEven, MultiHistogramEven, true, NO_FLAG, P4, "Successful: DPCT1026/DPCT1027")
ENTRY_MEMBER_FUNCTION(cub::DeviceHistogram, cub::DeviceHistogram, HistogramRange, HistogramRange, true, NO_FLAG, P4, "Successful: DPCT1026/DPCT1027")
ENTRY_MEMBER_FUNCTION(cub::DeviceHistogram, cub::DeviceHistogram, MultiHistogramRange, MultiHistogramRange, true, NO_FLAG, P4, "Successful: DPCT1026/DPCT1027")
ENTRY_MEMBER_FUNCTION(cub::DevicePartition, cub::DevicePartition, Flagged, Flagged, false, NO_FLAG, P4, "Comment")
ENTRY_MEMBER_FUNCTION(cub::DevicePartition, cub::DevicePartition, If, If, false, NO_FLAG, P4, "Comment")
ENTRY_MEMBER_FUNCTION(cub::DevicePartition, cub::DevicePartition, Flagged, Flagged, true, NO_FLAG, P4, "Successful: DPCT1026/DPCT1027")
ENTRY_MEMBER_FUNCTION(cub::DevicePartition, cub::DevicePartition, If, If, true, NO_FLAG, P4, "Successful: DPCT1026/DPCT1027")
ENTRY_MEMBER_FUNCTION(cub::DeviceMergeSort, cub::DeviceMergeSort, SortPairs, SortPairs, true, NO_FLAG, P4, "Successful: DPCT1026/DPCT1027")
ENTRY_MEMBER_FUNCTION(cub::DeviceMergeSort, cub::DeviceMergeSort, SortPairsCopy, SortPairsCopy, false, NO_FLAG, P4, "Comment")
ENTRY_MEMBER_FUNCTION(cub::DeviceMergeSort, cub::DeviceMergeSort, SortKeys, SortKeys, true, NO_FLAG, P4, "Successful: DPCT1026/DPCT1027")
Expand Down
37 changes: 37 additions & 0 deletions clang/lib/DPCT/ASTTraversal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8102,6 +8102,43 @@ void EventAPICallRule::handleOrdinaryCalls(const CallExpr *Call) {

REGISTER_RULE(EventAPICallRule, PassKind::PK_Migration)

void ProfilingEnableOnDemandRule::registerMatcher(MatchFinder &MF) {
MF.addMatcher(callExpr(allOf(callee(functionDecl(hasAnyName(
"cudaEventElapsedTime", "cudaEventRecord"))),
parentStmt()))
.bind("cudaEventElapsedTimeCall"),
this);
MF.addMatcher(
callExpr(allOf(callee(functionDecl(hasName("cudaEventElapsedTime"))),
unless(parentStmt())))
.bind("cudaEventElapsedTimeUsed"),
this);
}

// When cudaEventElapsedTimeCall() is called in the source code, event profiling
// opton "--enable-profiling" is enabled to measure the execution time of a
// specific kernel or command in SYCL device.
void ProfilingEnableOnDemandRule::runRule(
const MatchFinder::MatchResult &Result) {

if (DpctGlobalInfo::getEnablepProfilingFlag())
return;

const CallExpr *CE =
getNodeAsType<CallExpr>(Result, "cudaEventElapsedTimeCall");
if (!CE) {
if (!(CE = getNodeAsType<CallExpr>(Result, "cudaEventElapsedTimeUsed")))
return;
}

if (!CE->getDirectCallee())
return;

DpctGlobalInfo::setEnablepProfilingFlag(true);
}

REGISTER_RULE(ProfilingEnableOnDemandRule, PassKind::PK_Analysis)

void StreamAPICallRule::registerMatcher(MatchFinder &MF) {
auto streamFunctionName = [&]() {
return hasAnyName("cudaStreamCreate", "cudaStreamCreateWithFlags",
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/DPCT/ASTTraversal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1367,6 +1367,13 @@ class MemVarRefMigrationRule : public NamedMigrationRule<MemVarRefMigrationRule>
void runRule(const ast_matchers::MatchFinder::MatchResult &Result);
};

class ProfilingEnableOnDemandRule
: public NamedMigrationRule<ProfilingEnableOnDemandRule> {
public:
void registerMatcher(ast_matchers::MatchFinder &MF) override;
void runRule(const ast_matchers::MatchFinder::MatchResult &Result);
};

/// Migration rule for memory management routine.
/// Current implementation is intentionally simplistic. The following things
/// need a more detailed design:
Expand Down
39 changes: 29 additions & 10 deletions clang/lib/DPCT/AnalysisInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -870,8 +870,11 @@ void DpctFileInfo::insertHeader(HeaderType Type, unsigned Offset,
return insertHeader(OS.str(), FirstIncludeOffset,
InsertPosition::IP_AlwaysLeft);
case HT_SYCL:
if (DpctGlobalInfo::getEnablepProfilingFlag())
OS << "#define DPCT_PROFILING_ENABLED" << getNL();
// Add the label for profiling macro "DPCT_PROFILING_ENABLED", which will be
// replaced by "#define DPCT_PROFILING_ENABLED" or not in the post
// replacement.
OS << "{{NEEDREPLACEP0}}";

if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None)
OS << "#define DPCT_USM_LEVEL_NONE" << getNL();
concatHeader(OS, getHeaderSpelling(Type));
Expand Down Expand Up @@ -900,10 +903,10 @@ void DpctFileInfo::insertHeader(HeaderType Type, unsigned Offset,
<< DpctGlobalInfo::getGlobalDeviceName() << ", "
<< MapNames::getClNamespace() << "property_list{"
<< MapNames::getClNamespace() << "property::queue::in_order()";
if (DpctGlobalInfo::getEnablepProfilingFlag()) {
OS << ", " << MapNames::getClNamespace()
<< "property::queue::enable_profiling()";
}

// replaced to insert "property::queue::enable_profiling()" or not
// in the post replacement.
OS << "{{NEEDREPLACEI0}}";
OS << "});" << getNL();
Flag = false;
} else {
Expand Down Expand Up @@ -1399,6 +1402,7 @@ std::string DpctGlobalInfo::getStringForRegexReplacement(StringRef MatchedStr) {
// F: free queries function migration, such as this_nd_item, this_group,
// this_sub_group.
// E: extension, used for c source file migration
// P: profiling enable or disable for time measurement.
switch (Method) {
case 'R':
if (DpctGlobalInfo::getAssumedNDRangeDim() == 1) {
Expand Down Expand Up @@ -1442,6 +1446,21 @@ std::string DpctGlobalInfo::getStringForRegexReplacement(StringRef MatchedStr) {
? ("c" + DpctGlobalInfo::getSYCLSourceExtension())
: "c";
}
case 'P': {
std::string ReplStr;
if (DpctGlobalInfo::getEnablepProfilingFlag())
ReplStr = std::string("#define DPCT_PROFILING_ENABLED") + getNL();

return ReplStr;
}
case 'I': {
std::string ReplStr;
if (DpctGlobalInfo::getEnablepProfilingFlag())
ReplStr = ", " + MapNames::getClNamespace() +
"property::queue::enable_profiling()";

return ReplStr;
}
case FreeQueriesInfo::FreeQueriesRegexCh:
return FreeQueriesInfo::getReplaceString(Index);
default:
Expand Down Expand Up @@ -1613,10 +1632,10 @@ void DpctGlobalInfo::buildReplacements() {
// Now the UsmLevel must not be UL_None here.
QDecl << "q_ct1(dev_ct1, " << MapNames::getClNamespace() << "property_list{"
<< MapNames::getClNamespace() << "property::queue::in_order()";
if (DpctGlobalInfo::getEnablepProfilingFlag()) {
QDecl << ", " << MapNames::getClNamespace()
<< "property::queue::enable_profiling()";
}

// replaced to insert of "property::queue::enable_profiling()" or not in
// the post replacement.
QDecl << "{{NEEDREPLACEI0}}";
QDecl << "});";
}

Expand Down
3 changes: 3 additions & 0 deletions clang/lib/DPCT/AnalysisInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,9 @@ class DpctGlobalInfo {
static bool useExtBindlessImages() {
return getUsingExperimental<ExperimentalFeatures::Exp_BindlessImages>();
}
static bool useExpNonUniformGroups() {
return getUsingExperimental<ExperimentalFeatures::Exp_NonUniformGroups>();
}
static bool useNoQueueDevice() {
return getHelperFuncPreference(HelperFuncPreference::NoQueueDevice);
}
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/DPCT/Asm/AsmIdentifierTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,7 @@ void InlineAsmIdentifierTable::AddKeywords() {
get(Y, asmtok::kw_##X).setFlag(InlineAsmIdentifierInfo::InstAttr);
#define BIN_OP(X, Y) \
get(Y, asmtok::kw_##X).setFlag(InlineAsmIdentifierInfo::InstAttr);
#define SYNC_OP(X, Y) \
get(Y, asmtok::kw_##X).setFlag(InlineAsmIdentifierInfo::InstAttr);
#include "AsmTokenKinds.def"
}
32 changes: 16 additions & 16 deletions clang/lib/DPCT/Asm/AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,9 @@ static inline InstAttr ConvertToInstAttr(asmtok::TokenKind Kind) {
#define BIN_OP(X, Y) \
case asmtok::kw_##X: \
return InstAttr::X;
#define SYNC_OP(X, Y) \
case asmtok::kw_##X: \
return InstAttr::X;
#include "Asm/AsmTokenKinds.def"
default:
llvm_unreachable("Kind is not an instruction attribute");
Expand All @@ -286,7 +289,6 @@ InlineAsmStmtResult InlineAsmParser::ParseInstruction() {
InlineAsmIdentifierInfo *Opcode = Tok.getIdentifier();
ConsumeToken();

unsigned OpIndex = 0;
SmallVector<InstAttr, 4> Attrs;
SmallVector<InlineAsmType *, 4> Types;
SmallVector<InlineAsmExpr *, 4> Ops;
Expand All @@ -305,32 +307,30 @@ InlineAsmStmtResult InlineAsmParser::ParseInstruction() {
ConsumeToken(); // consume instruction attribute
}

auto ParseOperand = [&]() {
InlineAsmExprResult E = ParseExpression();
if (E.isInvalid() || OpIndex >= Types.size())
return AsmExprError();
Ops.push_back(E.get());
return InlineAsmExprResult();
};

if (ParseOperand().isInvalid())
InlineAsmExprResult Pred, Out;
if ((Out = ParseExpression()).isInvalid())
return AsmStmtError();

bool HasPredOutput = TryConsumeToken(asmtok::pipe);
InlineAsmExprResult PredOutput;
if (HasPredOutput && ParseOperand().isInvalid())
if (TryConsumeToken(asmtok::pipe) && (Pred = ParseExpression()).isInvalid())
return AsmStmtError();

while (TryConsumeToken(asmtok::comma)) {
if (ParseOperand().isInvalid())
InlineAsmExprResult E = ParseExpression();
if (E.isInvalid())
return AsmStmtError();
Ops.push_back(E.get());
}

if (!TryConsumeToken(asmtok::semi))
return AsmStmtError();

// bar.warp.sync only has one input operand.
if (Opcode->getTokenID() == asmtok::op_bar) {
Ops.push_back(Out.get());
Out = nullptr;
}

return ::new (Context)
InlineAsmInstruction(Opcode, Attrs, Types, Ops, HasPredOutput);
InlineAsmInstruction(Opcode, Attrs, Types, Out.get(), Pred.get(), Ops);
}

InlineAsmExprResult InlineAsmParser::ParseExpression() {
Expand Down
58 changes: 21 additions & 37 deletions clang/lib/DPCT/Asm/AsmParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ enum class InstAttr {
#define MUL_MOD(X, Y) X,
#define CMP_OP(X, Y) X,
#define BIN_OP(X, Y) X,
#define SYNC_OP(X, Y) X,
#include "Asm/AsmTokenKinds.def"
};

Expand Down Expand Up @@ -311,22 +312,25 @@ class InlineAsmInstruction : public InlineAsmStmt {
/// This represents types in instruction, e.g. mov.u32.
SmallVector<InlineAsmType *, 4> Types;

/// The operands of instruction. Operands[0] is output operand,
/// If HasPredOutput is true, Operands[1] is pred output operand,
/// therest are input operands.
SmallVector<InlineAsmExpr *, 4> Operands;
// The output operand of instruction.
InlineAsmExpr *OutputOp = nullptr;

// Predicate output, e.g. given shfl.sync.up.b32 Ry|p, Rx, 0x1, 0x0,
// The predicate output operand of instruction.
// e.g. given shfl.sync.up.b32 Ry|p, Rx, 0x1, 0x0,
// 0xffffffff; p is a predicate output.
bool HasPredOutput = false;
InlineAsmExpr *PredOutputOp = nullptr;

/// The input operands of instruction. Operands[0] is output operand,
/// If HasPredOutput is true, Operands[1] is pred output operand,
/// therest are input operands.
SmallVector<InlineAsmExpr *, 4> InputOps;

public:
InlineAsmInstruction(InlineAsmIdentifierInfo *Op, ArrayRef<InstAttr> Attrs,
ArrayRef<InlineAsmType *> Types,
ArrayRef<InlineAsmExpr *> Ops, bool HasPred = false)
ArrayRef<InlineAsmType *> Types, InlineAsmExpr *Out,
InlineAsmExpr *Pred, ArrayRef<InlineAsmExpr *> InOps)
: InlineAsmStmt(InstructionClass), Opcode(Op), Types(Types),
Operands(Ops), HasPredOutput(HasPred) {
assert(Operands.size() >= 1U + HasPredOutput);
OutputOp(Out), PredOutputOp(Pred), InputOps(InOps) {
Attributes.insert(Attrs.begin(), Attrs.end());
}

Expand All @@ -345,46 +349,26 @@ class InlineAsmInstruction : public InlineAsmStmt {
return is(OpKind) || (is(OpKinds) || ...);
}

template <typename... Ts>
bool hasAttr(Ts... Attrs) const { return (Attributes.contains(Attrs) || ...); }
template <typename... Ts> bool hasAttr(Ts... Attrs) const {
return (Attributes.contains(Attrs) || ...);
}
const InlineAsmIdentifierInfo *getOpcodeID() const { return Opcode; }
asmtok::TokenKind getOpcode() const { return Opcode->getTokenID(); }
ArrayRef<InlineAsmType *> getTypes() const { return Types; }
const InlineAsmType *getType(unsigned I) const { return Types[I]; }
unsigned getNumTypes() const { return Types.size(); }

const InlineAsmExpr *getOutputOperand() const {
assert(!Operands.empty());
return Operands.front();
}

const InlineAsmExpr *getPredOutputOperand() const {
assert(HasPredOutput && Operands.size() >= 2U);
return Operands[1];
}

ArrayRef<InlineAsmExpr *> getInputOperands() const {
assert(Operands.size() > 1U + HasPredOutput);
return ArrayRef<InlineAsmExpr *>(Operands.begin() + 1 + HasPredOutput,
Operands.end());
}

const InlineAsmExpr *getOutputOperand() const { return OutputOp; }
const InlineAsmExpr *getPredOutputOperand() const { return PredOutputOp; }
ArrayRef<InlineAsmExpr *> getInputOperands() const { return InputOps; }
const InlineAsmExpr *getInputOperand(unsigned I) const {
return getInputOperands()[I];
}

size_t getNumInputOperands() const {
return Operands.size() - 1 - HasPredOutput;
}

size_t getNumInputOperands() const { return InputOps.size(); }
attr_range attrs() const {
return attr_range(Attributes.begin(), Attributes.end());
}

type_range types() const { return type_range(Types.begin(), Types.end()); }

op_range input_operands() const { return op_range(getInputOperands()); }

static bool classof(const InlineAsmStmt *S) {
return InstructionClass <= S->getStmtClass();
}
Expand Down
Loading

0 comments on commit fd9a511

Please sign in to comment.