Skip to content

Commit

Permalink
[SYCL] [Driver] Add offload-arch support for SYCL offloading (#15624)
Browse files Browse the repository at this point in the history
Implement the `--offload-arch` option to enable SYCL offloading to Intel
CPUs, Intel GPUs, NVIDIA GPUs and AMD GPUs.

The `--offload-arch` implementation is currently supported only in the new
driver model (i.e. with the `--offload-new-driver` option).

**Example usage:**
```
clang++   --offload-new-driver  -fsycl  --offload-arch=bdw         // Offload SYCL code to Intel GPU
clang++   --offload-new-driver  -fsycl  --offload-arch=broadwell   // Offload SYCL code to Intel CPU
clang++   --offload-new-driver  -fsycl  --offload-arch=sm_80       // Offload SYCL code to NVidia GPU
clang++   --offload-new-driver  -fsycl  --offload-arch=gfx700      // Offload SYCL code to AMD GPU
```
  • Loading branch information
srividya-sundaram authored Oct 24, 2024
1 parent f03fc04 commit 326e54e
Show file tree
Hide file tree
Showing 8 changed files with 724 additions and 1 deletion.
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,10 @@ def err_drv_sycl_missing_amdgpu_arch : Error<
"missing AMDGPU architecture for SYCL offloading; specify it with '-Xsycl-target-backend%select{|=%1}0 --offload-arch=<arch-name>'">;
// Error for an option ('%0') that is rejected while another option ('%1') is
// set under '-fsycl'. NOTE(review): the call site is not visible here; this
// description is taken from the message text only - confirm against usage.
def err_drv_sycl_thinlto_split_off: Error<
"'%0' is not supported when '%1' is set with '-fsycl'">;
// Emitted by the driver when '--offload-arch' is used for SYCL offloading
// without '--offload-new-driver' being enabled.
def err_drv_sycl_offload_arch_new_driver: Error<
"'--offload-arch' is supported when '-fsycl' is set with '--offload-new-driver'">;
// Emitted by the driver when '--offload-arch' was given for SYCL offloading
// but the resulting set of architectures is empty.
def err_drv_sycl_offload_arch_missing_value : Error<
"must pass in an explicit cpu or gpu architecture to '--offload-arch'">;
// Emitted when a SYCL offloading target normalizes to the same triple as one
// already recorded. '%0' is the repeated target and '%1' is the target that
// was recorded first; the repeated one is skipped.
def warn_drv_sycl_offload_target_duplicate : Warning<
"SYCL offloading target '%0' is similar to target '%1' already specified; "
"will be ignored">, InGroup<SyclTarget>;
Expand Down
106 changes: 105 additions & 1 deletion clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1191,12 +1191,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
llvm::StringMap<StringRef> FoundNormalizedTriples;
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
// StringSet to contain SYCL target triples.
llvm::StringSet<> SYCLTriples;
if (HasSYCLTargetsOption) {
// At this point, we know we have a valid combination
// of -fsycl*target options passed
Arg *SYCLTargetsValues = SYCLTargets;
if (SYCLTargetsValues) {
llvm::StringSet<> SYCLTriples;
if (SYCLTargetsValues->getNumValues()) {

// Multiple targets are currently not supported when using
Expand Down Expand Up @@ -1296,6 +1297,109 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
Diag(clang::diag::warn_drv_empty_joined_argument)
<< SYCLTargetsValues->getAsString(C.getInputArgs());
}
}
// If the user specified --offload-arch, deduce the offloading
// target triple(s) from the set of architecture(s).
// Create a toolchain for each valid triple.
// We do not support SYCL offloading if any of the inputs is a
// .cu (for CUDA type) or .hip (for HIP type) file.
else if (HasValidSYCLRuntime &&
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && !IsHIP &&
!IsCuda) {
// SYCL offloading to AOT Targets with '--offload-arch'
// is currently enabled only with '--offload-new-driver' option.
// Emit a diagnostic if '--offload-arch' is invoked without
// '--offload-new-driver' option.
if (!C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false)) {
Diag(clang::diag::err_drv_sycl_offload_arch_new_driver);
return;
}
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
HostTC->getTriple());

// Attempt to deduce the offloading triple from the set of architectures.
// We need to temporarily create these toolchains so that we can access
// tools for inferring architectures.
llvm::DenseSet<StringRef> Archs;
if (NVPTXTriple) {
auto TempTC = std::make_unique<toolchains::CudaToolChain>(
*this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None);
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
Archs.insert(Arch);
}
if (AMDTriple) {
auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
*this, *AMDTriple, *HostTC, C.getInputArgs());
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
Archs.insert(Arch);
}
if (!AMDTriple && !NVPTXTriple) {
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true))
Archs.insert(Arch);
}
for (StringRef Arch : Archs) {
if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch(
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
} else if (AMDTriple &&
IsSYCLSupportedAMDGPUArch(StringToOffloadArch(
getProcessorFromTargetID(*AMDTriple, Arch)))) {
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
} else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) {
DerivedArchs[MakeSYCLDeviceTriple("spir64_x86_64").getTriple()].insert(
Arch);
} else if (IsSYCLSupportedIntelGPUArch(StringToOffloadArchSYCL(Arch))) {
StringRef IntelGPUArch;
// For Intel Graphics AOT target, valid values for '--offload-arch'
// are mapped to valid device names accepted by OCLOC (the Intel GPU AOT
// compiler) via the '-device' option. The mapIntelGPUArchName
// function maps the accepted values for '--offload-arch' to enable SYCL
// offloading to Intel GPUs and the corresponding '-device' value passed
// to OCLOC.
IntelGPUArch = mapIntelGPUArchName(Arch).data();
DerivedArchs[MakeSYCLDeviceTriple("spir64_gen").getTriple()].insert(
IntelGPUArch);
} else {
Diag(clang::diag::err_drv_invalid_sycl_target) << Arch;
return;
}
}
// Emit an error if architecture value is not provided
// to --offload-arch.
if (Archs.empty()) {
Diag(clang::diag::err_drv_sycl_offload_arch_missing_value);
return;
}

for (const auto &TripleAndArchs : DerivedArchs)
SYCLTriples.insert(TripleAndArchs.first());

for (const auto &Val : SYCLTriples) {
llvm::Triple SYCLTargetTriple(MakeSYCLDeviceTriple(Val.getKey()));
std::string NormalizedName = SYCLTargetTriple.normalize();

// Make sure we don't have a duplicate triple.
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
if (Duplicate != FoundNormalizedTriples.end()) {
Diag(clang::diag::warn_drv_sycl_offload_target_duplicate)
<< Val.getKey() << Duplicate->second;
continue;
}

// Store the current triple so that we can check for duplicates in the
// following iterations.
FoundNormalizedTriples[NormalizedName] = Val.getKey();
UniqueSYCLTriplesVec.push_back(SYCLTargetTriple);
}

addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);

} else {
// If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
// For -fsycl-device-only, we also setup the implied triple as needed.
Expand Down
119 changes: 119 additions & 0 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,125 @@ using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;

// Table entry that pairs the textual name of an Intel CPU or Intel GPU AOT
// target with the SYCLSupportedIntelArchs enumerator that represents it.
struct StringToOffloadArchSYCLMap {
// Architecture name as a C string, e.g. "skylake" or "pvc".
const char *ArchName;
// Enumerator that corresponds to ArchName.
SYCLSupportedIntelArchs IntelArch;
};

// Mapping of supported SYCL offloading architectures. Each entry pairs an
// architecture name with its SYCLSupportedIntelArchs enumerator; Intel CPU
// names come first, followed by Intel GPU names. Every name appears exactly
// once (a repeated "dg2_g10" entry was dropped: it was redundant, since a
// first-match lookup can never reach a second entry with the same name).
static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = {
    // Intel CPU mapping.
    {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512},
    {"core-avx2", SYCLSupportedIntelArchs::COREAVX2},
    {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX},
    {"corei7", SYCLSupportedIntelArchs::COREI7},
    {"westmere", SYCLSupportedIntelArchs::WESTMERE},
    {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE},
    {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE},
    {"broadwell", SYCLSupportedIntelArchs::BROADWELL},
    {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE},
    {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE},
    {"skylake", SYCLSupportedIntelArchs::SKYLAKE},
    {"skx", SYCLSupportedIntelArchs::SKX},
    {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE},
    {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT},
    {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER},
    {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS},
    {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS},
    // Intel GPU mapping.
    {"bdw", SYCLSupportedIntelArchs::BDW},
    {"skl", SYCLSupportedIntelArchs::SKL},
    {"kbl", SYCLSupportedIntelArchs::KBL},
    {"cfl", SYCLSupportedIntelArchs::CFL},
    {"apl", SYCLSupportedIntelArchs::APL},
    {"bxt", SYCLSupportedIntelArchs::BXT},
    {"glk", SYCLSupportedIntelArchs::GLK},
    {"whl", SYCLSupportedIntelArchs::WHL},
    {"aml", SYCLSupportedIntelArchs::AML},
    {"cml", SYCLSupportedIntelArchs::CML},
    {"icllp", SYCLSupportedIntelArchs::ICLLP},
    {"icl", SYCLSupportedIntelArchs::ICL},
    {"ehl", SYCLSupportedIntelArchs::EHL},
    {"jsl", SYCLSupportedIntelArchs::JSL},
    {"tgllp", SYCLSupportedIntelArchs::TGLLP},
    {"tgl", SYCLSupportedIntelArchs::TGL},
    {"rkl", SYCLSupportedIntelArchs::RKL},
    {"adl_s", SYCLSupportedIntelArchs::ADL_S},
    {"rpl_s", SYCLSupportedIntelArchs::RPL_S},
    {"adl_p", SYCLSupportedIntelArchs::ADL_P},
    {"adl_n", SYCLSupportedIntelArchs::ADL_N},
    {"dg1", SYCLSupportedIntelArchs::DG1},
    {"acm_g10", SYCLSupportedIntelArchs::ACM_G10},
    {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
    {"acm_g11", SYCLSupportedIntelArchs::ACM_G11},
    {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11},
    {"acm_g12", SYCLSupportedIntelArchs::ACM_G12},
    {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12},
    {"pvc", SYCLSupportedIntelArchs::PVC},
    {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG},
    {"mtl_u", SYCLSupportedIntelArchs::MTL_U},
    {"mtl_s", SYCLSupportedIntelArchs::MTL_S},
    {"arl_u", SYCLSupportedIntelArchs::ARL_U},
    {"arl_s", SYCLSupportedIntelArchs::ARL_S},
    {"mtl_h", SYCLSupportedIntelArchs::MTL_H},
    {"arl_h", SYCLSupportedIntelArchs::ARL_H},
    {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21},
    {"lnl_m", SYCLSupportedIntelArchs::LNL_M}};

// Look up the user provided --offload-arch value in StringToArchNamesMap and
// return the enumerator of the first entry whose name matches it exactly.
// Returns SYCLSupportedIntelArchs::UNKNOWN when no entry matches.
SYCLSupportedIntelArchs
clang::driver::StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString) {
  for (const StringToOffloadArchSYCLMap &Entry : StringToArchNamesMap) {
    if (ArchNameAsString == Entry.ArchName)
      return Entry.IntelArch;
  }
  return SYCLSupportedIntelArchs::UNKNOWN;
}

// Translate a user provided --offload-arch value naming an Intel GPU target
// into the spir64_gen device name accepted by OCLOC (the Intel GPU AOT
// compiler). Several spellings collapse onto a single device name; a value
// that is not listed yields an empty StringRef.
StringRef clang::driver::mapIntelGPUArchName(StringRef ArchName) {
  return llvm::StringSwitch<StringRef>(ArchName)
      .Case("bdw", "bdw")
      .Case("skl", "skl")
      .Case("kbl", "kbl")
      .Case("cfl", "cfl")
      .Cases("apl", "bxt", "apl")
      .Case("glk", "glk")
      .Case("whl", "whl")
      .Case("aml", "aml")
      .Case("cml", "cml")
      .Cases("icllp", "icl", "icllp")
      .Cases("ehl", "jsl", "ehl")
      .Cases("tgllp", "tgl", "tgllp")
      .Case("rkl", "rkl")
      .Cases("adl_s", "rpl_s", "adl_s")
      .Case("adl_p", "adl_p")
      .Case("adl_n", "adl_n")
      .Case("dg1", "dg1")
      .Cases("acm_g10", "dg2_g10", "acm_g10")
      .Cases("acm_g11", "dg2_g11", "acm_g11")
      .Cases("acm_g12", "dg2_g12", "acm_g12")
      .Case("pvc", "pvc")
      .Case("pvc_vg", "pvc_vg")
      .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u")
      .Case("mtl_h", "mtl_h")
      .Case("arl_h", "arl_h")
      .Case("bmg_g21", "bmg_g21")
      .Case("lnl_m", "lnl_m")
      .Default("");
}

SYCLInstallationDetector::SYCLInstallationDetector(const Driver &D)
: D(D), InstallationCandidates() {
InstallationCandidates.emplace_back(D.Dir + "/..");
Expand Down
109 changes: 109 additions & 0 deletions clang/lib/Driver/ToolChains/SYCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,122 @@
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SYCL_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SYCL_H

#include "clang/Basic/Cuda.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"

namespace clang {
namespace driver {

// Architectures (Intel CPUs and Intel GPUs) that support SYCL offloading.
//
// Enumerator order is significant: IsSYCLSupportedIntelCPUArch and
// IsSYCLSupportedIntelGPUArch classify a value by comparing it against the
// first and last enumerator of each group, so each group must stay contiguous
// and keep its current first/last members.
enum class SYCLSupportedIntelArchs {
  // Sentinel for a name that matches no supported architecture.
  UNKNOWN = 0,

  // Intel CPUs (SKYLAKEAVX512 .. GRANITERAPIDS).
  SKYLAKEAVX512,
  COREAVX2,
  COREI7AVX,
  COREI7,
  WESTMERE,
  SANDYBRIDGE,
  IVYBRIDGE,
  BROADWELL,
  COFFEELAKE,
  ALDERLAKE,
  SKYLAKE,
  SKX,
  CASCADELAKE,
  ICELAKECLIENT,
  ICELAKESERVER,
  SAPPHIRERAPIDS,
  GRANITERAPIDS,

  // Intel GPUs (BDW .. LNL_M).
  BDW,
  SKL,
  KBL,
  CFL,
  APL,
  BXT,
  GLK,
  WHL,
  AML,
  CML,
  ICLLP,
  ICL,
  EHL,
  JSL,
  TGLLP,
  TGL,
  RKL,
  ADL_S,
  RPL_S,
  ADL_P,
  ADL_N,
  DG1,
  ACM_G10,
  DG2_G10,
  ACM_G11,
  DG2_G11,
  ACM_G12,
  DG2_G12,
  PVC,
  PVC_VG,
  MTL_U,
  MTL_S,
  ARL_U,
  ARL_S,
  MTL_H,
  ARL_H,
  BMG_G21,
  LNL_M,
};

// Report whether Arch is one of the GFX*_GENERIC AMD GPU architectures.
// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading, and
// `IsSYCLSupportedAMDGPUArch` uses this predicate to filter them out.
static inline bool IsAMDGenericGPUArch(OffloadArch Arch) {
  switch (Arch) {
  case OffloadArch::GFX9_GENERIC:
  case OffloadArch::GFX10_1_GENERIC:
  case OffloadArch::GFX10_3_GENERIC:
  case OffloadArch::GFX11_GENERIC:
  case OffloadArch::GFX12_GENERIC:
    return true;
  default:
    return false;
  }
}

// Report whether Arch is an AMD GPU that SYCL offloading supports: it must
// lie in the range [GFX700, AMDGCNSPIRV) and must not be a GFX*_GENERIC
// architecture.
static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) {
  const bool InAMDRange =
      OffloadArch::GFX700 <= Arch && Arch < OffloadArch::AMDGCNSPIRV;
  if (!InAMDRange)
    return false;
  return !IsAMDGenericGPUArch(Arch);
}

// Report whether Arch is an NVidia GPU that SYCL offloading supports, i.e.
// whether it lies in the inclusive range [SM_50, SM_90a].
static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) {
  if (Arch < OffloadArch::SM_50)
    return false;
  return Arch <= OffloadArch::SM_90a;
}

// Report whether Arch is an Intel CPU that SYCL offloading supports, i.e.
// whether it lies in the inclusive enumerator range
// [SKYLAKEAVX512, GRANITERAPIDS].
static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) {
  if (Arch < SYCLSupportedIntelArchs::SKYLAKEAVX512)
    return false;
  return Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS;
}

// Report whether Arch is an Intel GPU that SYCL offloading supports, i.e.
// whether it lies in the inclusive enumerator range [BDW, LNL_M].
static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) {
  if (Arch < SYCLSupportedIntelArchs::BDW)
    return false;
  return Arch <= SYCLSupportedIntelArchs::LNL_M;
}

// Map the user provided value for --offload-arch to the enumerator of the
// matching SYCL supported Intel AOT target (CPU or GPU). Returns
// SYCLSupportedIntelArchs::UNKNOWN when the value matches no supported name.
SYCLSupportedIntelArchs
StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString);

// This is a mapping between the user provided --offload-arch value for Intel
// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU
// AOT compiler). Returns an empty StringRef when ArchName is not a recognized
// Intel GPU value.
StringRef mapIntelGPUArchName(StringRef ArchName);

class SYCLInstallationDetector {
public:
SYCLInstallationDetector(const Driver &D);
Expand Down
Loading

0 comments on commit 326e54e

Please sign in to comment.