forked from NVIDIA/FasterTransformer
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1,095 changed files
with
189,244 additions
and
6,550 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
docker | ||
.dockerignore | ||
.gitlab | ||
.gitlab-ci.yml | ||
|
||
*build* | ||
./models | ||
__pycache__ | ||
.vscode | ||
translation | ||
.cache | ||
*.npy | ||
*.pth | ||
*.o | ||
**/.ipynb_checkpoints |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# | ||
# Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
cmake_minimum_required(VERSION 3.8)

# Sources of the static library that resolves CUDA driver entry points at run time.
set(cuda_driver_wrapper_files
    cudaDriverWrapper.cpp
)

add_library(cuda_driver_wrapper STATIC ${cuda_driver_wrapper_files})
# cudaDriverWrapper.cpp calls dlopen/dlsym/dlclose on non-Windows platforms, so the
# platform's dynamic-loading library (CMAKE_DL_LIBS, e.g. -ldl) has to reach the link line
# of every consumer of this archive. The variable is empty where no such library is needed.
target_link_libraries(cuda_driver_wrapper PRIVATE -lcublas -lcudart ${CMAKE_DL_LIBS})
set_property(TARGET cuda_driver_wrapper PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET cuda_driver_wrapper PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
/* | ||
* Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#define CUDA_LIB_NAME "cuda" | ||
|
||
#if defined(_WIN32) | ||
#if !defined(WIN32_LEAN_AND_MEAN) | ||
#define WIN32_LEAN_AND_MEAN | ||
#endif // !defined(WIN32_LEAN_AND_MEAN) | ||
#include <windows.h> | ||
#define dllOpen(name) (void*) LoadLibraryA("nv" name ".dll") | ||
#define dllClose(handle) FreeLibrary(static_cast<HMODULE>(handle)) | ||
#define dllGetSym(handle, name) GetProcAddress(static_cast<HMODULE>(handle), name) | ||
#else | ||
#include <dlfcn.h> | ||
#define dllOpen(name) dlopen("lib" name ".so", RTLD_LAZY) | ||
#define dllClose(handle) dlclose(handle) | ||
#define dllGetSym(handle, name) dlsym(handle, name) | ||
#endif | ||
|
||
#include "cudaDriverWrapper.h" | ||
// #include "plugin.h" | ||
#include <cuda.h> | ||
#include <stdio.h> | ||
|
||
// using namespace nvinfer1; | ||
|
||
// Opens the CUDA driver library and resolves every driver entry point used by this class.
//
// Failures are reported on stderr instead of being silently ignored. When the library or a
// symbol cannot be found, the corresponding function pointer is left as nullptr.
CUDADriverWrapper::CUDADriverWrapper()
{
    handle = dllOpen(CUDA_LIB_NAME);
    if (handle == nullptr)
    {
        fprintf(stderr, "CUDADriverWrapper: failed to open the CUDA driver library\n");
    }

    // Looks up one symbol in the opened library. A null library handle is never passed to the
    // platform lookup function; the result is nullptr in that case.
    auto load_sym = [](void* lib, const char* name) -> void* {
        if (lib == nullptr)
        {
            return nullptr;
        }
        void* ret = dllGetSym(lib, name);
        if (ret == nullptr)
        {
            fprintf(stderr, "CUDADriverWrapper: failed to resolve driver symbol %s\n", name);
        }
        return ret;
    };

    // The link entry points are looked up under their versioned (_v2) names.
    *(void**) (&_cuGetErrorName) = load_sym(handle, "cuGetErrorName");
    *(void**) (&_cuFuncSetAttribute) = load_sym(handle, "cuFuncSetAttribute");
    *(void**) (&_cuLinkComplete) = load_sym(handle, "cuLinkComplete");
    *(void**) (&_cuModuleUnload) = load_sym(handle, "cuModuleUnload");
    *(void**) (&_cuLinkDestroy) = load_sym(handle, "cuLinkDestroy");
    *(void**) (&_cuModuleLoadData) = load_sym(handle, "cuModuleLoadData");
    *(void**) (&_cuLinkCreate) = load_sym(handle, "cuLinkCreate_v2");
    *(void**) (&_cuModuleGetFunction) = load_sym(handle, "cuModuleGetFunction");
    *(void**) (&_cuLinkAddFile) = load_sym(handle, "cuLinkAddFile_v2");
    *(void**) (&_cuLinkAddData) = load_sym(handle, "cuLinkAddData_v2");
    *(void**) (&_cuLaunchCooperativeKernel) = load_sym(handle, "cuLaunchCooperativeKernel");
    *(void**) (&_cuLaunchKernel) = load_sym(handle, "cuLaunchKernel");
}
|
||
// Releases the driver library handle obtained in the constructor.
CUDADriverWrapper::~CUDADriverWrapper()
{
    // The constructor may have failed to open the library; never close a null handle.
    if (handle != nullptr)
    {
        dllClose(handle);
        handle = nullptr;
    }
}
|
||
CUresult CUDADriverWrapper::cuGetErrorName(CUresult error, const char** pStr) const | ||
{ | ||
return (*_cuGetErrorName)(error, pStr); | ||
} | ||
|
||
// Forwards to the driver's cuFuncSetAttribute entry point resolved in the constructor.
// Returns CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND when that entry point was not resolved.
CUresult CUDADriverWrapper::cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value) const
{
    if (_cuFuncSetAttribute == nullptr)
    {
        return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND;
    }
    return (*_cuFuncSetAttribute)(hfunc, attrib, value);
}
|
||
// Forwards to the driver's cuLinkComplete entry point resolved in the constructor.
// Returns CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND when that entry point was not resolved.
CUresult CUDADriverWrapper::cuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut) const
{
    if (_cuLinkComplete == nullptr)
    {
        return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND;
    }
    return (*_cuLinkComplete)(state, cubinOut, sizeOut);
}
|
||
// Forwards to the driver's cuModuleUnload entry point resolved in the constructor.
// Returns CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND when that entry point was not resolved.
CUresult CUDADriverWrapper::cuModuleUnload(CUmodule hmod) const
{
    if (_cuModuleUnload == nullptr)
    {
        return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND;
    }
    return (*_cuModuleUnload)(hmod);
}
|
||
// Forwards to the driver's cuLinkDestroy entry point resolved in the constructor.
// Returns CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND when that entry point was not resolved.
CUresult CUDADriverWrapper::cuLinkDestroy(CUlinkState state) const
{
    if (_cuLinkDestroy == nullptr)
    {
        return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND;
    }
    return (*_cuLinkDestroy)(state);
}
|
||
// Forwards to the driver's cuModuleLoadData entry point resolved in the constructor.
// Returns CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND when that entry point was not resolved.
CUresult CUDADriverWrapper::cuModuleLoadData(CUmodule* module, const void* image) const
{
    if (_cuModuleLoadData == nullptr)
    {
        return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND;
    }
    return (*_cuModuleLoadData)(module, image);
}
|
||
// Forwards to the driver entry point that the constructor resolved under the name
// "cuLinkCreate_v2".
// Returns CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND when that entry point was not resolved.
CUresult CUDADriverWrapper::cuLinkCreate(
    unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut) const
{
    if (_cuLinkCreate == nullptr)
    {
        return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND;
    }
    return (*_cuLinkCreate)(numOptions, options, optionValues, stateOut);
}
|
||
// Forwards to the driver's cuModuleGetFunction entry point resolved in the constructor.
// Returns CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND when that entry point was not resolved.
CUresult CUDADriverWrapper::cuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name) const
{
    if (_cuModuleGetFunction == nullptr)
    {
        return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND;
    }
    return (*_cuModuleGetFunction)(hfunc, hmod, name);
}
|
||
// Forwards to the driver entry point that the constructor resolved under the name
// "cuLinkAddFile_v2".
// Returns CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND when that entry point was not resolved.
CUresult CUDADriverWrapper::cuLinkAddFile(CUlinkState state, CUjitInputType type, const char* path,
    unsigned int numOptions, CUjit_option* options, void** optionValues) const
{
    if (_cuLinkAddFile == nullptr)
    {
        return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND;
    }
    return (*_cuLinkAddFile)(state, type, path, numOptions, options, optionValues);
}
|
||
// Forwards to the driver entry point that the constructor resolved under the name
// "cuLinkAddData_v2".
// Returns CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND when that entry point was not resolved.
CUresult CUDADriverWrapper::cuLinkAddData(CUlinkState state, CUjitInputType type, void* data, size_t size,
    const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues) const
{
    if (_cuLinkAddData == nullptr)
    {
        return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND;
    }
    return (*_cuLinkAddData)(state, type, data, size, name, numOptions, options, optionValues);
}
|
||
// Forwards to the driver's cuLaunchCooperativeKernel entry point resolved in the constructor.
// Returns CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND when that entry point was not resolved.
CUresult CUDADriverWrapper::cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY,
    unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
    unsigned int sharedMemBytes, CUstream hStream, void** kernelParams) const
{
    if (_cuLaunchCooperativeKernel == nullptr)
    {
        return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND;
    }
    return (*_cuLaunchCooperativeKernel)(
        f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams);
}
|
||
// Forwards to the driver's cuLaunchKernel entry point resolved in the constructor.
// Returns CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND when that entry point was not resolved.
CUresult CUDADriverWrapper::cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY,
    unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
    unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra) const
{
    if (_cuLaunchKernel == nullptr)
    {
        return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND;
    }
    return (*_cuLaunchKernel)(
        f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/* | ||
* Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#ifndef CUDA_DRIVER_WRAPPER_H | ||
#define CUDA_DRIVER_WRAPPER_H | ||
|
||
#include <cstdio> | ||
#include <cuda.h> | ||
#pragma once | ||
|
||
#define cuErrCheck(stat, wrap) \ | ||
{ \ | ||
cuErrCheck_((stat), wrap, __FILE__, __LINE__); \ | ||
} | ||
|
||
// namespace nvinfer1 | ||
// { | ||
class CUDADriverWrapper | ||
{ | ||
public: | ||
CUDADriverWrapper(); | ||
|
||
~CUDADriverWrapper(); | ||
|
||
CUresult cuGetErrorName(CUresult error, const char** pStr) const; | ||
|
||
CUresult cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value) const; | ||
|
||
CUresult cuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut) const; | ||
|
||
CUresult cuModuleUnload(CUmodule hmod) const; | ||
|
||
CUresult cuLinkDestroy(CUlinkState state) const; | ||
|
||
CUresult cuModuleLoadData(CUmodule* module, const void* image) const; | ||
|
||
CUresult cuLinkCreate( | ||
unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut) const; | ||
|
||
CUresult cuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name) const; | ||
|
||
CUresult cuLinkAddFile(CUlinkState state, CUjitInputType type, const char* path, unsigned int numOptions, | ||
CUjit_option* options, void** optionValues) const; | ||
|
||
CUresult cuLinkAddData(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, | ||
unsigned int numOptions, CUjit_option* options, void** optionValues) const; | ||
|
||
CUresult cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, | ||
unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, | ||
unsigned int sharedMemBytes, CUstream hStream, void** kernelParams) const; | ||
|
||
CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, | ||
unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, | ||
CUstream hStream, void** kernelParams, void** extra) const; | ||
|
||
private: | ||
void* handle; | ||
CUresult (*_cuGetErrorName)(CUresult, const char**); | ||
CUresult (*_cuFuncSetAttribute)(CUfunction, CUfunction_attribute, int); | ||
CUresult (*_cuLinkComplete)(CUlinkState, void**, size_t*); | ||
CUresult (*_cuModuleUnload)(CUmodule); | ||
CUresult (*_cuLinkDestroy)(CUlinkState); | ||
CUresult (*_cuLinkCreate)(unsigned int, CUjit_option*, void**, CUlinkState*); | ||
CUresult (*_cuModuleLoadData)(CUmodule*, const void*); | ||
CUresult (*_cuModuleGetFunction)(CUfunction*, CUmodule, const char*); | ||
CUresult (*_cuLinkAddFile)(CUlinkState, CUjitInputType, const char*, unsigned int, CUjit_option*, void**); | ||
CUresult (*_cuLinkAddData)( | ||
CUlinkState, CUjitInputType, void*, size_t, const char*, unsigned int, CUjit_option*, void**); | ||
CUresult (*_cuLaunchCooperativeKernel)(CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, | ||
unsigned int, unsigned int, unsigned int, CUstream, void**); | ||
CUresult (*_cuLaunchKernel)(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, | ||
unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, | ||
CUstream hStream, void** kernelParams, void** extra); | ||
}; | ||
|
||
// Prints a diagnostic to stderr when `stat` is not CUDA_SUCCESS; does nothing otherwise.
//
// stat: result code to check.
// wrap: driver wrapper used to translate the code into its name.
// file, line: source location included in the message.
inline void cuErrCheck_(CUresult stat, const CUDADriverWrapper& wrap, const char* file, int line)
{
    if (stat == CUDA_SUCCESS)
    {
        return;
    }

    const char* msg = nullptr;
    // The name lookup can itself fail and leave msg null; passing a null pointer to "%s" is
    // undefined behavior, so substitute a placeholder.
    if (wrap.cuGetErrorName(stat, &msg) != CUDA_SUCCESS || msg == nullptr)
    {
        msg = "<unknown CUDA error>";
    }
    fprintf(stderr, "CUDA Error: %s %s %d\n", msg, file, line);
}
|
||
// } // namespace nvinfer1 | ||
|
||
#endif // CUDA_DRIVER_WRAPPER_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
cmake_minimum_required(VERSION 3.8)

# Seed the source list with the utility translation unit.
set(fp8_gemm_1x1_files
    fp8_qgmma_1x1_utils.cu
)

# Re-populate the list from glob expressions: the seed entry plus every cubins/*.cubin.cpp file.
file(GLOB fp8_gemm_1x1_files ${fp8_gemm_1x1_files} cubins/*.cubin.cpp)

add_library(fp8_qgmma_1x1_utils STATIC ${fp8_gemm_1x1_files})
target_link_libraries(fp8_qgmma_1x1_utils PUBLIC cuda_driver_wrapper)
set_target_properties(fp8_qgmma_1x1_utils PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    CUDA_RESOLVE_DEVICE_SYMBOLS ON
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#include "fp8_gemm_1x1.h" | ||
|
||
// Groups the host-side parameter holder for the compute stage.
struct Compute {
    // Owns a ComputeParams instance and fills it in from individual arguments.
    struct Host {
        ComputeParams _params;

        // Leaves _params untouched; call configure() before reading it.
        __host__ Host() {}

        // Returns the stored parameters by value.
        __host__ ComputeParams params() { return _params; }

        // Stores every argument in the field of the same name and derives two products:
        // PQ = P*Q and NPQ = N*P*Q.
        __host__ void configure(uint8_t* D, int N, int P, int Q, int C, int K, float ab_scale, float d_scale)
        {
            ComputeParams& dst = _params;

            dst.D = D;
            dst.N = N;
            dst.NPQ = N * P * Q;
            dst.PQ = P * Q;
            dst.P = P;
            dst.Q = Q;
            dst.C = C;
            dst.K = K;
            dst.ab_scale = ab_scale;
            dst.d_scale = d_scale;
        }
    };
};
Oops, something went wrong.