From f352c8610f1ea56eae4134547852af665633c6e1 Mon Sep 17 00:00:00 2001
From: huynh-anh
Date: Mon, 14 Feb 2022 19:57:16 +0000
Subject: [PATCH 1/4] changed import because it was deprecated

---
 adet/layers/csrc/ml_nms/ml_nms.cu |  10 +-
 collect_env.py                    | 469 ++++++++++++++++++++++++++++++
 2 files changed, 474 insertions(+), 5 deletions(-)
 create mode 100644 collect_env.py

diff --git a/adet/layers/csrc/ml_nms/ml_nms.cu b/adet/layers/csrc/ml_nms/ml_nms.cu
index f1c1a4206..13be92a13 100644
--- a/adet/layers/csrc/ml_nms/ml_nms.cu
+++ b/adet/layers/csrc/ml_nms/ml_nms.cu
@@ -2,7 +2,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
@@ -65,7 +65,7 @@ __global__ void ml_nms_kernel(const int n_boxes, const float nms_overlap_thresh,
       t |= 1ULL << i;
     }
   }
-  const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
+  const int col_blocks = ceil_div(n_boxes, threadsPerBlock);
   dev_mask[cur_box_idx * col_blocks + col_start] = t;
 }
 }
@@ -82,7 +82,7 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {

   int boxes_num = boxes.size(0);

-  const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
+  const int col_blocks = ceil_div(boxes_num, threadsPerBlock);

   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
@@ -94,8 +94,8 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {
   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));

-  dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
-              THCCeilDiv(boxes_num, threadsPerBlock));
+  dim3 blocks(ATenCeilDiv(boxes_num, threadsPerBlock),
+              ATenCeilDiv(boxes_num, threadsPerBlock));
   dim3 threads(threadsPerBlock);
   ml_nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh,
diff --git a/collect_env.py b/collect_env.py
new file mode 100644
index 000000000..85543c5ce
--- /dev/null
+++ b/collect_env.py
@@ -0,0 +1,469 @@
+from __future__ import print_function
+
+# Unlike the rest of the PyTorch this file must be python2 compliant.
+# This script outputs relevant system environment info
+# Run it with `python collect_env.py`.
+import datetime +import locale +import re +import subprocess +import sys +import os +from collections import namedtuple + + +try: + import torch + TORCH_AVAILABLE = True +except (ImportError, NameError, AttributeError, OSError): + TORCH_AVAILABLE = False + +# System Environment Information +SystemEnv = namedtuple('SystemEnv', [ + 'torch_version', + 'is_debug_build', + 'cuda_compiled_version', + 'gcc_version', + 'clang_version', + 'cmake_version', + 'os', + 'libc_version', + 'python_version', + 'python_platform', + 'is_cuda_available', + 'cuda_runtime_version', + 'nvidia_driver_version', + 'nvidia_gpu_models', + 'cudnn_version', + 'pip_version', # 'pip' or 'pip3' + 'pip_packages', + 'conda_packages', + 'hip_compiled_version', + 'hip_runtime_version', + 'miopen_runtime_version', + 'caching_allocator_config', +]) + + +def run(command): + """Returns (return-code, stdout, stderr)""" + p = subprocess.Popen(command, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, shell=True) + raw_output, raw_err = p.communicate() + rc = p.returncode + if get_platform() == 'win32': + enc = 'oem' + else: + enc = locale.getpreferredencoding() + output = raw_output.decode(enc) + err = raw_err.decode(enc) + return rc, output.strip(), err.strip() + + +def run_and_read_all(run_lambda, command): + """Runs command using run_lambda; reads and returns entire output if rc is 0""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + return out + + +def run_and_parse_first_match(run_lambda, command, regex): + """Runs command using run_lambda, returns the first regex match if it exists""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + match = re.search(regex, out) + if match is None: + return None + return match.group(1) + +def run_and_return_first_line(run_lambda, command): + """Runs command using run_lambda and returns first line if output is not empty""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + return out.split('\n')[0] + + +def get_conda_packages(run_lambda): + if get_platform() == 'win32': + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + findstr_cmd = os.path.join(system_root, 'System32', 'findstr') + grep_cmd = r'{} /R "torch numpy cudatoolkit soumith mkl magma mypy"'.format(findstr_cmd) + else: + grep_cmd = r'grep "torch\|numpy\|cudatoolkit\|soumith\|mkl\|magma\|mypy"' + conda = os.environ.get('CONDA_EXE', 'conda') + out = run_and_read_all(run_lambda, conda + ' list | ' + grep_cmd) + if out is None: + return out + # Comment starting at beginning of line + comment_regex = re.compile(r'^#.*\n') + return re.sub(comment_regex, '', out) + + +def get_gcc_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)') + +def get_clang_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)') + + +def get_cmake_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)') + + +def get_nvidia_driver_version(run_lambda): + if get_platform() == 'darwin': + cmd = 'kextstat | grep -i cuda' + return run_and_parse_first_match(run_lambda, cmd, + r'com[.]nvidia[.]CUDA [(](.*?)[)]') + smi = get_nvidia_smi() + return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) 
') + + +def get_gpu_info(run_lambda): + if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None): + if TORCH_AVAILABLE and torch.cuda.is_available(): + return torch.cuda.get_device_name(None) + return None + smi = get_nvidia_smi() + uuid_regex = re.compile(r' \(UUID: .+?\)') + rc, out, _ = run_lambda(smi + ' -L') + if rc != 0: + return None + # Anonymize GPUs by removing their UUID + return re.sub(uuid_regex, '', out) + + +def get_running_cuda_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)') + + +def get_cudnn_version(run_lambda): + """This will return a list of libcudnn.so; it's hard to tell which one is being used""" + if get_platform() == 'win32': + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%") + where_cmd = os.path.join(system_root, 'System32', 'where') + cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path) + elif get_platform() == 'darwin': + # CUDA libraries and drivers can be found in /usr/local/cuda/. See + # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install + # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac + # Use CUDNN_LIBRARY when cudnn library is installed elsewhere. + cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*' + else: + cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev' + rc, out, _ = run_lambda(cudnn_cmd) + # find will return 1 if there are permission errors or if not found + if len(out) == 0 or (rc != 1 and rc != 0): + l = os.environ.get('CUDNN_LIBRARY') + if l is not None and os.path.isfile(l): + return os.path.realpath(l) + return None + files_set = set() + for fn in out.split('\n'): + fn = os.path.realpath(fn) # eliminate symbolic links + if os.path.isfile(fn): + files_set.add(fn) + if not files_set: + return None + # Alphabetize the result because the order is non-deterministic otherwise + files = list(sorted(files_set)) + if len(files) == 1: + return files[0] + result = '\n'.join(files) + return 'Probably one of the following:\n{}'.format(result) + + +def get_nvidia_smi(): + # Note: nvidia-smi is currently available only on Windows and Linux + smi = 'nvidia-smi' + if get_platform() == 'win32': + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files') + legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi) + new_path = os.path.join(system_root, 'System32', smi) + smis = [new_path, legacy_path] + for candidate_smi in smis: + if os.path.exists(candidate_smi): + smi = '"{}"'.format(candidate_smi) + break + return smi + + +def get_platform(): + if sys.platform.startswith('linux'): + return 'linux' + elif sys.platform.startswith('win32'): + return 'win32' + elif sys.platform.startswith('cygwin'): + return 'cygwin' + elif sys.platform.startswith('darwin'): + return 'darwin' + else: + return sys.platform + + +def get_mac_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)') + + +def get_windows_version(run_lambda): + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic') + findstr_cmd = os.path.join(system_root, 'System32', 'findstr') + return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)) + + +def 
get_lsb_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)') + + +def check_release_file(run_lambda): + return run_and_parse_first_match(run_lambda, 'cat /etc/*-release', + r'PRETTY_NAME="(.*)"') + + +def get_os(run_lambda): + from platform import machine + platform = get_platform() + + if platform == 'win32' or platform == 'cygwin': + return get_windows_version(run_lambda) + + if platform == 'darwin': + version = get_mac_version(run_lambda) + if version is None: + return None + return 'macOS {} ({})'.format(version, machine()) + + if platform == 'linux': + # Ubuntu/Debian based + desc = get_lsb_version(run_lambda) + if desc is not None: + return '{} ({})'.format(desc, machine()) + + # Try reading /etc/*-release + desc = check_release_file(run_lambda) + if desc is not None: + return '{} ({})'.format(desc, machine()) + + return '{} ({})'.format(platform, machine()) + + # Unknown platform + return platform + + +def get_python_platform(): + import platform + return platform.platform() + + +def get_libc_version(): + import platform + if get_platform() != 'linux': + return 'N/A' + return '-'.join(platform.libc_ver()) + + +def get_pip_packages(run_lambda): + """Returns `pip list` output. Note: will also find conda-installed pytorch + and numpy packages.""" + # People generally have `pip` as `pip` or `pip3` + # But here it is incoved as `python -mpip` + def run_with_pip(pip): + if get_platform() == 'win32': + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + findstr_cmd = os.path.join(system_root, 'System32', 'findstr') + grep_cmd = r'{} /R "numpy torch mypy"'.format(findstr_cmd) + else: + grep_cmd = r'grep "torch\|numpy\|mypy"' + return run_and_read_all(run_lambda, pip + ' list --format=freeze | ' + grep_cmd) + + pip_version = 'pip3' if sys.version[0] == '3' else 'pip' + out = run_with_pip(sys.executable + ' -mpip') + + return pip_version, out + + +def get_cachingallocator_config(): + ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '') + return ca_config + + +def get_env_info(): + run_lambda = run + pip_version, pip_list_output = get_pip_packages(run_lambda) + + if TORCH_AVAILABLE: + version_str = torch.__version__ + debug_mode_str = str(torch.version.debug) + cuda_available_str = str(torch.cuda.is_available()) + cuda_version_str = torch.version.cuda + if not hasattr(torch.version, 'hip') or torch.version.hip is None: # cuda version + hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' + else: # HIP version + cfg = torch._C._show_config().split('\n') + hip_runtime_version = [s.rsplit(None, 1)[-1] for s in cfg if 'HIP Runtime' in s][0] + miopen_runtime_version = [s.rsplit(None, 1)[-1] for s in cfg if 'MIOpen' in s][0] + cuda_version_str = 'N/A' + hip_compiled_version = torch.version.hip + else: + version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A' + hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' + + sys_version = sys.version.replace("\n", " ") + + return SystemEnv( + torch_version=version_str, + is_debug_build=debug_mode_str, + python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1), + python_platform=get_python_platform(), + is_cuda_available=cuda_available_str, + cuda_compiled_version=cuda_version_str, + cuda_runtime_version=get_running_cuda_version(run_lambda), + nvidia_gpu_models=get_gpu_info(run_lambda), + nvidia_driver_version=get_nvidia_driver_version(run_lambda), + cudnn_version=get_cudnn_version(run_lambda), + 
hip_compiled_version=hip_compiled_version, + hip_runtime_version=hip_runtime_version, + miopen_runtime_version=miopen_runtime_version, + pip_version=pip_version, + pip_packages=pip_list_output, + conda_packages=get_conda_packages(run_lambda), + os=get_os(run_lambda), + libc_version=get_libc_version(), + gcc_version=get_gcc_version(run_lambda), + clang_version=get_clang_version(run_lambda), + cmake_version=get_cmake_version(run_lambda), + caching_allocator_config=get_cachingallocator_config(), + ) + +env_info_fmt = """ +PyTorch version: {torch_version} +Is debug build: {is_debug_build} +CUDA used to build PyTorch: {cuda_compiled_version} +ROCM used to build PyTorch: {hip_compiled_version} + +OS: {os} +GCC version: {gcc_version} +Clang version: {clang_version} +CMake version: {cmake_version} +Libc version: {libc_version} + +Python version: {python_version} +Python platform: {python_platform} +Is CUDA available: {is_cuda_available} +CUDA runtime version: {cuda_runtime_version} +GPU models and configuration: {nvidia_gpu_models} +Nvidia driver version: {nvidia_driver_version} +cuDNN version: {cudnn_version} +HIP runtime version: {hip_runtime_version} +MIOpen runtime version: {miopen_runtime_version} + +Versions of relevant libraries: +{pip_packages} +{conda_packages} +""".strip() + + +def pretty_str(envinfo): + def replace_nones(dct, replacement='Could not collect'): + for key in dct.keys(): + if dct[key] is not None: + continue + dct[key] = replacement + return dct + + def replace_bools(dct, true='Yes', false='No'): + for key in dct.keys(): + if dct[key] is True: + dct[key] = true + elif dct[key] is False: + dct[key] = false + return dct + + def prepend(text, tag='[prepend]'): + lines = text.split('\n') + updated_lines = [tag + line for line in lines] + return '\n'.join(updated_lines) + + def replace_if_empty(text, replacement='No relevant packages'): + if text is not None and len(text) == 0: + return replacement + return text + + def maybe_start_on_next_line(string): + # If `string` is multiline, prepend a \n to it. 
+ if string is not None and len(string.split('\n')) > 1: + return '\n{}\n'.format(string) + return string + + mutable_dict = envinfo._asdict() + + # If nvidia_gpu_models is multiline, start on the next line + mutable_dict['nvidia_gpu_models'] = \ + maybe_start_on_next_line(envinfo.nvidia_gpu_models) + + # If the machine doesn't have CUDA, report some fields as 'No CUDA' + dynamic_cuda_fields = [ + 'cuda_runtime_version', + 'nvidia_gpu_models', + 'nvidia_driver_version', + ] + all_cuda_fields = dynamic_cuda_fields + ['cudnn_version'] + all_dynamic_cuda_fields_missing = all( + mutable_dict[field] is None for field in dynamic_cuda_fields) + if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing: + for field in all_cuda_fields: + mutable_dict[field] = 'No CUDA' + if envinfo.cuda_compiled_version is None: + mutable_dict['cuda_compiled_version'] = 'None' + + # Replace True with Yes, False with No + mutable_dict = replace_bools(mutable_dict) + + # Replace all None objects with 'Could not collect' + mutable_dict = replace_nones(mutable_dict) + + # If either of these are '', replace with 'No relevant packages' + mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages']) + mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages']) + + # Tag conda and pip packages with a prefix + # If they were previously None, they'll show up as ie '[conda] Could not collect' + if mutable_dict['pip_packages']: + mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'], + '[{}] '.format(envinfo.pip_version)) + if mutable_dict['conda_packages']: + mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'], + '[conda] ') + return env_info_fmt.format(**mutable_dict) + + +def get_pretty_env_info(): + return pretty_str(get_env_info()) + + +def main(): + print("Collecting environment information...") + output = get_pretty_env_info() + print(output) + + if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'): + minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR + if sys.platform == "linux" and os.path.exists(minidump_dir): + dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)] + latest = max(dumps, key=os.path.getctime) + ctime = os.path.getctime(latest) + creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S') + msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \ + "if this is related to your bug please include it when you file a report ***" + print(msg, file=sys.stderr) + + + +if __name__ == '__main__': + main() From fde08d628afd4c9ff0bdf6fc9bacc3fa7dbf7bf5 Mon Sep 17 00:00:00 2001 From: huynh-anh Date: Mon, 14 Feb 2022 19:58:24 +0000 Subject: [PATCH 2/4] removed wrong file --- collect_env.py | 469 ------------------------------------------------- 1 file changed, 469 deletions(-) delete mode 100644 collect_env.py diff --git a/collect_env.py b/collect_env.py deleted file mode 100644 index 85543c5ce..000000000 --- a/collect_env.py +++ /dev/null @@ -1,469 +0,0 @@ -from __future__ import print_function - -# Unlike the rest of the PyTorch this file must be python2 compliant. -# This script outputs relevant system environment info -# Run it with `python collect_env.py`. 
-import datetime -import locale -import re -import subprocess -import sys -import os -from collections import namedtuple - - -try: - import torch - TORCH_AVAILABLE = True -except (ImportError, NameError, AttributeError, OSError): - TORCH_AVAILABLE = False - -# System Environment Information -SystemEnv = namedtuple('SystemEnv', [ - 'torch_version', - 'is_debug_build', - 'cuda_compiled_version', - 'gcc_version', - 'clang_version', - 'cmake_version', - 'os', - 'libc_version', - 'python_version', - 'python_platform', - 'is_cuda_available', - 'cuda_runtime_version', - 'nvidia_driver_version', - 'nvidia_gpu_models', - 'cudnn_version', - 'pip_version', # 'pip' or 'pip3' - 'pip_packages', - 'conda_packages', - 'hip_compiled_version', - 'hip_runtime_version', - 'miopen_runtime_version', - 'caching_allocator_config', -]) - - -def run(command): - """Returns (return-code, stdout, stderr)""" - p = subprocess.Popen(command, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, shell=True) - raw_output, raw_err = p.communicate() - rc = p.returncode - if get_platform() == 'win32': - enc = 'oem' - else: - enc = locale.getpreferredencoding() - output = raw_output.decode(enc) - err = raw_err.decode(enc) - return rc, output.strip(), err.strip() - - -def run_and_read_all(run_lambda, command): - """Runs command using run_lambda; reads and returns entire output if rc is 0""" - rc, out, _ = run_lambda(command) - if rc != 0: - return None - return out - - -def run_and_parse_first_match(run_lambda, command, regex): - """Runs command using run_lambda, returns the first regex match if it exists""" - rc, out, _ = run_lambda(command) - if rc != 0: - return None - match = re.search(regex, out) - if match is None: - return None - return match.group(1) - -def run_and_return_first_line(run_lambda, command): - """Runs command using run_lambda and returns first line if output is not empty""" - rc, out, _ = run_lambda(command) - if rc != 0: - return None - return out.split('\n')[0] - - -def get_conda_packages(run_lambda): - if get_platform() == 'win32': - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - findstr_cmd = os.path.join(system_root, 'System32', 'findstr') - grep_cmd = r'{} /R "torch numpy cudatoolkit soumith mkl magma mypy"'.format(findstr_cmd) - else: - grep_cmd = r'grep "torch\|numpy\|cudatoolkit\|soumith\|mkl\|magma\|mypy"' - conda = os.environ.get('CONDA_EXE', 'conda') - out = run_and_read_all(run_lambda, conda + ' list | ' + grep_cmd) - if out is None: - return out - # Comment starting at beginning of line - comment_regex = re.compile(r'^#.*\n') - return re.sub(comment_regex, '', out) - - -def get_gcc_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)') - -def get_clang_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)') - - -def get_cmake_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)') - - -def get_nvidia_driver_version(run_lambda): - if get_platform() == 'darwin': - cmd = 'kextstat | grep -i cuda' - return run_and_parse_first_match(run_lambda, cmd, - r'com[.]nvidia[.]CUDA [(](.*?)[)]') - smi = get_nvidia_smi() - return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) 
') - - -def get_gpu_info(run_lambda): - if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None): - if TORCH_AVAILABLE and torch.cuda.is_available(): - return torch.cuda.get_device_name(None) - return None - smi = get_nvidia_smi() - uuid_regex = re.compile(r' \(UUID: .+?\)') - rc, out, _ = run_lambda(smi + ' -L') - if rc != 0: - return None - # Anonymize GPUs by removing their UUID - return re.sub(uuid_regex, '', out) - - -def get_running_cuda_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)') - - -def get_cudnn_version(run_lambda): - """This will return a list of libcudnn.so; it's hard to tell which one is being used""" - if get_platform() == 'win32': - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%") - where_cmd = os.path.join(system_root, 'System32', 'where') - cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path) - elif get_platform() == 'darwin': - # CUDA libraries and drivers can be found in /usr/local/cuda/. See - # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install - # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac - # Use CUDNN_LIBRARY when cudnn library is installed elsewhere. - cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*' - else: - cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev' - rc, out, _ = run_lambda(cudnn_cmd) - # find will return 1 if there are permission errors or if not found - if len(out) == 0 or (rc != 1 and rc != 0): - l = os.environ.get('CUDNN_LIBRARY') - if l is not None and os.path.isfile(l): - return os.path.realpath(l) - return None - files_set = set() - for fn in out.split('\n'): - fn = os.path.realpath(fn) # eliminate symbolic links - if os.path.isfile(fn): - files_set.add(fn) - if not files_set: - return None - # Alphabetize the result because the order is non-deterministic otherwise - files = list(sorted(files_set)) - if len(files) == 1: - return files[0] - result = '\n'.join(files) - return 'Probably one of the following:\n{}'.format(result) - - -def get_nvidia_smi(): - # Note: nvidia-smi is currently available only on Windows and Linux - smi = 'nvidia-smi' - if get_platform() == 'win32': - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files') - legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi) - new_path = os.path.join(system_root, 'System32', smi) - smis = [new_path, legacy_path] - for candidate_smi in smis: - if os.path.exists(candidate_smi): - smi = '"{}"'.format(candidate_smi) - break - return smi - - -def get_platform(): - if sys.platform.startswith('linux'): - return 'linux' - elif sys.platform.startswith('win32'): - return 'win32' - elif sys.platform.startswith('cygwin'): - return 'cygwin' - elif sys.platform.startswith('darwin'): - return 'darwin' - else: - return sys.platform - - -def get_mac_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)') - - -def get_windows_version(run_lambda): - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic') - findstr_cmd = os.path.join(system_root, 'System32', 'findstr') - return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)) - - -def 
get_lsb_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)') - - -def check_release_file(run_lambda): - return run_and_parse_first_match(run_lambda, 'cat /etc/*-release', - r'PRETTY_NAME="(.*)"') - - -def get_os(run_lambda): - from platform import machine - platform = get_platform() - - if platform == 'win32' or platform == 'cygwin': - return get_windows_version(run_lambda) - - if platform == 'darwin': - version = get_mac_version(run_lambda) - if version is None: - return None - return 'macOS {} ({})'.format(version, machine()) - - if platform == 'linux': - # Ubuntu/Debian based - desc = get_lsb_version(run_lambda) - if desc is not None: - return '{} ({})'.format(desc, machine()) - - # Try reading /etc/*-release - desc = check_release_file(run_lambda) - if desc is not None: - return '{} ({})'.format(desc, machine()) - - return '{} ({})'.format(platform, machine()) - - # Unknown platform - return platform - - -def get_python_platform(): - import platform - return platform.platform() - - -def get_libc_version(): - import platform - if get_platform() != 'linux': - return 'N/A' - return '-'.join(platform.libc_ver()) - - -def get_pip_packages(run_lambda): - """Returns `pip list` output. Note: will also find conda-installed pytorch - and numpy packages.""" - # People generally have `pip` as `pip` or `pip3` - # But here it is incoved as `python -mpip` - def run_with_pip(pip): - if get_platform() == 'win32': - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - findstr_cmd = os.path.join(system_root, 'System32', 'findstr') - grep_cmd = r'{} /R "numpy torch mypy"'.format(findstr_cmd) - else: - grep_cmd = r'grep "torch\|numpy\|mypy"' - return run_and_read_all(run_lambda, pip + ' list --format=freeze | ' + grep_cmd) - - pip_version = 'pip3' if sys.version[0] == '3' else 'pip' - out = run_with_pip(sys.executable + ' -mpip') - - return pip_version, out - - -def get_cachingallocator_config(): - ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '') - return ca_config - - -def get_env_info(): - run_lambda = run - pip_version, pip_list_output = get_pip_packages(run_lambda) - - if TORCH_AVAILABLE: - version_str = torch.__version__ - debug_mode_str = str(torch.version.debug) - cuda_available_str = str(torch.cuda.is_available()) - cuda_version_str = torch.version.cuda - if not hasattr(torch.version, 'hip') or torch.version.hip is None: # cuda version - hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' - else: # HIP version - cfg = torch._C._show_config().split('\n') - hip_runtime_version = [s.rsplit(None, 1)[-1] for s in cfg if 'HIP Runtime' in s][0] - miopen_runtime_version = [s.rsplit(None, 1)[-1] for s in cfg if 'MIOpen' in s][0] - cuda_version_str = 'N/A' - hip_compiled_version = torch.version.hip - else: - version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A' - hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' - - sys_version = sys.version.replace("\n", " ") - - return SystemEnv( - torch_version=version_str, - is_debug_build=debug_mode_str, - python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1), - python_platform=get_python_platform(), - is_cuda_available=cuda_available_str, - cuda_compiled_version=cuda_version_str, - cuda_runtime_version=get_running_cuda_version(run_lambda), - nvidia_gpu_models=get_gpu_info(run_lambda), - nvidia_driver_version=get_nvidia_driver_version(run_lambda), - cudnn_version=get_cudnn_version(run_lambda), - 
hip_compiled_version=hip_compiled_version, - hip_runtime_version=hip_runtime_version, - miopen_runtime_version=miopen_runtime_version, - pip_version=pip_version, - pip_packages=pip_list_output, - conda_packages=get_conda_packages(run_lambda), - os=get_os(run_lambda), - libc_version=get_libc_version(), - gcc_version=get_gcc_version(run_lambda), - clang_version=get_clang_version(run_lambda), - cmake_version=get_cmake_version(run_lambda), - caching_allocator_config=get_cachingallocator_config(), - ) - -env_info_fmt = """ -PyTorch version: {torch_version} -Is debug build: {is_debug_build} -CUDA used to build PyTorch: {cuda_compiled_version} -ROCM used to build PyTorch: {hip_compiled_version} - -OS: {os} -GCC version: {gcc_version} -Clang version: {clang_version} -CMake version: {cmake_version} -Libc version: {libc_version} - -Python version: {python_version} -Python platform: {python_platform} -Is CUDA available: {is_cuda_available} -CUDA runtime version: {cuda_runtime_version} -GPU models and configuration: {nvidia_gpu_models} -Nvidia driver version: {nvidia_driver_version} -cuDNN version: {cudnn_version} -HIP runtime version: {hip_runtime_version} -MIOpen runtime version: {miopen_runtime_version} - -Versions of relevant libraries: -{pip_packages} -{conda_packages} -""".strip() - - -def pretty_str(envinfo): - def replace_nones(dct, replacement='Could not collect'): - for key in dct.keys(): - if dct[key] is not None: - continue - dct[key] = replacement - return dct - - def replace_bools(dct, true='Yes', false='No'): - for key in dct.keys(): - if dct[key] is True: - dct[key] = true - elif dct[key] is False: - dct[key] = false - return dct - - def prepend(text, tag='[prepend]'): - lines = text.split('\n') - updated_lines = [tag + line for line in lines] - return '\n'.join(updated_lines) - - def replace_if_empty(text, replacement='No relevant packages'): - if text is not None and len(text) == 0: - return replacement - return text - - def maybe_start_on_next_line(string): - # If `string` is multiline, prepend a \n to it. 
- if string is not None and len(string.split('\n')) > 1: - return '\n{}\n'.format(string) - return string - - mutable_dict = envinfo._asdict() - - # If nvidia_gpu_models is multiline, start on the next line - mutable_dict['nvidia_gpu_models'] = \ - maybe_start_on_next_line(envinfo.nvidia_gpu_models) - - # If the machine doesn't have CUDA, report some fields as 'No CUDA' - dynamic_cuda_fields = [ - 'cuda_runtime_version', - 'nvidia_gpu_models', - 'nvidia_driver_version', - ] - all_cuda_fields = dynamic_cuda_fields + ['cudnn_version'] - all_dynamic_cuda_fields_missing = all( - mutable_dict[field] is None for field in dynamic_cuda_fields) - if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing: - for field in all_cuda_fields: - mutable_dict[field] = 'No CUDA' - if envinfo.cuda_compiled_version is None: - mutable_dict['cuda_compiled_version'] = 'None' - - # Replace True with Yes, False with No - mutable_dict = replace_bools(mutable_dict) - - # Replace all None objects with 'Could not collect' - mutable_dict = replace_nones(mutable_dict) - - # If either of these are '', replace with 'No relevant packages' - mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages']) - mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages']) - - # Tag conda and pip packages with a prefix - # If they were previously None, they'll show up as ie '[conda] Could not collect' - if mutable_dict['pip_packages']: - mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'], - '[{}] '.format(envinfo.pip_version)) - if mutable_dict['conda_packages']: - mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'], - '[conda] ') - return env_info_fmt.format(**mutable_dict) - - -def get_pretty_env_info(): - return pretty_str(get_env_info()) - - -def main(): - print("Collecting environment information...") - output = get_pretty_env_info() - print(output) - - if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'): - minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR - if sys.platform == "linux" and os.path.exists(minidump_dir): - dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)] - latest = max(dumps, key=os.path.getctime) - ctime = os.path.getctime(latest) - creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S') - msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \ - "if this is related to your bug please include it when you file a report ***" - print(msg, file=sys.stderr) - - - -if __name__ == '__main__': - main() From 23da3ef7111696b7ec4a51c7b17551b46457219a Mon Sep 17 00:00:00 2001 From: huynh-anh Date: Mon, 14 Feb 2022 19:58:46 +0000 Subject: [PATCH 3/4] removed import because deprecated --- adet/layers/csrc/ml_nms/ml_nms.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/adet/layers/csrc/ml_nms/ml_nms.cu b/adet/layers/csrc/ml_nms/ml_nms.cu index 13be92a13..5fa3d7d19 100644 --- a/adet/layers/csrc/ml_nms/ml_nms.cu +++ b/adet/layers/csrc/ml_nms/ml_nms.cu @@ -3,10 +3,12 @@ #include #include #include +#include #include #include +namespace cuda { int const threadsPerBlock = sizeof(unsigned long long) * 8; __device__ inline float devIoU(float const * const a, float const * const b) { @@ -82,7 +84,7 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) { int boxes_num = boxes.size(0); - const int col_blocks = ceil_div(boxes_num, 
threadsPerBlock);
+  const int col_blocks = cuda::ATenCeilDiv(boxes_num, threadsPerBlock);

   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
@@ -135,5 +137,5 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {
           order_t.device(), keep.scalar_type()) }).sort(0, false));
 }
-
+}
 } // namespace adet
\ No newline at end of file

From efab1f51e9cc892a2f8a78490b43b8118d3045da Mon Sep 17 00:00:00 2001
From: huynh-anh
Date: Mon, 14 Feb 2022 21:11:44 +0000
Subject: [PATCH 4/4] replaced deprecated function/API calls

---
 adet/layers/csrc/ml_nms/ml_nms.cu | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/adet/layers/csrc/ml_nms/ml_nms.cu b/adet/layers/csrc/ml_nms/ml_nms.cu
index 5fa3d7d19..c565da7e5 100644
--- a/adet/layers/csrc/ml_nms/ml_nms.cu
+++ b/adet/layers/csrc/ml_nms/ml_nms.cu
@@ -4,11 +4,10 @@
 #include
 #include
 #include
-
+#include
 #include
 #include

-namespace cuda {
 int const threadsPerBlock = sizeof(unsigned long long) * 8;

 __device__ inline float devIoU(float const * const a, float const * const b) {
@@ -67,7 +66,7 @@ __global__ void ml_nms_kernel(const int n_boxes, const float nms_overlap_thresh,
       t |= 1ULL << i;
     }
   }
-  const int col_blocks = ceil_div(n_boxes, threadsPerBlock);
+  const int col_blocks = at::ceil_div(n_boxes, threadsPerBlock);
   dev_mask[cur_box_idx * col_blocks + col_start] = t;
 }
 }
@@ -84,20 +83,20 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {

   int boxes_num = boxes.size(0);

-  const int col_blocks = cuda::ATenCeilDiv(boxes_num, threadsPerBlock);
+  const int col_blocks = at::ceil_div(boxes_num, threadsPerBlock);

   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();

-  THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
+  // THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState

   unsigned long long* mask_dev = NULL;
   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
   //                         boxes_num * col_blocks * sizeof(unsigned long long)));
-  mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
+  mask_dev = (unsigned long long*) c10::cuda::CUDACachingAllocator::raw_alloc(boxes_num * col_blocks * sizeof(unsigned long long));

-  dim3 blocks(ATenCeilDiv(boxes_num, threadsPerBlock),
-              ATenCeilDiv(boxes_num, threadsPerBlock));
+  dim3 blocks(at::ceil_div(boxes_num, threadsPerBlock),
+              at::ceil_div(boxes_num, threadsPerBlock));
   dim3 threads(threadsPerBlock);
   ml_nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh,
@@ -105,7 +104,7 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {
                                      mask_dev);

   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
-  THCudaCheck(cudaMemcpy(&mask_host[0],
+  C10_CUDA_CHECK(cudaMemcpy(&mask_host[0],
                          mask_dev,
                          sizeof(unsigned long long) * boxes_num * col_blocks,
                          cudaMemcpyDeviceToHost));
@@ -130,12 +129,12 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {
     }
   }

-  THCudaFree(state, mask_dev);
+  c10::cuda::CUDACachingAllocator::raw_delete(mask_dev);
   // TODO improve this part
   return std::get<0>(order_t.index({
                        keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
                          order_t.device(), keep.scalar_type()) }).sort(0, false));
 }
-}
+
 } // namespace adet
\ No newline at end of file
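
Taken together, the series lands on the standard THC-to-c10/ATen migration: THCCeilDiv becomes at::ceil_div, THCudaMalloc/THCudaFree become the caching allocator's raw_alloc/raw_delete (no THCState handle needed), and THCudaCheck becomes C10_CUDA_CHECK. Below is a minimal, self-contained sketch of that end state, not the literal patched file; it assumes a PyTorch build (>= 1.10) that ships <ATen/ceil_div.h>, <c10/cuda/CUDAException.h>, and <c10/cuda/CUDACachingAllocator.h>, and the function name thc_free_mask_roundtrip is illustrative only, with boxes_num/threadsPerBlock standing in for the values used in ml_nms.cu.

// Sketch (assumption: PyTorch >= 1.10 headers) of the THC -> c10/ATen
// replacements applied across patches 1-4; illustrative, not the patched file.
#include <ATen/ceil_div.h>                 // at::ceil_div replaces THCCeilDiv
#include <c10/cuda/CUDAException.h>        // C10_CUDA_CHECK replaces THCudaCheck
#include <c10/cuda/CUDACachingAllocator.h> // raw_alloc/raw_delete replace THCudaMalloc/THCudaFree

#include <cuda_runtime.h>
#include <vector>

void thc_free_mask_roundtrip(int boxes_num, int threadsPerBlock) {
  // THCCeilDiv(a, b) -> at::ceil_div(a, b); no THCState is needed anywhere.
  const int col_blocks = at::ceil_div(boxes_num, threadsPerBlock);
  const size_t bytes = sizeof(unsigned long long) * boxes_num * col_blocks;

  // THCudaMalloc(state, bytes) -> raw_alloc(bytes) on the caching allocator.
  auto* mask_dev = static_cast<unsigned long long*>(
      c10::cuda::CUDACachingAllocator::raw_alloc(bytes));

  // THCudaCheck(cudaMemcpy(...)) -> C10_CUDA_CHECK(cudaMemcpy(...)):
  // the macro validates the returned cudaError_t and throws on failure.
  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  C10_CUDA_CHECK(cudaMemcpy(mask_host.data(), mask_dev, bytes,
                            cudaMemcpyDeviceToHost));

  // THCudaFree(state, ptr) -> raw_delete(ptr).
  c10::cuda::CUDACachingAllocator::raw_delete(mask_dev);
}

Note the division of labor: C10_CUDA_CHECK is the drop-in for THCudaCheck because it checks a cudaError_t, while CUDACachingAllocator::raw_alloc takes a byte count and allocates memory, so it replaces THCudaMalloc only, never the error-checking wrapper around cudaMemcpy.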