From f352c8610f1ea56eae4134547852af665633c6e1 Mon Sep 17 00:00:00 2001
From: huynh-anh
Date: Mon, 14 Feb 2022 19:57:16 +0000
Subject: [PATCH 1/4] changed import because it was deprecated

---
 adet/layers/csrc/ml_nms/ml_nms.cu |  10 +-
 collect_env.py                    | 469 ++++++++++++++++++++++++++++++
 2 files changed, 474 insertions(+), 5 deletions(-)
 create mode 100644 collect_env.py

diff --git a/adet/layers/csrc/ml_nms/ml_nms.cu b/adet/layers/csrc/ml_nms/ml_nms.cu
index f1c1a4206..13be92a13 100644
--- a/adet/layers/csrc/ml_nms/ml_nms.cu
+++ b/adet/layers/csrc/ml_nms/ml_nms.cu
@@ -2,7 +2,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
@@ -65,7 +65,7 @@ __global__ void ml_nms_kernel(const int n_boxes, const float nms_overlap_thresh,
       t |= 1ULL << i;
     }
   }
-  const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
+  const int col_blocks = ceil_div(n_boxes, threadsPerBlock);
   dev_mask[cur_box_idx * col_blocks + col_start] = t;
 }
 }
@@ -82,7 +82,7 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {

   int boxes_num = boxes.size(0);

-  const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
+  const int col_blocks = ceil_div(boxes_num, threadsPerBlock);

   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
@@ -94,8 +94,8 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {
   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));

-  dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
-              THCCeilDiv(boxes_num, threadsPerBlock));
+  dim3 blocks(ATenCeilDiv(boxes_num, threadsPerBlock),
+              ATenCeilDiv(boxes_num, threadsPerBlock));
   dim3 threads(threadsPerBlock);
   ml_nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh,
diff --git a/collect_env.py b/collect_env.py
new file mode 100644
index 000000000..85543c5ce
--- /dev/null
+++ b/collect_env.py
@@ -0,0 +1,469 @@
+from __future__ import print_function
+
+# Unlike the rest of the PyTorch this file must be python2 compliant.
+# This script outputs relevant system environment info
+# Run it with `python collect_env.py`.
+import datetime +import locale +import re +import subprocess +import sys +import os +from collections import namedtuple + + +try: + import torch + TORCH_AVAILABLE = True +except (ImportError, NameError, AttributeError, OSError): + TORCH_AVAILABLE = False + +# System Environment Information +SystemEnv = namedtuple('SystemEnv', [ + 'torch_version', + 'is_debug_build', + 'cuda_compiled_version', + 'gcc_version', + 'clang_version', + 'cmake_version', + 'os', + 'libc_version', + 'python_version', + 'python_platform', + 'is_cuda_available', + 'cuda_runtime_version', + 'nvidia_driver_version', + 'nvidia_gpu_models', + 'cudnn_version', + 'pip_version', # 'pip' or 'pip3' + 'pip_packages', + 'conda_packages', + 'hip_compiled_version', + 'hip_runtime_version', + 'miopen_runtime_version', + 'caching_allocator_config', +]) + + +def run(command): + """Returns (return-code, stdout, stderr)""" + p = subprocess.Popen(command, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, shell=True) + raw_output, raw_err = p.communicate() + rc = p.returncode + if get_platform() == 'win32': + enc = 'oem' + else: + enc = locale.getpreferredencoding() + output = raw_output.decode(enc) + err = raw_err.decode(enc) + return rc, output.strip(), err.strip() + + +def run_and_read_all(run_lambda, command): + """Runs command using run_lambda; reads and returns entire output if rc is 0""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + return out + + +def run_and_parse_first_match(run_lambda, command, regex): + """Runs command using run_lambda, returns the first regex match if it exists""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + match = re.search(regex, out) + if match is None: + return None + return match.group(1) + +def run_and_return_first_line(run_lambda, command): + """Runs command using run_lambda and returns first line if output is not empty""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + return out.split('\n')[0] + + +def get_conda_packages(run_lambda): + if get_platform() == 'win32': + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + findstr_cmd = os.path.join(system_root, 'System32', 'findstr') + grep_cmd = r'{} /R "torch numpy cudatoolkit soumith mkl magma mypy"'.format(findstr_cmd) + else: + grep_cmd = r'grep "torch\|numpy\|cudatoolkit\|soumith\|mkl\|magma\|mypy"' + conda = os.environ.get('CONDA_EXE', 'conda') + out = run_and_read_all(run_lambda, conda + ' list | ' + grep_cmd) + if out is None: + return out + # Comment starting at beginning of line + comment_regex = re.compile(r'^#.*\n') + return re.sub(comment_regex, '', out) + + +def get_gcc_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)') + +def get_clang_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)') + + +def get_cmake_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)') + + +def get_nvidia_driver_version(run_lambda): + if get_platform() == 'darwin': + cmd = 'kextstat | grep -i cuda' + return run_and_parse_first_match(run_lambda, cmd, + r'com[.]nvidia[.]CUDA [(](.*?)[)]') + smi = get_nvidia_smi() + return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) 
') + + +def get_gpu_info(run_lambda): + if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None): + if TORCH_AVAILABLE and torch.cuda.is_available(): + return torch.cuda.get_device_name(None) + return None + smi = get_nvidia_smi() + uuid_regex = re.compile(r' \(UUID: .+?\)') + rc, out, _ = run_lambda(smi + ' -L') + if rc != 0: + return None + # Anonymize GPUs by removing their UUID + return re.sub(uuid_regex, '', out) + + +def get_running_cuda_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)') + + +def get_cudnn_version(run_lambda): + """This will return a list of libcudnn.so; it's hard to tell which one is being used""" + if get_platform() == 'win32': + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%") + where_cmd = os.path.join(system_root, 'System32', 'where') + cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path) + elif get_platform() == 'darwin': + # CUDA libraries and drivers can be found in /usr/local/cuda/. See + # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install + # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac + # Use CUDNN_LIBRARY when cudnn library is installed elsewhere. + cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*' + else: + cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev' + rc, out, _ = run_lambda(cudnn_cmd) + # find will return 1 if there are permission errors or if not found + if len(out) == 0 or (rc != 1 and rc != 0): + l = os.environ.get('CUDNN_LIBRARY') + if l is not None and os.path.isfile(l): + return os.path.realpath(l) + return None + files_set = set() + for fn in out.split('\n'): + fn = os.path.realpath(fn) # eliminate symbolic links + if os.path.isfile(fn): + files_set.add(fn) + if not files_set: + return None + # Alphabetize the result because the order is non-deterministic otherwise + files = list(sorted(files_set)) + if len(files) == 1: + return files[0] + result = '\n'.join(files) + return 'Probably one of the following:\n{}'.format(result) + + +def get_nvidia_smi(): + # Note: nvidia-smi is currently available only on Windows and Linux + smi = 'nvidia-smi' + if get_platform() == 'win32': + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files') + legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi) + new_path = os.path.join(system_root, 'System32', smi) + smis = [new_path, legacy_path] + for candidate_smi in smis: + if os.path.exists(candidate_smi): + smi = '"{}"'.format(candidate_smi) + break + return smi + + +def get_platform(): + if sys.platform.startswith('linux'): + return 'linux' + elif sys.platform.startswith('win32'): + return 'win32' + elif sys.platform.startswith('cygwin'): + return 'cygwin' + elif sys.platform.startswith('darwin'): + return 'darwin' + else: + return sys.platform + + +def get_mac_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)') + + +def get_windows_version(run_lambda): + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic') + findstr_cmd = os.path.join(system_root, 'System32', 'findstr') + return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)) + + +def 
get_lsb_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)') + + +def check_release_file(run_lambda): + return run_and_parse_first_match(run_lambda, 'cat /etc/*-release', + r'PRETTY_NAME="(.*)"') + + +def get_os(run_lambda): + from platform import machine + platform = get_platform() + + if platform == 'win32' or platform == 'cygwin': + return get_windows_version(run_lambda) + + if platform == 'darwin': + version = get_mac_version(run_lambda) + if version is None: + return None + return 'macOS {} ({})'.format(version, machine()) + + if platform == 'linux': + # Ubuntu/Debian based + desc = get_lsb_version(run_lambda) + if desc is not None: + return '{} ({})'.format(desc, machine()) + + # Try reading /etc/*-release + desc = check_release_file(run_lambda) + if desc is not None: + return '{} ({})'.format(desc, machine()) + + return '{} ({})'.format(platform, machine()) + + # Unknown platform + return platform + + +def get_python_platform(): + import platform + return platform.platform() + + +def get_libc_version(): + import platform + if get_platform() != 'linux': + return 'N/A' + return '-'.join(platform.libc_ver()) + + +def get_pip_packages(run_lambda): + """Returns `pip list` output. Note: will also find conda-installed pytorch + and numpy packages.""" + # People generally have `pip` as `pip` or `pip3` + # But here it is incoved as `python -mpip` + def run_with_pip(pip): + if get_platform() == 'win32': + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + findstr_cmd = os.path.join(system_root, 'System32', 'findstr') + grep_cmd = r'{} /R "numpy torch mypy"'.format(findstr_cmd) + else: + grep_cmd = r'grep "torch\|numpy\|mypy"' + return run_and_read_all(run_lambda, pip + ' list --format=freeze | ' + grep_cmd) + + pip_version = 'pip3' if sys.version[0] == '3' else 'pip' + out = run_with_pip(sys.executable + ' -mpip') + + return pip_version, out + + +def get_cachingallocator_config(): + ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '') + return ca_config + + +def get_env_info(): + run_lambda = run + pip_version, pip_list_output = get_pip_packages(run_lambda) + + if TORCH_AVAILABLE: + version_str = torch.__version__ + debug_mode_str = str(torch.version.debug) + cuda_available_str = str(torch.cuda.is_available()) + cuda_version_str = torch.version.cuda + if not hasattr(torch.version, 'hip') or torch.version.hip is None: # cuda version + hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' + else: # HIP version + cfg = torch._C._show_config().split('\n') + hip_runtime_version = [s.rsplit(None, 1)[-1] for s in cfg if 'HIP Runtime' in s][0] + miopen_runtime_version = [s.rsplit(None, 1)[-1] for s in cfg if 'MIOpen' in s][0] + cuda_version_str = 'N/A' + hip_compiled_version = torch.version.hip + else: + version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A' + hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' + + sys_version = sys.version.replace("\n", " ") + + return SystemEnv( + torch_version=version_str, + is_debug_build=debug_mode_str, + python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1), + python_platform=get_python_platform(), + is_cuda_available=cuda_available_str, + cuda_compiled_version=cuda_version_str, + cuda_runtime_version=get_running_cuda_version(run_lambda), + nvidia_gpu_models=get_gpu_info(run_lambda), + nvidia_driver_version=get_nvidia_driver_version(run_lambda), + cudnn_version=get_cudnn_version(run_lambda), + 
hip_compiled_version=hip_compiled_version, + hip_runtime_version=hip_runtime_version, + miopen_runtime_version=miopen_runtime_version, + pip_version=pip_version, + pip_packages=pip_list_output, + conda_packages=get_conda_packages(run_lambda), + os=get_os(run_lambda), + libc_version=get_libc_version(), + gcc_version=get_gcc_version(run_lambda), + clang_version=get_clang_version(run_lambda), + cmake_version=get_cmake_version(run_lambda), + caching_allocator_config=get_cachingallocator_config(), + ) + +env_info_fmt = """ +PyTorch version: {torch_version} +Is debug build: {is_debug_build} +CUDA used to build PyTorch: {cuda_compiled_version} +ROCM used to build PyTorch: {hip_compiled_version} + +OS: {os} +GCC version: {gcc_version} +Clang version: {clang_version} +CMake version: {cmake_version} +Libc version: {libc_version} + +Python version: {python_version} +Python platform: {python_platform} +Is CUDA available: {is_cuda_available} +CUDA runtime version: {cuda_runtime_version} +GPU models and configuration: {nvidia_gpu_models} +Nvidia driver version: {nvidia_driver_version} +cuDNN version: {cudnn_version} +HIP runtime version: {hip_runtime_version} +MIOpen runtime version: {miopen_runtime_version} + +Versions of relevant libraries: +{pip_packages} +{conda_packages} +""".strip() + + +def pretty_str(envinfo): + def replace_nones(dct, replacement='Could not collect'): + for key in dct.keys(): + if dct[key] is not None: + continue + dct[key] = replacement + return dct + + def replace_bools(dct, true='Yes', false='No'): + for key in dct.keys(): + if dct[key] is True: + dct[key] = true + elif dct[key] is False: + dct[key] = false + return dct + + def prepend(text, tag='[prepend]'): + lines = text.split('\n') + updated_lines = [tag + line for line in lines] + return '\n'.join(updated_lines) + + def replace_if_empty(text, replacement='No relevant packages'): + if text is not None and len(text) == 0: + return replacement + return text + + def maybe_start_on_next_line(string): + # If `string` is multiline, prepend a \n to it. 
+ if string is not None and len(string.split('\n')) > 1: + return '\n{}\n'.format(string) + return string + + mutable_dict = envinfo._asdict() + + # If nvidia_gpu_models is multiline, start on the next line + mutable_dict['nvidia_gpu_models'] = \ + maybe_start_on_next_line(envinfo.nvidia_gpu_models) + + # If the machine doesn't have CUDA, report some fields as 'No CUDA' + dynamic_cuda_fields = [ + 'cuda_runtime_version', + 'nvidia_gpu_models', + 'nvidia_driver_version', + ] + all_cuda_fields = dynamic_cuda_fields + ['cudnn_version'] + all_dynamic_cuda_fields_missing = all( + mutable_dict[field] is None for field in dynamic_cuda_fields) + if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing: + for field in all_cuda_fields: + mutable_dict[field] = 'No CUDA' + if envinfo.cuda_compiled_version is None: + mutable_dict['cuda_compiled_version'] = 'None' + + # Replace True with Yes, False with No + mutable_dict = replace_bools(mutable_dict) + + # Replace all None objects with 'Could not collect' + mutable_dict = replace_nones(mutable_dict) + + # If either of these are '', replace with 'No relevant packages' + mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages']) + mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages']) + + # Tag conda and pip packages with a prefix + # If they were previously None, they'll show up as ie '[conda] Could not collect' + if mutable_dict['pip_packages']: + mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'], + '[{}] '.format(envinfo.pip_version)) + if mutable_dict['conda_packages']: + mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'], + '[conda] ') + return env_info_fmt.format(**mutable_dict) + + +def get_pretty_env_info(): + return pretty_str(get_env_info()) + + +def main(): + print("Collecting environment information...") + output = get_pretty_env_info() + print(output) + + if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'): + minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR + if sys.platform == "linux" and os.path.exists(minidump_dir): + dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)] + latest = max(dumps, key=os.path.getctime) + ctime = os.path.getctime(latest) + creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S') + msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \ + "if this is related to your bug please include it when you file a report ***" + print(msg, file=sys.stderr) + + + +if __name__ == '__main__': + main() From fde08d628afd4c9ff0bdf6fc9bacc3fa7dbf7bf5 Mon Sep 17 00:00:00 2001 From: huynh-anh Date: Mon, 14 Feb 2022 19:58:24 +0000 Subject: [PATCH 2/4] removed wrong file --- collect_env.py | 469 ------------------------------------------------- 1 file changed, 469 deletions(-) delete mode 100644 collect_env.py diff --git a/collect_env.py b/collect_env.py deleted file mode 100644 index 85543c5ce..000000000 --- a/collect_env.py +++ /dev/null @@ -1,469 +0,0 @@ -from __future__ import print_function - -# Unlike the rest of the PyTorch this file must be python2 compliant. -# This script outputs relevant system environment info -# Run it with `python collect_env.py`. 
-import datetime -import locale -import re -import subprocess -import sys -import os -from collections import namedtuple - - -try: - import torch - TORCH_AVAILABLE = True -except (ImportError, NameError, AttributeError, OSError): - TORCH_AVAILABLE = False - -# System Environment Information -SystemEnv = namedtuple('SystemEnv', [ - 'torch_version', - 'is_debug_build', - 'cuda_compiled_version', - 'gcc_version', - 'clang_version', - 'cmake_version', - 'os', - 'libc_version', - 'python_version', - 'python_platform', - 'is_cuda_available', - 'cuda_runtime_version', - 'nvidia_driver_version', - 'nvidia_gpu_models', - 'cudnn_version', - 'pip_version', # 'pip' or 'pip3' - 'pip_packages', - 'conda_packages', - 'hip_compiled_version', - 'hip_runtime_version', - 'miopen_runtime_version', - 'caching_allocator_config', -]) - - -def run(command): - """Returns (return-code, stdout, stderr)""" - p = subprocess.Popen(command, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, shell=True) - raw_output, raw_err = p.communicate() - rc = p.returncode - if get_platform() == 'win32': - enc = 'oem' - else: - enc = locale.getpreferredencoding() - output = raw_output.decode(enc) - err = raw_err.decode(enc) - return rc, output.strip(), err.strip() - - -def run_and_read_all(run_lambda, command): - """Runs command using run_lambda; reads and returns entire output if rc is 0""" - rc, out, _ = run_lambda(command) - if rc != 0: - return None - return out - - -def run_and_parse_first_match(run_lambda, command, regex): - """Runs command using run_lambda, returns the first regex match if it exists""" - rc, out, _ = run_lambda(command) - if rc != 0: - return None - match = re.search(regex, out) - if match is None: - return None - return match.group(1) - -def run_and_return_first_line(run_lambda, command): - """Runs command using run_lambda and returns first line if output is not empty""" - rc, out, _ = run_lambda(command) - if rc != 0: - return None - return out.split('\n')[0] - - -def get_conda_packages(run_lambda): - if get_platform() == 'win32': - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - findstr_cmd = os.path.join(system_root, 'System32', 'findstr') - grep_cmd = r'{} /R "torch numpy cudatoolkit soumith mkl magma mypy"'.format(findstr_cmd) - else: - grep_cmd = r'grep "torch\|numpy\|cudatoolkit\|soumith\|mkl\|magma\|mypy"' - conda = os.environ.get('CONDA_EXE', 'conda') - out = run_and_read_all(run_lambda, conda + ' list | ' + grep_cmd) - if out is None: - return out - # Comment starting at beginning of line - comment_regex = re.compile(r'^#.*\n') - return re.sub(comment_regex, '', out) - - -def get_gcc_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)') - -def get_clang_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)') - - -def get_cmake_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)') - - -def get_nvidia_driver_version(run_lambda): - if get_platform() == 'darwin': - cmd = 'kextstat | grep -i cuda' - return run_and_parse_first_match(run_lambda, cmd, - r'com[.]nvidia[.]CUDA [(](.*?)[)]') - smi = get_nvidia_smi() - return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) 
') - - -def get_gpu_info(run_lambda): - if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None): - if TORCH_AVAILABLE and torch.cuda.is_available(): - return torch.cuda.get_device_name(None) - return None - smi = get_nvidia_smi() - uuid_regex = re.compile(r' \(UUID: .+?\)') - rc, out, _ = run_lambda(smi + ' -L') - if rc != 0: - return None - # Anonymize GPUs by removing their UUID - return re.sub(uuid_regex, '', out) - - -def get_running_cuda_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)') - - -def get_cudnn_version(run_lambda): - """This will return a list of libcudnn.so; it's hard to tell which one is being used""" - if get_platform() == 'win32': - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%") - where_cmd = os.path.join(system_root, 'System32', 'where') - cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path) - elif get_platform() == 'darwin': - # CUDA libraries and drivers can be found in /usr/local/cuda/. See - # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install - # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac - # Use CUDNN_LIBRARY when cudnn library is installed elsewhere. - cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*' - else: - cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev' - rc, out, _ = run_lambda(cudnn_cmd) - # find will return 1 if there are permission errors or if not found - if len(out) == 0 or (rc != 1 and rc != 0): - l = os.environ.get('CUDNN_LIBRARY') - if l is not None and os.path.isfile(l): - return os.path.realpath(l) - return None - files_set = set() - for fn in out.split('\n'): - fn = os.path.realpath(fn) # eliminate symbolic links - if os.path.isfile(fn): - files_set.add(fn) - if not files_set: - return None - # Alphabetize the result because the order is non-deterministic otherwise - files = list(sorted(files_set)) - if len(files) == 1: - return files[0] - result = '\n'.join(files) - return 'Probably one of the following:\n{}'.format(result) - - -def get_nvidia_smi(): - # Note: nvidia-smi is currently available only on Windows and Linux - smi = 'nvidia-smi' - if get_platform() == 'win32': - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files') - legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi) - new_path = os.path.join(system_root, 'System32', smi) - smis = [new_path, legacy_path] - for candidate_smi in smis: - if os.path.exists(candidate_smi): - smi = '"{}"'.format(candidate_smi) - break - return smi - - -def get_platform(): - if sys.platform.startswith('linux'): - return 'linux' - elif sys.platform.startswith('win32'): - return 'win32' - elif sys.platform.startswith('cygwin'): - return 'cygwin' - elif sys.platform.startswith('darwin'): - return 'darwin' - else: - return sys.platform - - -def get_mac_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)') - - -def get_windows_version(run_lambda): - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic') - findstr_cmd = os.path.join(system_root, 'System32', 'findstr') - return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)) - - -def 
get_lsb_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)') - - -def check_release_file(run_lambda): - return run_and_parse_first_match(run_lambda, 'cat /etc/*-release', - r'PRETTY_NAME="(.*)"') - - -def get_os(run_lambda): - from platform import machine - platform = get_platform() - - if platform == 'win32' or platform == 'cygwin': - return get_windows_version(run_lambda) - - if platform == 'darwin': - version = get_mac_version(run_lambda) - if version is None: - return None - return 'macOS {} ({})'.format(version, machine()) - - if platform == 'linux': - # Ubuntu/Debian based - desc = get_lsb_version(run_lambda) - if desc is not None: - return '{} ({})'.format(desc, machine()) - - # Try reading /etc/*-release - desc = check_release_file(run_lambda) - if desc is not None: - return '{} ({})'.format(desc, machine()) - - return '{} ({})'.format(platform, machine()) - - # Unknown platform - return platform - - -def get_python_platform(): - import platform - return platform.platform() - - -def get_libc_version(): - import platform - if get_platform() != 'linux': - return 'N/A' - return '-'.join(platform.libc_ver()) - - -def get_pip_packages(run_lambda): - """Returns `pip list` output. Note: will also find conda-installed pytorch - and numpy packages.""" - # People generally have `pip` as `pip` or `pip3` - # But here it is incoved as `python -mpip` - def run_with_pip(pip): - if get_platform() == 'win32': - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - findstr_cmd = os.path.join(system_root, 'System32', 'findstr') - grep_cmd = r'{} /R "numpy torch mypy"'.format(findstr_cmd) - else: - grep_cmd = r'grep "torch\|numpy\|mypy"' - return run_and_read_all(run_lambda, pip + ' list --format=freeze | ' + grep_cmd) - - pip_version = 'pip3' if sys.version[0] == '3' else 'pip' - out = run_with_pip(sys.executable + ' -mpip') - - return pip_version, out - - -def get_cachingallocator_config(): - ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '') - return ca_config - - -def get_env_info(): - run_lambda = run - pip_version, pip_list_output = get_pip_packages(run_lambda) - - if TORCH_AVAILABLE: - version_str = torch.__version__ - debug_mode_str = str(torch.version.debug) - cuda_available_str = str(torch.cuda.is_available()) - cuda_version_str = torch.version.cuda - if not hasattr(torch.version, 'hip') or torch.version.hip is None: # cuda version - hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' - else: # HIP version - cfg = torch._C._show_config().split('\n') - hip_runtime_version = [s.rsplit(None, 1)[-1] for s in cfg if 'HIP Runtime' in s][0] - miopen_runtime_version = [s.rsplit(None, 1)[-1] for s in cfg if 'MIOpen' in s][0] - cuda_version_str = 'N/A' - hip_compiled_version = torch.version.hip - else: - version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A' - hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' - - sys_version = sys.version.replace("\n", " ") - - return SystemEnv( - torch_version=version_str, - is_debug_build=debug_mode_str, - python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1), - python_platform=get_python_platform(), - is_cuda_available=cuda_available_str, - cuda_compiled_version=cuda_version_str, - cuda_runtime_version=get_running_cuda_version(run_lambda), - nvidia_gpu_models=get_gpu_info(run_lambda), - nvidia_driver_version=get_nvidia_driver_version(run_lambda), - cudnn_version=get_cudnn_version(run_lambda), - 
hip_compiled_version=hip_compiled_version, - hip_runtime_version=hip_runtime_version, - miopen_runtime_version=miopen_runtime_version, - pip_version=pip_version, - pip_packages=pip_list_output, - conda_packages=get_conda_packages(run_lambda), - os=get_os(run_lambda), - libc_version=get_libc_version(), - gcc_version=get_gcc_version(run_lambda), - clang_version=get_clang_version(run_lambda), - cmake_version=get_cmake_version(run_lambda), - caching_allocator_config=get_cachingallocator_config(), - ) - -env_info_fmt = """ -PyTorch version: {torch_version} -Is debug build: {is_debug_build} -CUDA used to build PyTorch: {cuda_compiled_version} -ROCM used to build PyTorch: {hip_compiled_version} - -OS: {os} -GCC version: {gcc_version} -Clang version: {clang_version} -CMake version: {cmake_version} -Libc version: {libc_version} - -Python version: {python_version} -Python platform: {python_platform} -Is CUDA available: {is_cuda_available} -CUDA runtime version: {cuda_runtime_version} -GPU models and configuration: {nvidia_gpu_models} -Nvidia driver version: {nvidia_driver_version} -cuDNN version: {cudnn_version} -HIP runtime version: {hip_runtime_version} -MIOpen runtime version: {miopen_runtime_version} - -Versions of relevant libraries: -{pip_packages} -{conda_packages} -""".strip() - - -def pretty_str(envinfo): - def replace_nones(dct, replacement='Could not collect'): - for key in dct.keys(): - if dct[key] is not None: - continue - dct[key] = replacement - return dct - - def replace_bools(dct, true='Yes', false='No'): - for key in dct.keys(): - if dct[key] is True: - dct[key] = true - elif dct[key] is False: - dct[key] = false - return dct - - def prepend(text, tag='[prepend]'): - lines = text.split('\n') - updated_lines = [tag + line for line in lines] - return '\n'.join(updated_lines) - - def replace_if_empty(text, replacement='No relevant packages'): - if text is not None and len(text) == 0: - return replacement - return text - - def maybe_start_on_next_line(string): - # If `string` is multiline, prepend a \n to it. 
- if string is not None and len(string.split('\n')) > 1: - return '\n{}\n'.format(string) - return string - - mutable_dict = envinfo._asdict() - - # If nvidia_gpu_models is multiline, start on the next line - mutable_dict['nvidia_gpu_models'] = \ - maybe_start_on_next_line(envinfo.nvidia_gpu_models) - - # If the machine doesn't have CUDA, report some fields as 'No CUDA' - dynamic_cuda_fields = [ - 'cuda_runtime_version', - 'nvidia_gpu_models', - 'nvidia_driver_version', - ] - all_cuda_fields = dynamic_cuda_fields + ['cudnn_version'] - all_dynamic_cuda_fields_missing = all( - mutable_dict[field] is None for field in dynamic_cuda_fields) - if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing: - for field in all_cuda_fields: - mutable_dict[field] = 'No CUDA' - if envinfo.cuda_compiled_version is None: - mutable_dict['cuda_compiled_version'] = 'None' - - # Replace True with Yes, False with No - mutable_dict = replace_bools(mutable_dict) - - # Replace all None objects with 'Could not collect' - mutable_dict = replace_nones(mutable_dict) - - # If either of these are '', replace with 'No relevant packages' - mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages']) - mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages']) - - # Tag conda and pip packages with a prefix - # If they were previously None, they'll show up as ie '[conda] Could not collect' - if mutable_dict['pip_packages']: - mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'], - '[{}] '.format(envinfo.pip_version)) - if mutable_dict['conda_packages']: - mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'], - '[conda] ') - return env_info_fmt.format(**mutable_dict) - - -def get_pretty_env_info(): - return pretty_str(get_env_info()) - - -def main(): - print("Collecting environment information...") - output = get_pretty_env_info() - print(output) - - if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'): - minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR - if sys.platform == "linux" and os.path.exists(minidump_dir): - dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)] - latest = max(dumps, key=os.path.getctime) - ctime = os.path.getctime(latest) - creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S') - msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \ - "if this is related to your bug please include it when you file a report ***" - print(msg, file=sys.stderr) - - - -if __name__ == '__main__': - main() From 23da3ef7111696b7ec4a51c7b17551b46457219a Mon Sep 17 00:00:00 2001 From: huynh-anh Date: Mon, 14 Feb 2022 19:58:46 +0000 Subject: [PATCH 3/4] removed import because deprecated --- adet/layers/csrc/ml_nms/ml_nms.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/adet/layers/csrc/ml_nms/ml_nms.cu b/adet/layers/csrc/ml_nms/ml_nms.cu index 13be92a13..5fa3d7d19 100644 --- a/adet/layers/csrc/ml_nms/ml_nms.cu +++ b/adet/layers/csrc/ml_nms/ml_nms.cu @@ -3,10 +3,12 @@ #include #include #include +#include #include #include +namespace cuda { int const threadsPerBlock = sizeof(unsigned long long) * 8; __device__ inline float devIoU(float const * const a, float const * const b) { @@ -82,7 +84,7 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) { int boxes_num = boxes.size(0); - const int col_blocks = ceil_div(boxes_num, 
threadsPerBlock);
+  const int col_blocks = cuda::ATenCeilDiv(boxes_num, threadsPerBlock);

   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
@@ -135,5 +137,5 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {
           order_t.device(), keep.scalar_type()) }).sort(0, false));
 }
-
+}
 } // namespace adet
\ No newline at end of file

From efab1f51e9cc892a2f8a78490b43b8118d3045da Mon Sep 17 00:00:00 2001
From: huynh-anh
Date: Mon, 14 Feb 2022 21:11:44 +0000
Subject: [PATCH 4/4] replaced deprecated function/API calls

---
 adet/layers/csrc/ml_nms/ml_nms.cu | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/adet/layers/csrc/ml_nms/ml_nms.cu b/adet/layers/csrc/ml_nms/ml_nms.cu
index 5fa3d7d19..c565da7e5 100644
--- a/adet/layers/csrc/ml_nms/ml_nms.cu
+++ b/adet/layers/csrc/ml_nms/ml_nms.cu
@@ -4,11 +4,10 @@
 #include
 #include
 #include
-
+#include
 #include
 #include

-namespace cuda {
 int const threadsPerBlock = sizeof(unsigned long long) * 8;

 __device__ inline float devIoU(float const * const a, float const * const b) {
@@ -67,7 +66,7 @@ __global__ void ml_nms_kernel(const int n_boxes, const float nms_overlap_thresh,
       t |= 1ULL << i;
     }
   }
-  const int col_blocks = ceil_div(n_boxes, threadsPerBlock);
+  const int col_blocks = at::ceil_div(n_boxes, threadsPerBlock);
   dev_mask[cur_box_idx * col_blocks + col_start] = t;
 }
 }
@@ -84,20 +83,20 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {

   int boxes_num = boxes.size(0);

-  const int col_blocks = cuda::ATenCeilDiv(boxes_num, threadsPerBlock);
+  const int col_blocks = at::ceil_div(boxes_num, threadsPerBlock);

   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();

-  THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
+  // THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState

   unsigned long long* mask_dev = NULL;
   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
   //                         boxes_num * col_blocks * sizeof(unsigned long long)));
-  mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
+  mask_dev = (unsigned long long*) c10::cuda::CUDACachingAllocator::raw_alloc(boxes_num * col_blocks * sizeof(unsigned long long));

-  dim3 blocks(ATenCeilDiv(boxes_num, threadsPerBlock),
-              ATenCeilDiv(boxes_num, threadsPerBlock));
+  dim3 blocks(at::ceil_div(boxes_num, threadsPerBlock),
+              at::ceil_div(boxes_num, threadsPerBlock));
   dim3 threads(threadsPerBlock);
   ml_nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh,
@@ -105,7 +104,7 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {
                                      mask_dev);

   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
-  THCudaCheck(cudaMemcpy(&mask_host[0],
+  C10_CUDA_CHECK(cudaMemcpy(&mask_host[0],
                          mask_dev,
                          sizeof(unsigned long long) * boxes_num * col_blocks,
                          cudaMemcpyDeviceToHost));
@@ -130,12 +129,12 @@ at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {
     }
   }

-  THCudaFree(state, mask_dev);
+  c10::cuda::CUDACachingAllocator::raw_delete(mask_dev);
   // TODO improve this part
   return std::get<0>(order_t.index({
                        keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
                          order_t.device(), keep.scalar_type()) }).sort(0, false));
 }
-}
+
 } // namespace adet
\ No newline at end of file
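
Taken together, the series lands on the standard THC-to-c10/ATen migration: THCCeilDiv becomes at::ceil_div, THCudaMalloc/THCudaFree become the caching allocator's raw_alloc/raw_delete (no THCState handle needed), and THCudaCheck becomes C10_CUDA_CHECK. Below is a minimal, self-contained sketch of that end state, not the literal patched file; it assumes a PyTorch build (>= 1.10) that ships <ATen/ceil_div.h>, <c10/cuda/CUDAException.h>, and <c10/cuda/CUDACachingAllocator.h>, and the function name thc_free_mask_roundtrip is illustrative only, with boxes_num/threadsPerBlock standing in for the values used in ml_nms.cu.

// Sketch (assumption: PyTorch >= 1.10 headers) of the THC -> c10/ATen
// replacements applied across patches 1-4; illustrative, not the patched file.
#include <ATen/ceil_div.h>                 // at::ceil_div replaces THCCeilDiv
#include <c10/cuda/CUDAException.h>        // C10_CUDA_CHECK replaces THCudaCheck
#include <c10/cuda/CUDACachingAllocator.h> // raw_alloc/raw_delete replace THCudaMalloc/THCudaFree

#include <cuda_runtime.h>
#include <vector>

void thc_free_mask_roundtrip(int boxes_num, int threadsPerBlock) {
  // THCCeilDiv(a, b) -> at::ceil_div(a, b); no THCState is needed anywhere.
  const int col_blocks = at::ceil_div(boxes_num, threadsPerBlock);
  const size_t bytes = sizeof(unsigned long long) * boxes_num * col_blocks;

  // THCudaMalloc(state, bytes) -> raw_alloc(bytes) on the caching allocator.
  auto* mask_dev = static_cast<unsigned long long*>(
      c10::cuda::CUDACachingAllocator::raw_alloc(bytes));

  // THCudaCheck(cudaMemcpy(...)) -> C10_CUDA_CHECK(cudaMemcpy(...)):
  // the macro validates the returned cudaError_t and throws on failure.
  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  C10_CUDA_CHECK(cudaMemcpy(mask_host.data(), mask_dev, bytes,
                            cudaMemcpyDeviceToHost));

  // THCudaFree(state, ptr) -> raw_delete(ptr).
  c10::cuda::CUDACachingAllocator::raw_delete(mask_dev);
}

Note the division of labor: C10_CUDA_CHECK is the drop-in for THCudaCheck because it checks a cudaError_t, while CUDACachingAllocator::raw_alloc takes a byte count and allocates memory, so it replaces THCudaMalloc only, never the error-checking wrapper around cudaMemcpy.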