From aa76b30773a98764976488895c359ef0f3032b22 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 9 Aug 2023 13:45:36 -0400 Subject: [PATCH 01/23] Remove unused lambda captures --- pybind_interface/pybind_main.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pybind_interface/pybind_main.cpp b/pybind_interface/pybind_main.cpp index c5615ebc..63a03e4a 100644 --- a/pybind_interface/pybind_main.cpp +++ b/pybind_interface/pybind_main.cpp @@ -377,7 +377,7 @@ std::vector> qsim_simulate(const py::dict &options) { std::vector> amplitudes; amplitudes.reserve(bitstrings.size()); - auto measure = [&bitstrings, &circuit, &litudes]( + auto measure = [&bitstrings, &litudes]( unsigned k, const StateSpace &state_space, const State &state) { for (const auto &b : bitstrings) { @@ -481,7 +481,7 @@ std::vector> qtrajectory_simulate(const py::dict &options) { Simulator simulator = factory.CreateSimulator(); StateSpace state_space = factory.CreateStateSpace(); - auto measure = [&bitstrings, &ncircuit, &litudes, &state_space]( + auto measure = [&bitstrings, &litudes, &state_space]( unsigned k, const State &state, Runner::Stat& stat) { for (const auto &b : bitstrings) { amplitudes.push_back(state_space.GetAmpl(state, b)); @@ -1082,7 +1082,7 @@ std::vector qtrajectory_sample(const py::dict &options) { std::vector> results; - auto measure = [&results, &ncircuit, &state_space]( + auto measure = [&results, &ncircuit]( unsigned k, const State& state, Runner::Stat& stat) { // Converts stat (which matches the MeasurementResult 'bits' field) into // bitstrings matching the MeasurementResult 'bitstring' field. From 036654dfa75e450551b6d9a0db41055566e2efce Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 9 Aug 2023 13:48:30 -0400 Subject: [PATCH 02/23] Remove unused variable --- lib/fuser_mqubit.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/fuser_mqubit.h b/lib/fuser_mqubit.h index aa4e6305..fdbfddb0 100644 --- a/lib/fuser_mqubit.h +++ b/lib/fuser_mqubit.h @@ -561,8 +561,6 @@ class MultiQubitGateFuser final : public Fuser { static void FuseOrphanedGates(unsigned max_fused_size, Stat& stat, std::vector& orphaned_gates, std::vector& fused_gates) { - unsigned count = 0; - for (std::size_t i = 0; i < orphaned_gates.size(); ++i) { auto ogate1 = orphaned_gates[i]; @@ -575,8 +573,6 @@ class MultiQubitGateFuser final : public Fuser { if (ogate2->visited == kFinal) continue; - ++count; - unsigned cur_size = ogate1->qubits.size() + ogate2->qubits.size(); if (cur_size <= max_fused_size) { From 61c9d09d22924edd242e3e2e1ee16244501d9ab7 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 9 Aug 2023 13:53:55 -0400 Subject: [PATCH 03/23] Add missing CUDA error checks --- lib/statespace_cuda.h | 2 +- lib/vectorspace_cuda.h | 28 ++++++++++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/lib/statespace_cuda.h b/lib/statespace_cuda.h index 82f5128b..8f3b2055 100644 --- a/lib/statespace_cuda.h +++ b/lib/statespace_cuda.h @@ -102,7 +102,7 @@ class StateSpaceCUDA : } void SetAllZeros(State& state) const { - cudaMemset(state.get(), 0, MinSize(state.num_qubits()) * sizeof(fp_type)); + ErrorCheck(cudaMemset(state.get(), 0, MinSize(state.num_qubits()) * sizeof(fp_type))); } // Uniform superposition. diff --git a/lib/vectorspace_cuda.h b/lib/vectorspace_cuda.h index 0c1d6167..de210734 100644 --- a/lib/vectorspace_cuda.h +++ b/lib/vectorspace_cuda.h @@ -28,7 +28,7 @@ namespace detail { inline void do_not_free(void*) {} inline void free(void* ptr) { - cudaFree(ptr); + ErrorCheck(cudaFree(ptr)); } } // namespace detail @@ -114,9 +114,10 @@ class VectorSpaceCUDA { return false; } - cudaMemcpy(dest.get(), src.get(), - sizeof(fp_type) * Impl::MinSize(src.num_qubits()), - cudaMemcpyDeviceToDevice); + ErrorCheck( + cudaMemcpy(dest.get(), src.get(), + sizeof(fp_type) * Impl::MinSize(src.num_qubits()), + cudaMemcpyDeviceToDevice)); return true; } @@ -124,9 +125,10 @@ class VectorSpaceCUDA { // It is the client's responsibility to make sure that dest has at least // Impl::MinSize(src.num_qubits()) elements. bool Copy(const Vector& src, fp_type* dest) const { - cudaMemcpy(dest, src.get(), - sizeof(fp_type) * Impl::MinSize(src.num_qubits()), - cudaMemcpyDeviceToHost); + ErrorCheck( + cudaMemcpy(dest, src.get(), + sizeof(fp_type) * Impl::MinSize(src.num_qubits()), + cudaMemcpyDeviceToHost)); return true; } @@ -134,9 +136,10 @@ class VectorSpaceCUDA { // It is the client's responsibility to make sure that src has at least // Impl::MinSize(dest.num_qubits()) elements. bool Copy(const fp_type* src, Vector& dest) const { - cudaMemcpy(dest.get(), src, - sizeof(fp_type) * Impl::MinSize(dest.num_qubits()), - cudaMemcpyHostToDevice); + ErrorCheck( + cudaMemcpy(dest.get(), src, + sizeof(fp_type) * Impl::MinSize(dest.num_qubits()), + cudaMemcpyHostToDevice)); return true; } @@ -145,12 +148,13 @@ class VectorSpaceCUDA { // min(size, Impl::MinSize(dest.num_qubits())) elements. bool Copy(const fp_type* src, uint64_t size, Vector& dest) const { size = std::min(size, Impl::MinSize(dest.num_qubits())); - cudaMemcpy(dest.get(), src, sizeof(fp_type) * size, cudaMemcpyHostToDevice); + ErrorCheck( + cudaMemcpy(dest.get(), src, sizeof(fp_type) * size, cudaMemcpyHostToDevice)); return true; } void DeviceSync() { - cudaDeviceSynchronize(); + ErrorCheck(cudaDeviceSynchronize()); } protected: From caa5463d6e0f6ee52719d6d84594b06774218594 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 16 Aug 2023 15:13:35 -0400 Subject: [PATCH 04/23] Add CUDA to HIP translation --- lib/cuda2hip.h | 61 +++++++++++++++++++++++++++++++++++ lib/simulator_cuda_kernels.h | 13 +++++--- lib/statespace_cuda.h | 7 +++- lib/statespace_cuda_kernels.h | 7 +++- lib/util_cuda.h | 6 +++- lib/vectorspace_cuda.h | 9 ++++-- 6 files changed, 94 insertions(+), 9 deletions(-) create mode 100644 lib/cuda2hip.h diff --git a/lib/cuda2hip.h b/lib/cuda2hip.h new file mode 100644 index 00000000..da2d074c --- /dev/null +++ b/lib/cuda2hip.h @@ -0,0 +1,61 @@ +// Copyright 2023 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SIMULATOR_CUDA2HIP_H_ +#define SIMULATOR_CUDA2HIP_H_ + +#define cublasCaxpy hipblasCaxpy +#define cublasCdotc hipblasCdotc +#define cublasCreate hipblasCreate +#define cublasCscal hipblasCscal +#define cublasCsscal hipblasCsscal +#define cublasDestroy hipblasDestroy +#define cublasDznrm2 hipblasDznrm2 +#define cublasHandle_t hipblasHandle_t +#define cublasScnrm2 hipblasScnrm2 +#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS +#define cublasStatus_t hipblasStatus_t +#define cublasZaxpy hipblasZaxpy +#define cublasZdotc hipblasZdotc +#define cublasZdscal hipblasZdscal +#define cublasZscal hipblasZscal +#define cuCimagf hipCimagf +#define cuCimag hipCimag +#define cuComplex hipComplex +#define cuCrealf hipCrealf +#define cuCreal hipCreal +#define CUDA_C_32F HIPBLAS_C_32F +#define CUDA_C_64F HIPBLAS_C_64F +#define cudaDeviceSynchronize hipDeviceSynchronize +#define cudaError_t hipError_t +#define cudaFree hipFree +#define cudaGetErrorString hipGetErrorString +#define cudaMalloc hipMalloc +#define cudaMemcpyAsync hipMemcpyAsync +#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice +#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost +#define cudaMemcpy hipMemcpy +#define cudaMemcpyHostToDevice hipMemcpyHostToDevice +#define cudaMemset hipMemset +#define cudaPeekAtLastError hipPeekAtLastError +#define cudaSuccess hipSuccess +#define cuDoubleComplex hipDoubleComplex + +template +__device__ __forceinline__ T __shfl_down_sync( + unsigned mask, T var, unsigned int delta, int width = warpSize) { + return __shfl_down(var, delta, width); +} + +#endif // SIMULATOR_CUDA2HIP_H_ diff --git a/lib/simulator_cuda_kernels.h b/lib/simulator_cuda_kernels.h index 6510fadf..e21a9d62 100644 --- a/lib/simulator_cuda_kernels.h +++ b/lib/simulator_cuda_kernels.h @@ -15,10 +15,15 @@ #ifndef SIMULATOR_CUDA_KERNELS_H_ #define SIMULATOR_CUDA_KERNELS_H_ -#include -#include - -#include "util_cuda.h" +#ifdef __NVCC__ + #include + #include + + #include "util_cuda.h" +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif namespace qsim { diff --git a/lib/statespace_cuda.h b/lib/statespace_cuda.h index 8f3b2055..81780150 100644 --- a/lib/statespace_cuda.h +++ b/lib/statespace_cuda.h @@ -15,7 +15,12 @@ #ifndef STATESPACE_CUDA_H_ #define STATESPACE_CUDA_H_ -#include +#ifdef __NVCC__ + #include +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif #include #include diff --git a/lib/statespace_cuda_kernels.h b/lib/statespace_cuda_kernels.h index bcb7fd25..b54ebca9 100644 --- a/lib/statespace_cuda_kernels.h +++ b/lib/statespace_cuda_kernels.h @@ -15,7 +15,12 @@ #ifndef STATESPACE_CUDA_KERNELS_H_ #define STATESPACE_CUDA_KERNELS_H_ -#include +#ifdef __NVCC__ + #include +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif #include "util_cuda.h" diff --git a/lib/util_cuda.h b/lib/util_cuda.h index 591d852d..5d8cb5df 100644 --- a/lib/util_cuda.h +++ b/lib/util_cuda.h @@ -15,7 +15,11 @@ #ifndef UTIL_CUDA_H_ #define UTIL_CUDA_H_ -#include +#ifdef __NVCC__ + #include +#elif __HIP__ + #include +#endif #include diff --git a/lib/vectorspace_cuda.h b/lib/vectorspace_cuda.h index de210734..ebf21095 100644 --- a/lib/vectorspace_cuda.h +++ b/lib/vectorspace_cuda.h @@ -15,8 +15,13 @@ #ifndef VECTORSPACE_CUDA_H_ #define VECTORSPACE_CUDA_H_ -#include -#include +#ifdef __NVCC__ + #include + #include +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif #include #include From 6dd69e2095393697083d449b755e1eaf7d2675bd Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 16 Aug 2023 15:42:57 -0400 Subject: [PATCH 05/23] Add HIP to pybind11 --- pybind_interface/decide/decide.cpp | 4 +- pybind_interface/hip/CMakeLists.txt | 28 ++++++++++++++ pybind_interface/hip/pybind_main_hip.cpp | 47 ++++++++++++++++++++++++ pybind_interface/hip/pybind_main_hip.h | 17 +++++++++ qsimcirq/__init__.py | 2 + setup.py | 1 + 6 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 pybind_interface/hip/CMakeLists.txt create mode 100644 pybind_interface/hip/pybind_main_hip.cpp create mode 100644 pybind_interface/hip/pybind_main_hip.h diff --git a/pybind_interface/decide/decide.cpp b/pybind_interface/decide/decide.cpp index 6355ad3b..5e9a63f1 100644 --- a/pybind_interface/decide/decide.cpp +++ b/pybind_interface/decide/decide.cpp @@ -60,7 +60,7 @@ int detect_instructions() { } enum GPUCapabilities { - CUDA = 0, CUSTATEVEC = 1, NO_GPU = 10, NO_CUSTATEVEC = 11 }; + CUDA = 0, CUSTATEVEC = 1, HIP = 2, NO_GPU = 10, NO_CUSTATEVEC = 11 }; // For now, GPU detection is performed at compile time, as our wheels are // generated on Github Actions runners which do not have GPU support. @@ -70,6 +70,8 @@ enum GPUCapabilities { int detect_gpu() { #ifdef __NVCC__ GPUCapabilities gpu = CUDA; + #elif __HIP__ + GPUCapabilities gpu = HIP; #else GPUCapabilities gpu = NO_GPU; #endif diff --git a/pybind_interface/hip/CMakeLists.txt b/pybind_interface/hip/CMakeLists.txt new file mode 100644 index 00000000..3e89113d --- /dev/null +++ b/pybind_interface/hip/CMakeLists.txt @@ -0,0 +1,28 @@ +cmake_minimum_required(VERSION 3.18) +project(qsim LANGUAGES CXX HIP) + +IF (WIN32) + set(CMAKE_CXX_FLAGS "/O2 /openmp") +ELSE() + set(CMAKE_CXX_FLAGS "-O3 -fopenmp") +ENDIF() + +if(APPLE) + set(CMAKE_CXX_STANDARD 14) + include_directories("/usr/local/include" "/usr/local/opt/llvm/include") + link_directories("/usr/local/lib" "/usr/local/opt/llvm/lib") +endif() + +INCLUDE(../GetPybind11.cmake) +find_package(PythonLibs 3.7 REQUIRED) +find_package(HIP REQUIRED) + +include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) + +hip_add_library(qsim_hip MODULE pybind_main_hip.cpp) + +set_target_properties(qsim_hip PROPERTIES + PREFIX "${PYTHON_MODULE_PREFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}" +) +set_source_files_properties(pybind_main_hip.cpp PROPERTIES LANGUAGE HIP) diff --git a/pybind_interface/hip/pybind_main_hip.cpp b/pybind_interface/hip/pybind_main_hip.cpp new file mode 100644 index 00000000..d6ee181f --- /dev/null +++ b/pybind_interface/hip/pybind_main_hip.cpp @@ -0,0 +1,47 @@ +// Copyright 2019 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "pybind_main_hip.h" + +#include "../../lib/simulator_cuda.h" + +namespace qsim { + using Simulator = SimulatorCUDA; + + struct Factory { + using Simulator = qsim::Simulator; + using StateSpace = Simulator::StateSpace; + + Factory( + unsigned num_sim_threads, + unsigned num_state_threads, + unsigned num_dblocks + ) : ss_params{num_state_threads, num_dblocks} {} + + StateSpace CreateStateSpace() const { + return StateSpace(ss_params); + } + + Simulator CreateSimulator() const { + return Simulator(); + } + + StateSpace::Parameter ss_params; + }; + + inline void SetFlushToZeroAndDenormalsAreZeros() {} + inline void ClearFlushToZeroAndDenormalsAreZeros() {} +} + +#include "../pybind_main.cpp" diff --git a/pybind_interface/hip/pybind_main_hip.h b/pybind_interface/hip/pybind_main_hip.h new file mode 100644 index 00000000..55a672d5 --- /dev/null +++ b/pybind_interface/hip/pybind_main_hip.h @@ -0,0 +1,17 @@ +// Copyright 2019 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "../pybind_main.h" + +PYBIND11_MODULE(qsim_hip, m) { GPU_MODULE_BINDINGS } diff --git a/qsimcirq/__init__.py b/qsimcirq/__init__.py index 0f022a1a..d9c5504d 100644 --- a/qsimcirq/__init__.py +++ b/qsimcirq/__init__.py @@ -34,6 +34,8 @@ def _load_qsim_gpu(): instr = qsim_decide.detect_gpu() if instr == 0: qsim_gpu = importlib.import_module("qsimcirq.qsim_cuda") + elif instr == 2: + qsim_gpu = importlib.import_module("qsimcirq.qsim_hip") else: qsim_gpu = None return qsim_gpu diff --git a/setup.py b/setup.py index c57a37a9..eafa4387 100644 --- a/setup.py +++ b/setup.py @@ -111,6 +111,7 @@ def build_extension(self, ext): CMakeExtension("qsimcirq/qsim_cuda"), CMakeExtension("qsimcirq/qsim_custatevec"), CMakeExtension("qsimcirq/qsim_decide"), + CMakeExtension("qsimcirq/qsim_hip"), ], cmdclass=dict(build_ext=CMakeBuild), zip_safe=False, From 5edafd6893f910733d1e5441e9796d587faa20f9 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 16 Aug 2023 15:44:17 -0400 Subject: [PATCH 06/23] Add HIP to make and cmake files --- CMakeLists.txt | 8 ++++++- Makefile | 16 +++++++++++++ apps/Makefile | 9 ++++++++ apps/make.sh | 16 +++++++++---- pybind_interface/Makefile | 32 ++++++++++++++++++++------ pybind_interface/decide/CMakeLists.txt | 24 +++++++++++++++++-- tests/Makefile | 13 +++++++++++ tests/make.sh | 28 ++++++++++++++-------- 8 files changed, 121 insertions(+), 25 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 615a33db..f902a421 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,13 @@ cmake_minimum_required(VERSION 3.11) execute_process(COMMAND which nvcc OUTPUT_VARIABLE has_nvcc) if(has_nvcc STREQUAL "") - project(qsim) + execute_process(COMMAND which hipcc OUTPUT_VARIABLE has_hipcc) + if(has_hipcc STREQUAL "") + project(qsim) + else() + project(qsim LANGUAGES CXX HIP) + ADD_SUBDIRECTORY(pybind_interface/hip) + endif() else() project(qsim LANGUAGES CXX CUDA) ADD_SUBDIRECTORY(pybind_interface/cuda) diff --git a/Makefile b/Makefile index 9ef0afdd..227ca603 100644 --- a/Makefile +++ b/Makefile @@ -6,10 +6,12 @@ TESTS = run-cxx-tests CXX=g++ NVCC=nvcc +HIPCC=hipcc CXXFLAGS = -O3 -fopenmp ARCHFLAGS = -march=native NVCCFLAGS = -O3 +HIPCCFLAGS = -O3 # CUQUANTUM_ROOT should be set. CUSTATEVECFLAGS = -I$(CUQUANTUM_ROOT)/include -L${CUQUANTUM_ROOT}/lib -L$(CUQUANTUM_ROOT)/lib64 -lcustatevec -lcublas @@ -22,6 +24,8 @@ export ARCHFLAGS export NVCC export NVCCFLAGS export CUSTATEVECFLAGS +export HIPCC +export HIPCCFLAGS ifeq ($(PYBIND11), true) TARGETS += pybind @@ -43,6 +47,10 @@ qsim-cuda: qsim-custatevec: $(MAKE) -C apps/ qsim-custatevec +.PHONY: qsim-hip +qsim-hip: + $(MAKE) -C apps/ qsim-hip + .PHONY: pybind pybind: $(MAKE) -C pybind_interface/ pybind @@ -59,6 +67,10 @@ cuda-tests: custatevec-tests: $(MAKE) -C tests/ custatevec-tests +.PHONY: hip-tests +hip-tests: + $(MAKE) -C tests/ hip-tests + .PHONY: run-cxx-tests run-cxx-tests: cxx-tests $(MAKE) -C tests/ run-cxx-tests @@ -71,6 +83,10 @@ run-cuda-tests: cuda-tests run-custatevec-tests: custatevec-tests $(MAKE) -C tests/ run-custatevec-tests +.PHONY: run-hip-tests +run-hip-tests: hip-tests + $(MAKE) -C tests/ run-hip-tests + PYTESTS = $(shell find qsimcirq_tests/ -name '*_test.py') .PHONY: run-py-tests diff --git a/apps/Makefile b/apps/Makefile index 41fb81e5..48b25cab 100644 --- a/apps/Makefile +++ b/apps/Makefile @@ -7,6 +7,9 @@ CUDA_TARGETS := $(CUDA_TARGETS:%cuda.cu=%cuda.x) CUSTATEVEC_TARGETS = $(shell find . -maxdepth 1 -name "*custatevec.cu") CUSTATEVEC_TARGETS := $(CUSTATEVEC_TARGETS:%custatevec.cu=%custatevec.x) +HIP_TARGETS = $(shell find . -maxdepth 1 -name '*cuda.cu') +HIP_TARGETS := $(HIP_TARGETS:%cuda.cu=%hip.x) + .PHONY: qsim qsim: $(CXX_TARGETS) @@ -16,6 +19,9 @@ qsim-cuda: $(CUDA_TARGETS) .PHONY: qsim-custatevec qsim-custatevec: $(CUSTATEVEC_TARGETS) +.PHONY: qsim-hip +qsim-hip: $(HIP_TARGETS) + %.x: %.cc $(CXX) -o ./$@ $< $(CXXFLAGS) $(ARCHFLAGS) @@ -25,6 +31,9 @@ qsim-custatevec: $(CUSTATEVEC_TARGETS) %custatevec.x: %custatevec.cu $(NVCC) -o ./$@ $< $(NVCCFLAGS) $(CUSTATEVECFLAGS) +%hip.x: %cuda.cu + $(HIPCC) -o ./$@ $< $(HIPCCFLAGS) + .PHONY: clean clean: -rm -f ./*.x ./*.a ./*.so ./*.mod diff --git a/apps/make.sh b/apps/make.sh index f2e777e5..5ddd4d6b 100755 --- a/apps/make.sh +++ b/apps/make.sh @@ -23,9 +23,15 @@ g++ -O3 -march=native -fopenmp -o qsim_amplitudes.x qsim_amplitudes.cc g++ -O3 -march=native -fopenmp -o qsimh_base.x qsimh_base.cc g++ -O3 -march=native -fopenmp -o qsimh_amplitudes.x qsimh_amplitudes.cc -nvcc -O3 -o qsim_base_cuda.x qsim_base_cuda.cu -nvcc -O3 -o qsim_qtrajectory_cuda.x qsim_qtrajectory_cuda.cu +if command -v nvcc &>/dev/null; then + nvcc -O3 -o qsim_base_cuda.x qsim_base_cuda.cu + nvcc -O3 -o qsim_qtrajectory_cuda.x qsim_qtrajectory_cuda.cu -# CUQUANTUM_ROOT should be set. -CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" -nvcc -O3 $CUSTATEVECFLAGS -o qsim_base_custatevec.x qsim_base_custatevec.cu + if [ -n "$CUQUANTUM_ROOT" ]; then + CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" + nvcc -O3 $CUSTATEVECFLAGS -o qsim_base_custatevec.x qsim_base_custatevec.cu + fi +elif command -v hipcc &>/dev/null; then + hipcc -O3 -o qsim_base_hip.x qsim_base_cuda.cu + hipcc -O3 -o qsim_qtrajectory_hip.x qsim_qtrajectory_cuda.cu +fi diff --git a/pybind_interface/Makefile b/pybind_interface/Makefile index c7c70860..60e7cc99 100644 --- a/pybind_interface/Makefile +++ b/pybind_interface/Makefile @@ -5,6 +5,7 @@ QSIMLIB_AVX2 = ../qsimcirq/qsim_avx2`python3-config --extension-suffix` QSIMLIB_AVX512 = ../qsimcirq/qsim_avx512`python3-config --extension-suffix` QSIMLIB_CUDA = ../qsimcirq/qsim_cuda`python3-config --extension-suffix` QSIMLIB_CUSTATEVEC = ../qsimcirq/qsim_custatevec`python3-config --extension-suffix` +QSIMLIB_HIP = ../qsimcirq/qsim_hip`python3-config --extension-suffix` QSIMLIB_DECIDE = ../qsimcirq/qsim_decide`python3-config --extension-suffix` # The flags for the compilation of the simd-specific Pybind11 interfaces @@ -13,21 +14,29 @@ PYBINDFLAGS_SSE = -msse4.1 -Wall -shared -std=c++17 -fPIC `python3 -m pybind11 - PYBINDFLAGS_AVX2 = -mavx2 -mfma -Wall -shared -std=c++17 -fPIC `python3 -m pybind11 --includes` PYBINDFLAGS_AVX512 = -mavx512f -mbmi2 -Wall -shared -std=c++17 -fPIC `python3 -m pybind11 --includes` -# The flags for the compilation of GPU-specific Pybind11 interfaces +# The flags for the compilation of CUDA-specific Pybind11 interfaces PYBINDFLAGS_CUDA = -std=c++17 -x cu -Xcompiler "-Wall -shared -fPIC `python3 -m pybind11 --includes`" # The flags for the compilation of cuStateVec-specific Pybind11 interfaces PYBINDFLAGS_CUSTATEVEC = $(CUSTATEVECFLAGS) $(PYBINDFLAGS_CUDA) +# The flags for the compilation of HIP-specific Pybind11 interfaces +PYBINDFLAGS_HIP = -std=c++17 -Wall -shared -fPIC `python3 -m pybind11 --includes` + # Check for nvcc to decide compilation mode. ifeq ($(shell which $(NVCC)),) +# Check for hipcc to decide compilation mode. +ifeq ($(shell which $(HIPCC)),) pybind: pybind-cpu decide-cpu else +pybind: pybind-cpu pybind-hip decide-hip +endif +else # Check for the cuStateVec library. ifeq ($(CUQUANTUM_ROOT),) -pybind: pybind-cpu pybind-gpu decide-gpu +pybind: pybind-cpu pybind-cuda decide-cuda else -pybind: pybind-cpu pybind-gpu pybind-custatevec decide-custatevec +pybind: pybind-cpu pybind-cuda pybind-custatevec decide-custatevec endif endif @@ -40,14 +49,15 @@ pybind-cpu: .PHONY: decide-cpu decide-cpu: + echo "building decide-cpu" $(CXX) decide/decide.cpp -o $(QSIMLIB_DECIDE) $(CXXFLAGS) $(PYBINDFLAGS_BASIC) -.PHONY: pybind-gpu -pybind-gpu: +.PHONY: pybind-cuda +pybind-cuda: $(NVCC) cuda/pybind_main_cuda.cpp -o $(QSIMLIB_CUDA) $(NVCCFLAGS) $(PYBINDFLAGS_CUDA) -.PHONY: decide-gpu -decide-gpu: +.PHONY: decide-cuda +decide-cuda: $(NVCC) decide/decide.cpp -o $(QSIMLIB_DECIDE) $(NVCCFLAGS) $(PYBINDFLAGS_CUDA) .PHONY: pybind-custatevec @@ -58,6 +68,14 @@ pybind-custatevec: decide-custatevec: $(NVCC) decide/decide.cpp -D__CUSTATEVEC__ -o $(QSIMLIB_DECIDE) $(NVCCFLAGS) $(PYBINDFLAGS_CUDA) +.PHONY: pybind-hip +pybind-hip: + $(HIPCC) hip/pybind_main_hip.cpp -o $(QSIMLIB_HIP) $(HIPCCFLAGS) $(PYBINDFLAGS_HIP) + +.PHONY: decide-hip +decide-hip: + $(HIPCC) decide/decide.cpp -o $(QSIMLIB_DECIDE) $(HIPCCFLAGS) $(PYBINDFLAGS_HIP) + .PHONY: clean clean: -rm -f ./basic/*.x ./basic/*.a ./basic/*.so ./basic/*.mod $(QSIMLIB_BASIC) diff --git a/pybind_interface/decide/CMakeLists.txt b/pybind_interface/decide/CMakeLists.txt index 441f3d66..2b28ca96 100644 --- a/pybind_interface/decide/CMakeLists.txt +++ b/pybind_interface/decide/CMakeLists.txt @@ -2,7 +2,12 @@ cmake_minimum_required(VERSION 3.11) execute_process(COMMAND which nvcc OUTPUT_VARIABLE has_nvcc) if(has_nvcc STREQUAL "") - project(qsim) + execute_process(COMMAND which hipcc OUTPUT_VARIABLE has_hipcc) + if(has_hipcc STREQUAL "") + project(qsim) + else() + project(qsim LANGUAGES CXX HIP) + endif() else() project(qsim LANGUAGES CXX CUDA) endif() @@ -22,7 +27,22 @@ endif() INCLUDE(../GetPybind11.cmake) if(has_nvcc STREQUAL "") - pybind11_add_module(qsim_decide decide.cpp) + if(has_hipcc STREQUAL "") + pybind11_add_module(qsim_decide decide.cpp) + else() + find_package(HIP REQUIRED) + find_package(PythonLibs 3.7 REQUIRED) + + include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) + + hip_add_library(qsim_decide MODULE decide.cpp) + + set_target_properties(qsim_decide PROPERTIES + PREFIX "${PYTHON_MODULE_PREFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}" + ) + set_source_files_properties(decide.cpp PROPERTIES LANGUAGE HIP) + endif() else() find_package(PythonLibs 3.7 REQUIRED) find_package(CUDA REQUIRED) diff --git a/tests/Makefile b/tests/Makefile index f4a37278..6a81e70b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -14,6 +14,9 @@ CUDA_TARGETS := $(CUDA_TARGETS:%cuda_test.cu=%cuda_test.x) CUSTATEVEC_TARGETS = $(shell find . -maxdepth 1 -name "*custatevec_test.cu") CUSTATEVEC_TARGETS := $(CUSTATEVEC_TARGETS:%custatevec_test.cu=%custatevec_test.x) +HIP_TARGETS = $(shell find . -maxdepth 1 -name "*cuda_test.cu") +HIP_TARGETS := $(HIP_TARGETS:%cuda_test.cu=%hip_test.x) + GTEST_DIR = $(CURDIR)/googletest/googletest GMOCK_DIR = $(CURDIR)/googletest/googlemock @@ -30,6 +33,9 @@ cuda-tests: $(CUDA_TARGETS) .PHONY: custatevec-tests custatevec-tests: $(CUSTATEVEC_TARGETS) +.PHONY: hip-tests +hip-tests: $(HIP_TARGETS) + .PHONY: run-cxx-tests run-cxx-tests: cxx-tests for exe in $(CXX_TARGETS); do if ! ./$$exe; then exit 1; fi; done @@ -42,6 +48,10 @@ run-cuda-tests: cuda-tests run-custatevec-tests: custatevec-tests for exe in $(CUSTATEVEC_TARGETS); do if ! ./$$exe; then exit 1; fi; done +.PHONY: run-hip-tests +run-hip-tests: hip-tests + for exe in $(HIP_TARGETS); do if ! ./$$exe; then exit 1; fi; done + $(GTEST_DIR)/make: -git submodule update --init --recursive googletest mkdir -p $(GTEST_DIR)/make @@ -56,6 +66,9 @@ $(GTEST_DIR)/make: %custatevec_test.x: %custatevec_test.cu $(GTEST_DIR)/make $(NVCC) -o ./$@ $< $(TESTFLAGS) $(NVCCFLAGS) $(CUSTATEVECFLAGS) +%hip_test.x: %cuda_test.cu $(GTEST_DIR)/make + $(HIPCC) -o ./$@ $< $(TESTFLAGS) $(HIPCCFLAGS) + .PHONY: clean clean: -rm -f ./*.x ./*.a ./*.so ./*.mod diff --git a/tests/make.sh b/tests/make.sh index 890663da..b38742df 100755 --- a/tests/make.sh +++ b/tests/make.sh @@ -55,14 +55,22 @@ g++ -O3 -I$path_to_include -L$path_to_lib -fopenmp -o unitaryspace_basic_test.x g++ -O3 -I$path_to_include -L$path_to_lib -msse4 -fopenmp -o unitaryspace_sse_test.x unitaryspace_sse_test.cc -lgtest -lpthread g++ -O3 -I$path_to_include -L$path_to_lib -o vectorspace_test.x vectorspace_test.cc -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o hybrid_cuda_test.x hybrid_cuda_test.cu -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o qtrajectory_cuda_test.x qtrajectory_cuda_test.cu -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o simulator_cuda_test.x simulator_cuda_test.cu -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o statespace_cuda_test.x statespace_cuda_test.cu -lgtest -lpthread +if command -v nvcc &>/dev/null; then + nvcc -O3 -I$path_to_include -L$path_to_lib -o hybrid_cuda_test.x hybrid_cuda_test.cu -lgtest -lpthread + nvcc -O3 -I$path_to_include -L$path_to_lib -o qtrajectory_cuda_test.x qtrajectory_cuda_test.cu -lgtest -lpthread + nvcc -O3 -I$path_to_include -L$path_to_lib -o simulator_cuda_test.x simulator_cuda_test.cu -lgtest -lpthread + nvcc -O3 -I$path_to_include -L$path_to_lib -o statespace_cuda_test.x statespace_cuda_test.cu -lgtest -lpthread -# CUQUANTUM_ROOT should be set. -CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o hybrid_custatevec_test.x hybrid_custatevec_test.cu -lgtest -lpthread -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o qtrajectory_custatevec_test.x qtrajectory_custatevec_test.cu -lgtest -lpthread -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o simulator_custatevec_test.x simulator_custatevec_test.cu -lgtest -lpthread -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o statespace_custatevec_test.x statespace_custatevec_test.cu -lgtest -lpthread + if [ -n "$CUQUANTUM_ROOT" ]; then + CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o hybrid_custatevec_test.x hybrid_custatevec_test.cu -lgtest -lpthread + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o qtrajectory_custatevec_test.x qtrajectory_custatevec_test.cu -lgtest -lpthread + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o simulator_custatevec_test.x simulator_custatevec_test.cu -lgtest -lpthread + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o statespace_custatevec_test.x statespace_custatevec_test.cu -lgtest -lpthread + fi +elif command -v hipcc &>/dev/null; then + hipcc -O3 -I$path_to_include -L$path_to_lib -o hybrid_hip_test.x hybrid_cuda_test.cu -lgtest -lpthread + hipcc -O3 -I$path_to_include -L$path_to_lib -o qtrajectory_hip_test.x qtrajectory_cuda_test.cu -lgtest -lpthread + hipcc -O3 -I$path_to_include -L$path_to_lib -o simulator_hip_test.x simulator_cuda_test.cu -lgtest -lpthread + hipcc -O3 -I$path_to_include -L$path_to_lib -o statespace_hip_test.x statespace_cuda_test.cu -lgtest -lpthread +fi From c80f91d0140ba47e10fbbeeb55d65521eb9a2cb6 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Fri, 18 Aug 2023 12:56:29 -0400 Subject: [PATCH 07/23] Add HIP cleanup in pybind_interface --- pybind_interface/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/pybind_interface/Makefile b/pybind_interface/Makefile index 60e7cc99..253d3f85 100644 --- a/pybind_interface/Makefile +++ b/pybind_interface/Makefile @@ -83,5 +83,6 @@ clean: -rm -f ./avx2/*.x ./avx2/*.a ./avx2/*.so ./avx2/*.mod $(QSIMLIB_AVX2) -rm -f ./avx512/*.x ./avx512/*.a ./avx512/*.so ./avx512/*.mod $(QSIMLIB_AVX512) -rm -f ./cuda/*.x ./cuda/*.a ./cuda/*.so ./cuda/*.mod $(QSIMLIB_CUDA) + -rm -f ./hip/*.x ./hip/*.a ./hip/*.so ./hip/*.mod $(QSIMLIB_HIP) -rm -f ./custatevec/*.x ./custatevec/*.a ./custatevec/*.so ./custatevec/*.mod $(QSIMLIB_CUSTATEVEC) -rm -f ./decide/*.x ./decide/*.a ./decide/*.so ./decide/*.mod $(QSIMLIB_DECIDE) From 09611cfd2185bdf376f9575459201556c2aafdeb Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Fri, 18 Aug 2023 14:19:29 -0400 Subject: [PATCH 08/23] Remove APPLE from pybind for HIP --- pybind_interface/hip/CMakeLists.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pybind_interface/hip/CMakeLists.txt b/pybind_interface/hip/CMakeLists.txt index 3e89113d..982579fb 100644 --- a/pybind_interface/hip/CMakeLists.txt +++ b/pybind_interface/hip/CMakeLists.txt @@ -7,12 +7,6 @@ ELSE() set(CMAKE_CXX_FLAGS "-O3 -fopenmp") ENDIF() -if(APPLE) - set(CMAKE_CXX_STANDARD 14) - include_directories("/usr/local/include" "/usr/local/opt/llvm/include") - link_directories("/usr/local/lib" "/usr/local/opt/llvm/lib") -endif() - INCLUDE(../GetPybind11.cmake) find_package(PythonLibs 3.7 REQUIRED) find_package(HIP REQUIRED) From d732ea83b5ad321c63191a7f25b1664e70dfd05d Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Fri, 18 Aug 2023 14:20:52 -0400 Subject: [PATCH 09/23] Use HIPCC for CPU and GPU pybind files --- pybind_interface/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pybind_interface/Makefile b/pybind_interface/Makefile index 253d3f85..95f64b32 100644 --- a/pybind_interface/Makefile +++ b/pybind_interface/Makefile @@ -29,7 +29,7 @@ ifeq ($(shell which $(NVCC)),) ifeq ($(shell which $(HIPCC)),) pybind: pybind-cpu decide-cpu else -pybind: pybind-cpu pybind-hip decide-hip +pybind: pybind-hip decide-hip endif else # Check for the cuStateVec library. @@ -70,6 +70,10 @@ decide-custatevec: .PHONY: pybind-hip pybind-hip: + $(HIPCC) basic/pybind_main_basic.cpp -o $(QSIMLIB_BASIC) $(CXXFLAGS) $(PYBINDFLAGS_BASIC) + $(HIPCC) sse/pybind_main_sse.cpp -o $(QSIMLIB_SSE) $(CXXFLAGS) $(PYBINDFLAGS_SSE) + $(HIPCC) avx2/pybind_main_avx2.cpp -o $(QSIMLIB_AVX2) $(CXXFLAGS) $(PYBINDFLAGS_AVX2) + $(HIPCC) avx512/pybind_main_avx512.cpp -o $(QSIMLIB_AVX512) $(CXXFLAGS) $(PYBINDFLAGS_AVX512) $(HIPCC) hip/pybind_main_hip.cpp -o $(QSIMLIB_HIP) $(HIPCCFLAGS) $(PYBINDFLAGS_HIP) .PHONY: decide-hip From 31fff3bd04d3933b892f2ee5e5924e83f7dae86f Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Fri, 18 Aug 2023 14:23:01 -0400 Subject: [PATCH 10/23] Add CMAKE_MODULE_PATH for pybind with HIP --- pybind_interface/decide/CMakeLists.txt | 1 + pybind_interface/hip/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+) diff --git a/pybind_interface/decide/CMakeLists.txt b/pybind_interface/decide/CMakeLists.txt index 2b28ca96..b94b0ade 100644 --- a/pybind_interface/decide/CMakeLists.txt +++ b/pybind_interface/decide/CMakeLists.txt @@ -30,6 +30,7 @@ if(has_nvcc STREQUAL "") if(has_hipcc STREQUAL "") pybind11_add_module(qsim_decide decide.cpp) else() + list(APPEND CMAKE_MODULE_PATH "/opt/rocm/lib/cmake/hip") find_package(HIP REQUIRED) find_package(PythonLibs 3.7 REQUIRED) diff --git a/pybind_interface/hip/CMakeLists.txt b/pybind_interface/hip/CMakeLists.txt index 982579fb..fe4b1c54 100644 --- a/pybind_interface/hip/CMakeLists.txt +++ b/pybind_interface/hip/CMakeLists.txt @@ -9,6 +9,8 @@ ENDIF() INCLUDE(../GetPybind11.cmake) find_package(PythonLibs 3.7 REQUIRED) + +list(APPEND CMAKE_MODULE_PATH "/opt/rocm/lib/cmake/hip") find_package(HIP REQUIRED) include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) From 9e12b68ca77ceab07e656b93721cf17bbe4b2937 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Fri, 18 Aug 2023 14:24:13 -0400 Subject: [PATCH 11/23] Add hipcc detection in setup.py --- setup.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/setup.py b/setup.py index eafa4387..593620a6 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ import os import re import sys +import shutil import platform import subprocess @@ -63,6 +64,12 @@ def build_extension(self, ext): "-DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++", ] + if shutil.which("hipcc") is not None: + cmake_args += [ + "-DCMAKE_C_COMPILER=hipcc", + "-DCMAKE_CXX_COMPILER=hipcc", + ] + env = os.environ.copy() env["CXXFLAGS"] = '{} -DVERSION_INFO=\\"{}\\"'.format( env.get("CXXFLAGS", ""), self.distribution.get_version() From 7de0100c906e43d4eb0b3e8586cfc56bf14f96c4 Mon Sep 17 00:00:00 2001 From: eliottrosenberg <61400172+eliottrosenberg@users.noreply.github.com> Date: Sat, 19 Aug 2023 18:35:22 -0400 Subject: [PATCH 12/23] Fix typo in docs --- docs/choose_hw.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/choose_hw.md b/docs/choose_hw.md index ff692530..96721ed7 100644 --- a/docs/choose_hw.md +++ b/docs/choose_hw.md @@ -258,7 +258,7 @@ are “embarrassingly parallelizable”, there is an automated workflow for distributing these trajectories over multiple nodes. A simulation of many noiseless circuits can also be distributed over multiple compute nodes. -For mor information about running a mulitnode simulation, see [Multinode quantum +For more information about running a mulitnode simulation, see [Multinode quantum simulation using HTCondor on Google Cloud](/qsim/tutorials/multinode). ## Runtime estimates From d25454331acdd3f31d8049f45b99b6a0637cbde2 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Mon, 16 Oct 2023 14:43:59 -0400 Subject: [PATCH 13/23] Fix style --- lib/statespace_cuda.h | 3 ++- lib/vectorspace_cuda.h | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/lib/statespace_cuda.h b/lib/statespace_cuda.h index 81780150..660db074 100644 --- a/lib/statespace_cuda.h +++ b/lib/statespace_cuda.h @@ -107,7 +107,8 @@ class StateSpaceCUDA : } void SetAllZeros(State& state) const { - ErrorCheck(cudaMemset(state.get(), 0, MinSize(state.num_qubits()) * sizeof(fp_type))); + ErrorCheck(cudaMemset(state.get(), 0, + MinSize(state.num_qubits()) * sizeof(fp_type))); } // Uniform superposition. diff --git a/lib/vectorspace_cuda.h b/lib/vectorspace_cuda.h index ebf21095..fd91553d 100644 --- a/lib/vectorspace_cuda.h +++ b/lib/vectorspace_cuda.h @@ -120,9 +120,9 @@ class VectorSpaceCUDA { } ErrorCheck( - cudaMemcpy(dest.get(), src.get(), - sizeof(fp_type) * Impl::MinSize(src.num_qubits()), - cudaMemcpyDeviceToDevice)); + cudaMemcpy(dest.get(), src.get(), + sizeof(fp_type) * Impl::MinSize(src.num_qubits()), + cudaMemcpyDeviceToDevice)); return true; } @@ -131,9 +131,9 @@ class VectorSpaceCUDA { // Impl::MinSize(src.num_qubits()) elements. bool Copy(const Vector& src, fp_type* dest) const { ErrorCheck( - cudaMemcpy(dest, src.get(), - sizeof(fp_type) * Impl::MinSize(src.num_qubits()), - cudaMemcpyDeviceToHost)); + cudaMemcpy(dest, src.get(), + sizeof(fp_type) * Impl::MinSize(src.num_qubits()), + cudaMemcpyDeviceToHost)); return true; } @@ -142,9 +142,9 @@ class VectorSpaceCUDA { // Impl::MinSize(dest.num_qubits()) elements. bool Copy(const fp_type* src, Vector& dest) const { ErrorCheck( - cudaMemcpy(dest.get(), src, - sizeof(fp_type) * Impl::MinSize(dest.num_qubits()), - cudaMemcpyHostToDevice)); + cudaMemcpy(dest.get(), src, + sizeof(fp_type) * Impl::MinSize(dest.num_qubits()), + cudaMemcpyHostToDevice)); return true; } @@ -154,7 +154,9 @@ class VectorSpaceCUDA { bool Copy(const fp_type* src, uint64_t size, Vector& dest) const { size = std::min(size, Impl::MinSize(dest.num_qubits())); ErrorCheck( - cudaMemcpy(dest.get(), src, sizeof(fp_type) * size, cudaMemcpyHostToDevice)); + cudaMemcpy(dest.get(), src, + sizeof(fp_type) * size, + cudaMemcpyHostToDevice)); return true; } From 36e9b0094da45ab0315b20dd4abd89c4a37e1bc0 Mon Sep 17 00:00:00 2001 From: eliottrosenberg <61400172+eliottrosenberg@users.noreply.github.com> Date: Sat, 19 Aug 2023 18:35:22 -0400 Subject: [PATCH 14/23] Fix typo in docs --- docs/choose_hw.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/choose_hw.md b/docs/choose_hw.md index ff692530..96721ed7 100644 --- a/docs/choose_hw.md +++ b/docs/choose_hw.md @@ -258,7 +258,7 @@ are “embarrassingly parallelizable”, there is an automated workflow for distributing these trajectories over multiple nodes. A simulation of many noiseless circuits can also be distributed over multiple compute nodes. -For mor information about running a mulitnode simulation, see [Multinode quantum +For more information about running a mulitnode simulation, see [Multinode quantum simulation using HTCondor on Google Cloud](/qsim/tutorials/multinode). ## Runtime estimates From 0f044539e7b94ee3c22120ea82d6a892d2e45026 Mon Sep 17 00:00:00 2001 From: angelo-laskaris Date: Fri, 20 Oct 2023 11:50:49 -0400 Subject: [PATCH 15/23] Adding Confusion Matrix check for QSim. If a confusion matrix is specified in a cirquit and Qsim is the target simulator a ValueError Exception will be thrown until confusion maps for Measurement Gates is supported in QSim. --- qsimcirq/qsim_circuit.py | 20 +++++++++++++++++++- qsimcirq/qsim_simulator.py | 2 +- qsimcirq_tests/qsimcirq_test.py | 14 +++++++++++++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/qsimcirq/qsim_circuit.py b/qsimcirq/qsim_circuit.py index 0da46be0..9d33881e 100644 --- a/qsimcirq/qsim_circuit.py +++ b/qsimcirq/qsim_circuit.py @@ -343,7 +343,6 @@ def __init__( cirq_circuit: cirq.Circuit, allow_decomposition: bool = False, ): - if allow_decomposition: super().__init__() for moment in cirq_circuit: @@ -352,6 +351,7 @@ def __init__( self.append(op) else: super().__init__(cirq_circuit) + self._check_for_confusion_matrix() def __eq__(self, other): if not isinstance(other, QSimCircuit): @@ -364,6 +364,24 @@ def _resolve_parameters_( ): return QSimCircuit(cirq.resolve_parameters(super(), param_resolver, recursive)) + def _check_for_confusion_matrix(self): + """Checks cirq Circuit for Measurement Gates with confusion matrices. + Returns: + Throws a runtime exception if a MeasurementGate with a confusion matrix is included in the circuit + """ + confusion_maps_on_measurement_gates = [ + op.gate.confusion_map + for _, op, _ in self.findall_operations_with_gate_type(cirq.MeasurementGate) + if op.gate.confusion_map + ] + for confusion_map in confusion_maps_on_measurement_gates: + for map_values in confusion_map.values(): + if map_values: + raise ValueError( + "Confusion Matrices are not currently supported in Qsim. " + "See https://github.com/quantumlib/Cirq/issues/6305 for latest status" + ) + def translate_cirq_to_qsim( self, qubit_order: cirq.QubitOrderOrList = cirq.QubitOrder.DEFAULT ) -> qsim.Circuit: diff --git a/qsimcirq/qsim_simulator.py b/qsimcirq/qsim_simulator.py index 1bf0a4d0..2513ba50 100644 --- a/qsimcirq/qsim_simulator.py +++ b/qsimcirq/qsim_simulator.py @@ -213,7 +213,7 @@ def _run( """Run a simulation, mimicking quantum hardware. Args: - program: The circuit to simulate. + circuit: The circuit to simulate. param_resolver: Parameters to run with the program. repetitions: Number of times to repeat the run. diff --git a/qsimcirq_tests/qsimcirq_test.py b/qsimcirq_tests/qsimcirq_test.py index e0d19fae..40d0c261 100644 --- a/qsimcirq_tests/qsimcirq_test.py +++ b/qsimcirq_tests/qsimcirq_test.py @@ -336,6 +336,19 @@ def test_numpy_params(): qsim_result = qsim_simulator.simulate_sweep(circuit, params=prs) +def test_confusion_matrix_exception(): + qubit = cirq.LineQubit(0) + cmap = {(0,): np.array([[0.8, 0.2], [0.2, 0.8]])} + circuit = cirq.Circuit() + circuit += cirq.X(qubit) + circuit += cirq.MeasurementGate(1, confusion_map=cmap)(qubit) + x, y = sympy.Symbol("x"), sympy.Symbol("y") + prs = [{x: np.int64(0), y: np.int64(1)}] + qsim_simulator = qsimcirq.QSimSimulator() + with pytest.raises(ValueError): + _ = qsim_simulator.simulate_sweep(circuit, params=prs) + + def test_invalid_params(): # Parameters must have numeric values. q0 = cirq.LineQubit(0) @@ -1219,7 +1232,6 @@ def test_multi_qubit_fusion(): @pytest.mark.parametrize("mode", ["noiseless", "noisy"]) def test_cirq_qsim_simulate_random_unitary(mode: str): - q0, q1 = cirq.LineQubit.range(2) options = qsimcirq.QSimOptions(cpu_threads=16, verbosity=0) qsimSim = qsimcirq.QSimSimulator(qsim_options=options) From a3a08922348c854c7edcf71b1bba4f683c80749e Mon Sep 17 00:00:00 2001 From: Orion Martin <40585662+95-martin-orion@users.noreply.github.com> Date: Mon, 30 Oct 2023 10:22:09 -0700 Subject: [PATCH 16/23] Update version to 0.17.0 --- qsimcirq/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qsimcirq/_version.py b/qsimcirq/_version.py index 95385f6b..54f88311 100644 --- a/qsimcirq/_version.py +++ b/qsimcirq/_version.py @@ -1,3 +1,3 @@ """The version number defined here is read automatically in setup.py.""" -__version__ = "0.16.3" +__version__ = "0.17.0" From 0b64da5ff648524f8cb34ebfa2a43b7590252ef7 Mon Sep 17 00:00:00 2001 From: Orion Martin <40585662+95-martin-orion@users.noreply.github.com> Date: Mon, 30 Oct 2023 10:48:52 -0700 Subject: [PATCH 17/23] Pin CI python version --- .github/workflows/testing_wheels.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/testing_wheels.yml b/.github/workflows/testing_wheels.yml index 5ef42431..4d3c1ca3 100644 --- a/.github/workflows/testing_wheels.yml +++ b/.github/workflows/testing_wheels.yml @@ -50,6 +50,8 @@ jobs: # Used to host cibuildwheel - uses: actions/setup-python@v2 + with: + python-version: '3.11' - name: Install cibuildwheel and twine run: python -m pip install cibuildwheel==2.12.3 From 9b247d1fbe6a28c5986ba06465ba021b395c6091 Mon Sep 17 00:00:00 2001 From: Orion Martin <40585662+95-martin-orion@users.noreply.github.com> Date: Mon, 30 Oct 2023 10:49:49 -0700 Subject: [PATCH 18/23] and in release --- .github/workflows/release_wheels.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/release_wheels.yml b/.github/workflows/release_wheels.yml index 9775c09f..292f4dc5 100644 --- a/.github/workflows/release_wheels.yml +++ b/.github/workflows/release_wheels.yml @@ -45,6 +45,8 @@ jobs: # Used to host cibuildwheel - uses: actions/setup-python@v2 + with: + python-version: '3.11' - name: Install cibuildwheel and twine run: python -m pip install cibuildwheel==2.12.3 From a1ab2e2700fc3f948da72f1355755ce79ebd03eb Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Tue, 7 Nov 2023 14:14:54 -0500 Subject: [PATCH 19/23] Add documentation for AMD GPU support --- docs/_book.yaml | 2 + docs/tutorials/amd_gpu.md | 82 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 docs/tutorials/amd_gpu.md diff --git a/docs/_book.yaml b/docs/_book.yaml index 2fe9a2c8..05c862b3 100644 --- a/docs/_book.yaml +++ b/docs/_book.yaml @@ -26,6 +26,8 @@ upper_tabs: path: /qsim/tutorials/q32d14 - title: "Simulate noise" path: /qsim/tutorials/noisy_qsimcirq + - title: "AMD GPU support" + path: /qsim/tutorials/amd_gpu - name: "Guides" contents: diff --git a/docs/tutorials/amd_gpu.md b/docs/tutorials/amd_gpu.md new file mode 100644 index 00000000..dcd29500 --- /dev/null +++ b/docs/tutorials/amd_gpu.md @@ -0,0 +1,82 @@ +# Support for AMD Instinct™ MI Series Accelerators + +Qsim provides support for AMD Instinct accelerators. +The implementation covers the native GPU support in Qsim +by utilizing [HIP](https://rocm.docs.amd.com/projects/HIP) +(Heterogeneous-Compute Interface for Portability). +The cuQuantum implementation is currently not covered. + +## Building + +To enable support for AMD Instinct GPUs, Qsim needs to be built from sources. +This can be done as follows: + +``` +conda env list +conda create -y -n CirqDevEnv python=3 +conda activate CirqDevEnv +pip install pybind11 + +git clone https://github.com/quantumlib/qsim.git +cd qsim + +make -j qsim # to build CPU qsim +make -j qsim-hip # to build HIP qsim +make -j pybind # to build Python bindings +make -j cxx-tests # to build CPU tests +make -j hip-tests # to build HIP tests + +pip install . +``` + +Note: To avoid problems when building Qsim with support for AMD GPUs, +make sure to use the latest version of CMake. + +## Testing + +### Simulator + +To test the Qsim simulator: + +``` +make run-cxx-tests # to run CPU tests +make run-hip-tests # to run HIP tests +``` + +or + +``` +cd tests +for file in *.x; do ./"$file"; done # to run all tests +for file in *_hip_test.x; do ./"$file"; done # to run HIP tests only +``` + +### Python Bindings + +To test the Python bindings: + +``` +make run-py-tests +``` + +or + +``` +cd qsimcirq_tests +python3 -m pytest -v qsimcirq_test.py +``` + +## Using + +Using Qsim on AMD Instinct GPUs is identical to using it on NVIDIA GPUs. +I.e., it is done by passing `use_gpu=True` and `gpu_mode=0` as `qsimcirq.QSimOptions`: + +``` +simulator = qsimcirq.QSimSimulator(qsim_options=qsimcirq.QSimOptions( + use_gpu=True, + gpu_mode=0, + ... + )) +``` + +Note: `gpu_mode` has to be set to zero for AMD GPUs, as cuStateVec is not supported. From 56c27c60db5a5725c078b96818fa272d95ca82de Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 8 Nov 2023 12:13:00 -0500 Subject: [PATCH 20/23] Replace Qsim with qsim in amd_gpu.md --- docs/tutorials/amd_gpu.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/tutorials/amd_gpu.md b/docs/tutorials/amd_gpu.md index dcd29500..11965c2d 100644 --- a/docs/tutorials/amd_gpu.md +++ b/docs/tutorials/amd_gpu.md @@ -1,14 +1,14 @@ # Support for AMD Instinct™ MI Series Accelerators -Qsim provides support for AMD Instinct accelerators. -The implementation covers the native GPU support in Qsim +qsim provides support for AMD Instinct accelerators. +The implementation covers the native GPU support in qsim by utilizing [HIP](https://rocm.docs.amd.com/projects/HIP) (Heterogeneous-Compute Interface for Portability). The cuQuantum implementation is currently not covered. ## Building -To enable support for AMD Instinct GPUs, Qsim needs to be built from sources. +To enable support for AMD Instinct GPUs, qsim needs to be built from sources. This can be done as follows: ``` @@ -29,14 +29,14 @@ make -j hip-tests # to build HIP tests pip install . ``` -Note: To avoid problems when building Qsim with support for AMD GPUs, +Note: To avoid problems when building qsim with support for AMD GPUs, make sure to use the latest version of CMake. ## Testing ### Simulator -To test the Qsim simulator: +To test the qsim simulator: ``` make run-cxx-tests # to run CPU tests @@ -68,7 +68,7 @@ python3 -m pytest -v qsimcirq_test.py ## Using -Using Qsim on AMD Instinct GPUs is identical to using it on NVIDIA GPUs. +Using qsim on AMD Instinct GPUs is identical to using it on NVIDIA GPUs. I.e., it is done by passing `use_gpu=True` and `gpu_mode=0` as `qsimcirq.QSimOptions`: ``` From 62d64c4198ae8701b9bac7337bc28959fd855053 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 8 Nov 2023 12:27:33 -0500 Subject: [PATCH 21/23] Add pointers to AMD ROCm Platform to amd_gpu.md --- docs/tutorials/amd_gpu.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/tutorials/amd_gpu.md b/docs/tutorials/amd_gpu.md index 11965c2d..e7c5f613 100644 --- a/docs/tutorials/amd_gpu.md +++ b/docs/tutorials/amd_gpu.md @@ -2,13 +2,17 @@ qsim provides support for AMD Instinct accelerators. The implementation covers the native GPU support in qsim -by utilizing [HIP](https://rocm.docs.amd.com/projects/HIP) +by utilizing [AMD HIP SDK](https://rocm.docs.amd.com/projects/HIP) (Heterogeneous-Compute Interface for Portability). The cuQuantum implementation is currently not covered. ## Building -To enable support for AMD Instinct GPUs, qsim needs to be built from sources. +Building qsim with support for AMD Instinct accelerators requires installation of +[AMD ROCm™ Open Software Platform](https://www.amd.com/en/developer/resources/rocm-hub.html). +Instructions for installing ROCm are available at https://rocm.docs.amd.com/. + +To enable support for AMD GPUs, qsim needs to be built from sources. This can be done as follows: ``` From f734cabc5436ba4dfbc70b9fcd404c2953ca8fad Mon Sep 17 00:00:00 2001 From: Orion Martin <40585662+95-martin-orion@users.noreply.github.com> Date: Thu, 9 Nov 2023 13:50:36 -0800 Subject: [PATCH 22/23] Update version to 0.18.0 --- qsimcirq/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qsimcirq/_version.py b/qsimcirq/_version.py index 54f88311..802744d1 100644 --- a/qsimcirq/_version.py +++ b/qsimcirq/_version.py @@ -1,3 +1,3 @@ """The version number defined here is read automatically in setup.py.""" -__version__ = "0.17.0" +__version__ = "0.18.0" From 9efb8320a5bb9f43281cd54ecaae05bb30b1e285 Mon Sep 17 00:00:00 2001 From: Orion Martin <40585662+95-martin-orion@users.noreply.github.com> Date: Mon, 13 Nov 2023 10:07:30 -0800 Subject: [PATCH 23/23] bigger boot --- docs/tutorials/gcp_gpu.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/gcp_gpu.md b/docs/tutorials/gcp_gpu.md index 09a642e7..f79a5be8 100644 --- a/docs/tutorials/gcp_gpu.md +++ b/docs/tutorials/gcp_gpu.md @@ -34,7 +34,7 @@ instance section, ensure that your VM has the following properties: * In the **Boot disk** section, click the **Change** button: 1. In the **Operating System** option, choose **Ubuntu**. 2. In the **Version** option, choose **20.04 LTS**. - 3. In the **Size** field, enter **30** (minimum). + 3. In the **Size** field, enter **100** (minimum known to be sufficient). * The instructions above override steps 3 through 5 in the [Create a Linux VM instance](https://cloud.google.com/compute/docs/quickstart-linux) Quickstart.