diff --git a/.github/workflows/release_wheels.yml b/.github/workflows/release_wheels.yml index 9775c09f..292f4dc5 100644 --- a/.github/workflows/release_wheels.yml +++ b/.github/workflows/release_wheels.yml @@ -45,6 +45,8 @@ jobs: # Used to host cibuildwheel - uses: actions/setup-python@v2 + with: + python-version: '3.11' - name: Install cibuildwheel and twine run: python -m pip install cibuildwheel==2.12.3 diff --git a/.github/workflows/testing_wheels.yml b/.github/workflows/testing_wheels.yml index 5ef42431..4d3c1ca3 100644 --- a/.github/workflows/testing_wheels.yml +++ b/.github/workflows/testing_wheels.yml @@ -50,6 +50,8 @@ jobs: # Used to host cibuildwheel - uses: actions/setup-python@v2 + with: + python-version: '3.11' - name: Install cibuildwheel and twine run: python -m pip install cibuildwheel==2.12.3 diff --git a/CMakeLists.txt b/CMakeLists.txt index 615a33db..f902a421 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,13 @@ cmake_minimum_required(VERSION 3.11) execute_process(COMMAND which nvcc OUTPUT_VARIABLE has_nvcc) if(has_nvcc STREQUAL "") - project(qsim) + execute_process(COMMAND which hipcc OUTPUT_VARIABLE has_hipcc) + if(has_hipcc STREQUAL "") + project(qsim) + else() + project(qsim LANGUAGES CXX HIP) + ADD_SUBDIRECTORY(pybind_interface/hip) + endif() else() project(qsim LANGUAGES CXX CUDA) ADD_SUBDIRECTORY(pybind_interface/cuda) diff --git a/Makefile b/Makefile index 9ef0afdd..227ca603 100644 --- a/Makefile +++ b/Makefile @@ -6,10 +6,12 @@ TESTS = run-cxx-tests CXX=g++ NVCC=nvcc +HIPCC=hipcc CXXFLAGS = -O3 -fopenmp ARCHFLAGS = -march=native NVCCFLAGS = -O3 +HIPCCFLAGS = -O3 # CUQUANTUM_ROOT should be set. CUSTATEVECFLAGS = -I$(CUQUANTUM_ROOT)/include -L${CUQUANTUM_ROOT}/lib -L$(CUQUANTUM_ROOT)/lib64 -lcustatevec -lcublas @@ -22,6 +24,8 @@ export ARCHFLAGS export NVCC export NVCCFLAGS export CUSTATEVECFLAGS +export HIPCC +export HIPCCFLAGS ifeq ($(PYBIND11), true) TARGETS += pybind @@ -43,6 +47,10 @@ qsim-cuda: qsim-custatevec: $(MAKE) -C apps/ qsim-custatevec +.PHONY: qsim-hip +qsim-hip: + $(MAKE) -C apps/ qsim-hip + .PHONY: pybind pybind: $(MAKE) -C pybind_interface/ pybind @@ -59,6 +67,10 @@ cuda-tests: custatevec-tests: $(MAKE) -C tests/ custatevec-tests +.PHONY: hip-tests +hip-tests: + $(MAKE) -C tests/ hip-tests + .PHONY: run-cxx-tests run-cxx-tests: cxx-tests $(MAKE) -C tests/ run-cxx-tests @@ -71,6 +83,10 @@ run-cuda-tests: cuda-tests run-custatevec-tests: custatevec-tests $(MAKE) -C tests/ run-custatevec-tests +.PHONY: run-hip-tests +run-hip-tests: hip-tests + $(MAKE) -C tests/ run-hip-tests + PYTESTS = $(shell find qsimcirq_tests/ -name '*_test.py') .PHONY: run-py-tests diff --git a/apps/Makefile b/apps/Makefile index 41fb81e5..48b25cab 100644 --- a/apps/Makefile +++ b/apps/Makefile @@ -7,6 +7,9 @@ CUDA_TARGETS := $(CUDA_TARGETS:%cuda.cu=%cuda.x) CUSTATEVEC_TARGETS = $(shell find . -maxdepth 1 -name "*custatevec.cu") CUSTATEVEC_TARGETS := $(CUSTATEVEC_TARGETS:%custatevec.cu=%custatevec.x) +HIP_TARGETS = $(shell find . -maxdepth 1 -name '*cuda.cu') +HIP_TARGETS := $(HIP_TARGETS:%cuda.cu=%hip.x) + .PHONY: qsim qsim: $(CXX_TARGETS) @@ -16,6 +19,9 @@ qsim-cuda: $(CUDA_TARGETS) .PHONY: qsim-custatevec qsim-custatevec: $(CUSTATEVEC_TARGETS) +.PHONY: qsim-hip +qsim-hip: $(HIP_TARGETS) + %.x: %.cc $(CXX) -o ./$@ $< $(CXXFLAGS) $(ARCHFLAGS) @@ -25,6 +31,9 @@ qsim-custatevec: $(CUSTATEVEC_TARGETS) %custatevec.x: %custatevec.cu $(NVCC) -o ./$@ $< $(NVCCFLAGS) $(CUSTATEVECFLAGS) +%hip.x: %cuda.cu + $(HIPCC) -o ./$@ $< $(HIPCCFLAGS) + .PHONY: clean clean: -rm -f ./*.x ./*.a ./*.so ./*.mod diff --git a/apps/make.sh b/apps/make.sh index f2e777e5..5ddd4d6b 100755 --- a/apps/make.sh +++ b/apps/make.sh @@ -23,9 +23,15 @@ g++ -O3 -march=native -fopenmp -o qsim_amplitudes.x qsim_amplitudes.cc g++ -O3 -march=native -fopenmp -o qsimh_base.x qsimh_base.cc g++ -O3 -march=native -fopenmp -o qsimh_amplitudes.x qsimh_amplitudes.cc -nvcc -O3 -o qsim_base_cuda.x qsim_base_cuda.cu -nvcc -O3 -o qsim_qtrajectory_cuda.x qsim_qtrajectory_cuda.cu +if command -v nvcc &>/dev/null; then + nvcc -O3 -o qsim_base_cuda.x qsim_base_cuda.cu + nvcc -O3 -o qsim_qtrajectory_cuda.x qsim_qtrajectory_cuda.cu -# CUQUANTUM_ROOT should be set. -CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" -nvcc -O3 $CUSTATEVECFLAGS -o qsim_base_custatevec.x qsim_base_custatevec.cu + if [ -n "$CUQUANTUM_ROOT" ]; then + CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" + nvcc -O3 $CUSTATEVECFLAGS -o qsim_base_custatevec.x qsim_base_custatevec.cu + fi +elif command -v hipcc &>/dev/null; then + hipcc -O3 -o qsim_base_hip.x qsim_base_cuda.cu + hipcc -O3 -o qsim_qtrajectory_hip.x qsim_qtrajectory_cuda.cu +fi diff --git a/docs/_book.yaml b/docs/_book.yaml index 2fe9a2c8..05c862b3 100644 --- a/docs/_book.yaml +++ b/docs/_book.yaml @@ -26,6 +26,8 @@ upper_tabs: path: /qsim/tutorials/q32d14 - title: "Simulate noise" path: /qsim/tutorials/noisy_qsimcirq + - title: "AMD GPU support" + path: /qsim/tutorials/amd_gpu - name: "Guides" contents: diff --git a/docs/choose_hw.md b/docs/choose_hw.md index ff692530..96721ed7 100644 --- a/docs/choose_hw.md +++ b/docs/choose_hw.md @@ -258,7 +258,7 @@ are “embarrassingly parallelizable”, there is an automated workflow for distributing these trajectories over multiple nodes. A simulation of many noiseless circuits can also be distributed over multiple compute nodes. -For mor information about running a mulitnode simulation, see [Multinode quantum +For more information about running a mulitnode simulation, see [Multinode quantum simulation using HTCondor on Google Cloud](/qsim/tutorials/multinode). ## Runtime estimates diff --git a/docs/tutorials/amd_gpu.md b/docs/tutorials/amd_gpu.md new file mode 100644 index 00000000..e7c5f613 --- /dev/null +++ b/docs/tutorials/amd_gpu.md @@ -0,0 +1,86 @@ +# Support for AMD Instinct™ MI Series Accelerators + +qsim provides support for AMD Instinct accelerators. +The implementation covers the native GPU support in qsim +by utilizing [AMD HIP SDK](https://rocm.docs.amd.com/projects/HIP) +(Heterogeneous-Compute Interface for Portability). +The cuQuantum implementation is currently not covered. + +## Building + +Building qsim with support for AMD Instinct accelerators requires installation of +[AMD ROCm™ Open Software Platform](https://www.amd.com/en/developer/resources/rocm-hub.html). +Instructions for installing ROCm are available at https://rocm.docs.amd.com/. + +To enable support for AMD GPUs, qsim needs to be built from sources. +This can be done as follows: + +``` +conda env list +conda create -y -n CirqDevEnv python=3 +conda activate CirqDevEnv +pip install pybind11 + +git clone https://github.com/quantumlib/qsim.git +cd qsim + +make -j qsim # to build CPU qsim +make -j qsim-hip # to build HIP qsim +make -j pybind # to build Python bindings +make -j cxx-tests # to build CPU tests +make -j hip-tests # to build HIP tests + +pip install . +``` + +Note: To avoid problems when building qsim with support for AMD GPUs, +make sure to use the latest version of CMake. + +## Testing + +### Simulator + +To test the qsim simulator: + +``` +make run-cxx-tests # to run CPU tests +make run-hip-tests # to run HIP tests +``` + +or + +``` +cd tests +for file in *.x; do ./"$file"; done # to run all tests +for file in *_hip_test.x; do ./"$file"; done # to run HIP tests only +``` + +### Python Bindings + +To test the Python bindings: + +``` +make run-py-tests +``` + +or + +``` +cd qsimcirq_tests +python3 -m pytest -v qsimcirq_test.py +``` + +## Using + +Using qsim on AMD Instinct GPUs is identical to using it on NVIDIA GPUs. +I.e., it is done by passing `use_gpu=True` and `gpu_mode=0` as `qsimcirq.QSimOptions`: + +``` +simulator = qsimcirq.QSimSimulator(qsim_options=qsimcirq.QSimOptions( + use_gpu=True, + gpu_mode=0, + ... + )) +``` + +Note: `gpu_mode` has to be set to zero for AMD GPUs, as cuStateVec is not supported. diff --git a/docs/tutorials/gcp_gpu.md b/docs/tutorials/gcp_gpu.md index 09a642e7..f79a5be8 100644 --- a/docs/tutorials/gcp_gpu.md +++ b/docs/tutorials/gcp_gpu.md @@ -34,7 +34,7 @@ instance section, ensure that your VM has the following properties: * In the **Boot disk** section, click the **Change** button: 1. In the **Operating System** option, choose **Ubuntu**. 2. In the **Version** option, choose **20.04 LTS**. - 3. In the **Size** field, enter **30** (minimum). + 3. In the **Size** field, enter **100** (minimum known to be sufficient). * The instructions above override steps 3 through 5 in the [Create a Linux VM instance](https://cloud.google.com/compute/docs/quickstart-linux) Quickstart. diff --git a/lib/cuda2hip.h b/lib/cuda2hip.h new file mode 100644 index 00000000..da2d074c --- /dev/null +++ b/lib/cuda2hip.h @@ -0,0 +1,61 @@ +// Copyright 2023 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SIMULATOR_CUDA2HIP_H_ +#define SIMULATOR_CUDA2HIP_H_ + +#define cublasCaxpy hipblasCaxpy +#define cublasCdotc hipblasCdotc +#define cublasCreate hipblasCreate +#define cublasCscal hipblasCscal +#define cublasCsscal hipblasCsscal +#define cublasDestroy hipblasDestroy +#define cublasDznrm2 hipblasDznrm2 +#define cublasHandle_t hipblasHandle_t +#define cublasScnrm2 hipblasScnrm2 +#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS +#define cublasStatus_t hipblasStatus_t +#define cublasZaxpy hipblasZaxpy +#define cublasZdotc hipblasZdotc +#define cublasZdscal hipblasZdscal +#define cublasZscal hipblasZscal +#define cuCimagf hipCimagf +#define cuCimag hipCimag +#define cuComplex hipComplex +#define cuCrealf hipCrealf +#define cuCreal hipCreal +#define CUDA_C_32F HIPBLAS_C_32F +#define CUDA_C_64F HIPBLAS_C_64F +#define cudaDeviceSynchronize hipDeviceSynchronize +#define cudaError_t hipError_t +#define cudaFree hipFree +#define cudaGetErrorString hipGetErrorString +#define cudaMalloc hipMalloc +#define cudaMemcpyAsync hipMemcpyAsync +#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice +#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost +#define cudaMemcpy hipMemcpy +#define cudaMemcpyHostToDevice hipMemcpyHostToDevice +#define cudaMemset hipMemset +#define cudaPeekAtLastError hipPeekAtLastError +#define cudaSuccess hipSuccess +#define cuDoubleComplex hipDoubleComplex + +template +__device__ __forceinline__ T __shfl_down_sync( + unsigned mask, T var, unsigned int delta, int width = warpSize) { + return __shfl_down(var, delta, width); +} + +#endif // SIMULATOR_CUDA2HIP_H_ diff --git a/lib/fuser_mqubit.h b/lib/fuser_mqubit.h index aa4e6305..fdbfddb0 100644 --- a/lib/fuser_mqubit.h +++ b/lib/fuser_mqubit.h @@ -561,8 +561,6 @@ class MultiQubitGateFuser final : public Fuser { static void FuseOrphanedGates(unsigned max_fused_size, Stat& stat, std::vector& orphaned_gates, std::vector& fused_gates) { - unsigned count = 0; - for (std::size_t i = 0; i < orphaned_gates.size(); ++i) { auto ogate1 = orphaned_gates[i]; @@ -575,8 +573,6 @@ class MultiQubitGateFuser final : public Fuser { if (ogate2->visited == kFinal) continue; - ++count; - unsigned cur_size = ogate1->qubits.size() + ogate2->qubits.size(); if (cur_size <= max_fused_size) { diff --git a/lib/simulator_cuda_kernels.h b/lib/simulator_cuda_kernels.h index 6510fadf..e21a9d62 100644 --- a/lib/simulator_cuda_kernels.h +++ b/lib/simulator_cuda_kernels.h @@ -15,10 +15,15 @@ #ifndef SIMULATOR_CUDA_KERNELS_H_ #define SIMULATOR_CUDA_KERNELS_H_ -#include -#include - -#include "util_cuda.h" +#ifdef __NVCC__ + #include + #include + + #include "util_cuda.h" +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif namespace qsim { diff --git a/lib/statespace_cuda.h b/lib/statespace_cuda.h index 82f5128b..660db074 100644 --- a/lib/statespace_cuda.h +++ b/lib/statespace_cuda.h @@ -15,7 +15,12 @@ #ifndef STATESPACE_CUDA_H_ #define STATESPACE_CUDA_H_ -#include +#ifdef __NVCC__ + #include +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif #include #include @@ -102,7 +107,8 @@ class StateSpaceCUDA : } void SetAllZeros(State& state) const { - cudaMemset(state.get(), 0, MinSize(state.num_qubits()) * sizeof(fp_type)); + ErrorCheck(cudaMemset(state.get(), 0, + MinSize(state.num_qubits()) * sizeof(fp_type))); } // Uniform superposition. diff --git a/lib/statespace_cuda_kernels.h b/lib/statespace_cuda_kernels.h index bcb7fd25..b54ebca9 100644 --- a/lib/statespace_cuda_kernels.h +++ b/lib/statespace_cuda_kernels.h @@ -15,7 +15,12 @@ #ifndef STATESPACE_CUDA_KERNELS_H_ #define STATESPACE_CUDA_KERNELS_H_ -#include +#ifdef __NVCC__ + #include +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif #include "util_cuda.h" diff --git a/lib/util_cuda.h b/lib/util_cuda.h index 591d852d..5d8cb5df 100644 --- a/lib/util_cuda.h +++ b/lib/util_cuda.h @@ -15,7 +15,11 @@ #ifndef UTIL_CUDA_H_ #define UTIL_CUDA_H_ -#include +#ifdef __NVCC__ + #include +#elif __HIP__ + #include +#endif #include diff --git a/lib/vectorspace_cuda.h b/lib/vectorspace_cuda.h index 0c1d6167..fd91553d 100644 --- a/lib/vectorspace_cuda.h +++ b/lib/vectorspace_cuda.h @@ -15,8 +15,13 @@ #ifndef VECTORSPACE_CUDA_H_ #define VECTORSPACE_CUDA_H_ -#include -#include +#ifdef __NVCC__ + #include + #include +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif #include #include @@ -28,7 +33,7 @@ namespace detail { inline void do_not_free(void*) {} inline void free(void* ptr) { - cudaFree(ptr); + ErrorCheck(cudaFree(ptr)); } } // namespace detail @@ -114,9 +119,10 @@ class VectorSpaceCUDA { return false; } - cudaMemcpy(dest.get(), src.get(), - sizeof(fp_type) * Impl::MinSize(src.num_qubits()), - cudaMemcpyDeviceToDevice); + ErrorCheck( + cudaMemcpy(dest.get(), src.get(), + sizeof(fp_type) * Impl::MinSize(src.num_qubits()), + cudaMemcpyDeviceToDevice)); return true; } @@ -124,9 +130,10 @@ class VectorSpaceCUDA { // It is the client's responsibility to make sure that dest has at least // Impl::MinSize(src.num_qubits()) elements. bool Copy(const Vector& src, fp_type* dest) const { - cudaMemcpy(dest, src.get(), - sizeof(fp_type) * Impl::MinSize(src.num_qubits()), - cudaMemcpyDeviceToHost); + ErrorCheck( + cudaMemcpy(dest, src.get(), + sizeof(fp_type) * Impl::MinSize(src.num_qubits()), + cudaMemcpyDeviceToHost)); return true; } @@ -134,9 +141,10 @@ class VectorSpaceCUDA { // It is the client's responsibility to make sure that src has at least // Impl::MinSize(dest.num_qubits()) elements. bool Copy(const fp_type* src, Vector& dest) const { - cudaMemcpy(dest.get(), src, - sizeof(fp_type) * Impl::MinSize(dest.num_qubits()), - cudaMemcpyHostToDevice); + ErrorCheck( + cudaMemcpy(dest.get(), src, + sizeof(fp_type) * Impl::MinSize(dest.num_qubits()), + cudaMemcpyHostToDevice)); return true; } @@ -145,12 +153,15 @@ class VectorSpaceCUDA { // min(size, Impl::MinSize(dest.num_qubits())) elements. bool Copy(const fp_type* src, uint64_t size, Vector& dest) const { size = std::min(size, Impl::MinSize(dest.num_qubits())); - cudaMemcpy(dest.get(), src, sizeof(fp_type) * size, cudaMemcpyHostToDevice); + ErrorCheck( + cudaMemcpy(dest.get(), src, + sizeof(fp_type) * size, + cudaMemcpyHostToDevice)); return true; } void DeviceSync() { - cudaDeviceSynchronize(); + ErrorCheck(cudaDeviceSynchronize()); } protected: diff --git a/pybind_interface/Makefile b/pybind_interface/Makefile index c7c70860..95f64b32 100644 --- a/pybind_interface/Makefile +++ b/pybind_interface/Makefile @@ -5,6 +5,7 @@ QSIMLIB_AVX2 = ../qsimcirq/qsim_avx2`python3-config --extension-suffix` QSIMLIB_AVX512 = ../qsimcirq/qsim_avx512`python3-config --extension-suffix` QSIMLIB_CUDA = ../qsimcirq/qsim_cuda`python3-config --extension-suffix` QSIMLIB_CUSTATEVEC = ../qsimcirq/qsim_custatevec`python3-config --extension-suffix` +QSIMLIB_HIP = ../qsimcirq/qsim_hip`python3-config --extension-suffix` QSIMLIB_DECIDE = ../qsimcirq/qsim_decide`python3-config --extension-suffix` # The flags for the compilation of the simd-specific Pybind11 interfaces @@ -13,21 +14,29 @@ PYBINDFLAGS_SSE = -msse4.1 -Wall -shared -std=c++17 -fPIC `python3 -m pybind11 - PYBINDFLAGS_AVX2 = -mavx2 -mfma -Wall -shared -std=c++17 -fPIC `python3 -m pybind11 --includes` PYBINDFLAGS_AVX512 = -mavx512f -mbmi2 -Wall -shared -std=c++17 -fPIC `python3 -m pybind11 --includes` -# The flags for the compilation of GPU-specific Pybind11 interfaces +# The flags for the compilation of CUDA-specific Pybind11 interfaces PYBINDFLAGS_CUDA = -std=c++17 -x cu -Xcompiler "-Wall -shared -fPIC `python3 -m pybind11 --includes`" # The flags for the compilation of cuStateVec-specific Pybind11 interfaces PYBINDFLAGS_CUSTATEVEC = $(CUSTATEVECFLAGS) $(PYBINDFLAGS_CUDA) +# The flags for the compilation of HIP-specific Pybind11 interfaces +PYBINDFLAGS_HIP = -std=c++17 -Wall -shared -fPIC `python3 -m pybind11 --includes` + # Check for nvcc to decide compilation mode. ifeq ($(shell which $(NVCC)),) +# Check for hipcc to decide compilation mode. +ifeq ($(shell which $(HIPCC)),) pybind: pybind-cpu decide-cpu else +pybind: pybind-hip decide-hip +endif +else # Check for the cuStateVec library. ifeq ($(CUQUANTUM_ROOT),) -pybind: pybind-cpu pybind-gpu decide-gpu +pybind: pybind-cpu pybind-cuda decide-cuda else -pybind: pybind-cpu pybind-gpu pybind-custatevec decide-custatevec +pybind: pybind-cpu pybind-cuda pybind-custatevec decide-custatevec endif endif @@ -40,14 +49,15 @@ pybind-cpu: .PHONY: decide-cpu decide-cpu: + echo "building decide-cpu" $(CXX) decide/decide.cpp -o $(QSIMLIB_DECIDE) $(CXXFLAGS) $(PYBINDFLAGS_BASIC) -.PHONY: pybind-gpu -pybind-gpu: +.PHONY: pybind-cuda +pybind-cuda: $(NVCC) cuda/pybind_main_cuda.cpp -o $(QSIMLIB_CUDA) $(NVCCFLAGS) $(PYBINDFLAGS_CUDA) -.PHONY: decide-gpu -decide-gpu: +.PHONY: decide-cuda +decide-cuda: $(NVCC) decide/decide.cpp -o $(QSIMLIB_DECIDE) $(NVCCFLAGS) $(PYBINDFLAGS_CUDA) .PHONY: pybind-custatevec @@ -58,6 +68,18 @@ pybind-custatevec: decide-custatevec: $(NVCC) decide/decide.cpp -D__CUSTATEVEC__ -o $(QSIMLIB_DECIDE) $(NVCCFLAGS) $(PYBINDFLAGS_CUDA) +.PHONY: pybind-hip +pybind-hip: + $(HIPCC) basic/pybind_main_basic.cpp -o $(QSIMLIB_BASIC) $(CXXFLAGS) $(PYBINDFLAGS_BASIC) + $(HIPCC) sse/pybind_main_sse.cpp -o $(QSIMLIB_SSE) $(CXXFLAGS) $(PYBINDFLAGS_SSE) + $(HIPCC) avx2/pybind_main_avx2.cpp -o $(QSIMLIB_AVX2) $(CXXFLAGS) $(PYBINDFLAGS_AVX2) + $(HIPCC) avx512/pybind_main_avx512.cpp -o $(QSIMLIB_AVX512) $(CXXFLAGS) $(PYBINDFLAGS_AVX512) + $(HIPCC) hip/pybind_main_hip.cpp -o $(QSIMLIB_HIP) $(HIPCCFLAGS) $(PYBINDFLAGS_HIP) + +.PHONY: decide-hip +decide-hip: + $(HIPCC) decide/decide.cpp -o $(QSIMLIB_DECIDE) $(HIPCCFLAGS) $(PYBINDFLAGS_HIP) + .PHONY: clean clean: -rm -f ./basic/*.x ./basic/*.a ./basic/*.so ./basic/*.mod $(QSIMLIB_BASIC) @@ -65,5 +87,6 @@ clean: -rm -f ./avx2/*.x ./avx2/*.a ./avx2/*.so ./avx2/*.mod $(QSIMLIB_AVX2) -rm -f ./avx512/*.x ./avx512/*.a ./avx512/*.so ./avx512/*.mod $(QSIMLIB_AVX512) -rm -f ./cuda/*.x ./cuda/*.a ./cuda/*.so ./cuda/*.mod $(QSIMLIB_CUDA) + -rm -f ./hip/*.x ./hip/*.a ./hip/*.so ./hip/*.mod $(QSIMLIB_HIP) -rm -f ./custatevec/*.x ./custatevec/*.a ./custatevec/*.so ./custatevec/*.mod $(QSIMLIB_CUSTATEVEC) -rm -f ./decide/*.x ./decide/*.a ./decide/*.so ./decide/*.mod $(QSIMLIB_DECIDE) diff --git a/pybind_interface/decide/CMakeLists.txt b/pybind_interface/decide/CMakeLists.txt index 441f3d66..b94b0ade 100644 --- a/pybind_interface/decide/CMakeLists.txt +++ b/pybind_interface/decide/CMakeLists.txt @@ -2,7 +2,12 @@ cmake_minimum_required(VERSION 3.11) execute_process(COMMAND which nvcc OUTPUT_VARIABLE has_nvcc) if(has_nvcc STREQUAL "") - project(qsim) + execute_process(COMMAND which hipcc OUTPUT_VARIABLE has_hipcc) + if(has_hipcc STREQUAL "") + project(qsim) + else() + project(qsim LANGUAGES CXX HIP) + endif() else() project(qsim LANGUAGES CXX CUDA) endif() @@ -22,7 +27,23 @@ endif() INCLUDE(../GetPybind11.cmake) if(has_nvcc STREQUAL "") - pybind11_add_module(qsim_decide decide.cpp) + if(has_hipcc STREQUAL "") + pybind11_add_module(qsim_decide decide.cpp) + else() + list(APPEND CMAKE_MODULE_PATH "/opt/rocm/lib/cmake/hip") + find_package(HIP REQUIRED) + find_package(PythonLibs 3.7 REQUIRED) + + include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) + + hip_add_library(qsim_decide MODULE decide.cpp) + + set_target_properties(qsim_decide PROPERTIES + PREFIX "${PYTHON_MODULE_PREFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}" + ) + set_source_files_properties(decide.cpp PROPERTIES LANGUAGE HIP) + endif() else() find_package(PythonLibs 3.7 REQUIRED) find_package(CUDA REQUIRED) diff --git a/pybind_interface/decide/decide.cpp b/pybind_interface/decide/decide.cpp index 6355ad3b..5e9a63f1 100644 --- a/pybind_interface/decide/decide.cpp +++ b/pybind_interface/decide/decide.cpp @@ -60,7 +60,7 @@ int detect_instructions() { } enum GPUCapabilities { - CUDA = 0, CUSTATEVEC = 1, NO_GPU = 10, NO_CUSTATEVEC = 11 }; + CUDA = 0, CUSTATEVEC = 1, HIP = 2, NO_GPU = 10, NO_CUSTATEVEC = 11 }; // For now, GPU detection is performed at compile time, as our wheels are // generated on Github Actions runners which do not have GPU support. @@ -70,6 +70,8 @@ enum GPUCapabilities { int detect_gpu() { #ifdef __NVCC__ GPUCapabilities gpu = CUDA; + #elif __HIP__ + GPUCapabilities gpu = HIP; #else GPUCapabilities gpu = NO_GPU; #endif diff --git a/pybind_interface/hip/CMakeLists.txt b/pybind_interface/hip/CMakeLists.txt new file mode 100644 index 00000000..fe4b1c54 --- /dev/null +++ b/pybind_interface/hip/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.18) +project(qsim LANGUAGES CXX HIP) + +IF (WIN32) + set(CMAKE_CXX_FLAGS "/O2 /openmp") +ELSE() + set(CMAKE_CXX_FLAGS "-O3 -fopenmp") +ENDIF() + +INCLUDE(../GetPybind11.cmake) +find_package(PythonLibs 3.7 REQUIRED) + +list(APPEND CMAKE_MODULE_PATH "/opt/rocm/lib/cmake/hip") +find_package(HIP REQUIRED) + +include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) + +hip_add_library(qsim_hip MODULE pybind_main_hip.cpp) + +set_target_properties(qsim_hip PROPERTIES + PREFIX "${PYTHON_MODULE_PREFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}" +) +set_source_files_properties(pybind_main_hip.cpp PROPERTIES LANGUAGE HIP) diff --git a/pybind_interface/hip/pybind_main_hip.cpp b/pybind_interface/hip/pybind_main_hip.cpp new file mode 100644 index 00000000..d6ee181f --- /dev/null +++ b/pybind_interface/hip/pybind_main_hip.cpp @@ -0,0 +1,47 @@ +// Copyright 2019 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "pybind_main_hip.h" + +#include "../../lib/simulator_cuda.h" + +namespace qsim { + using Simulator = SimulatorCUDA; + + struct Factory { + using Simulator = qsim::Simulator; + using StateSpace = Simulator::StateSpace; + + Factory( + unsigned num_sim_threads, + unsigned num_state_threads, + unsigned num_dblocks + ) : ss_params{num_state_threads, num_dblocks} {} + + StateSpace CreateStateSpace() const { + return StateSpace(ss_params); + } + + Simulator CreateSimulator() const { + return Simulator(); + } + + StateSpace::Parameter ss_params; + }; + + inline void SetFlushToZeroAndDenormalsAreZeros() {} + inline void ClearFlushToZeroAndDenormalsAreZeros() {} +} + +#include "../pybind_main.cpp" diff --git a/pybind_interface/hip/pybind_main_hip.h b/pybind_interface/hip/pybind_main_hip.h new file mode 100644 index 00000000..55a672d5 --- /dev/null +++ b/pybind_interface/hip/pybind_main_hip.h @@ -0,0 +1,17 @@ +// Copyright 2019 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "../pybind_main.h" + +PYBIND11_MODULE(qsim_hip, m) { GPU_MODULE_BINDINGS } diff --git a/pybind_interface/pybind_main.cpp b/pybind_interface/pybind_main.cpp index c5615ebc..63a03e4a 100644 --- a/pybind_interface/pybind_main.cpp +++ b/pybind_interface/pybind_main.cpp @@ -377,7 +377,7 @@ std::vector> qsim_simulate(const py::dict &options) { std::vector> amplitudes; amplitudes.reserve(bitstrings.size()); - auto measure = [&bitstrings, &circuit, &litudes]( + auto measure = [&bitstrings, &litudes]( unsigned k, const StateSpace &state_space, const State &state) { for (const auto &b : bitstrings) { @@ -481,7 +481,7 @@ std::vector> qtrajectory_simulate(const py::dict &options) { Simulator simulator = factory.CreateSimulator(); StateSpace state_space = factory.CreateStateSpace(); - auto measure = [&bitstrings, &ncircuit, &litudes, &state_space]( + auto measure = [&bitstrings, &litudes, &state_space]( unsigned k, const State &state, Runner::Stat& stat) { for (const auto &b : bitstrings) { amplitudes.push_back(state_space.GetAmpl(state, b)); @@ -1082,7 +1082,7 @@ std::vector qtrajectory_sample(const py::dict &options) { std::vector> results; - auto measure = [&results, &ncircuit, &state_space]( + auto measure = [&results, &ncircuit]( unsigned k, const State& state, Runner::Stat& stat) { // Converts stat (which matches the MeasurementResult 'bits' field) into // bitstrings matching the MeasurementResult 'bitstring' field. diff --git a/qsimcirq/__init__.py b/qsimcirq/__init__.py index 0f022a1a..d9c5504d 100644 --- a/qsimcirq/__init__.py +++ b/qsimcirq/__init__.py @@ -34,6 +34,8 @@ def _load_qsim_gpu(): instr = qsim_decide.detect_gpu() if instr == 0: qsim_gpu = importlib.import_module("qsimcirq.qsim_cuda") + elif instr == 2: + qsim_gpu = importlib.import_module("qsimcirq.qsim_hip") else: qsim_gpu = None return qsim_gpu diff --git a/qsimcirq/_version.py b/qsimcirq/_version.py index 95385f6b..802744d1 100644 --- a/qsimcirq/_version.py +++ b/qsimcirq/_version.py @@ -1,3 +1,3 @@ """The version number defined here is read automatically in setup.py.""" -__version__ = "0.16.3" +__version__ = "0.18.0" diff --git a/qsimcirq/qsim_circuit.py b/qsimcirq/qsim_circuit.py index 0da46be0..9d33881e 100644 --- a/qsimcirq/qsim_circuit.py +++ b/qsimcirq/qsim_circuit.py @@ -343,7 +343,6 @@ def __init__( cirq_circuit: cirq.Circuit, allow_decomposition: bool = False, ): - if allow_decomposition: super().__init__() for moment in cirq_circuit: @@ -352,6 +351,7 @@ def __init__( self.append(op) else: super().__init__(cirq_circuit) + self._check_for_confusion_matrix() def __eq__(self, other): if not isinstance(other, QSimCircuit): @@ -364,6 +364,24 @@ def _resolve_parameters_( ): return QSimCircuit(cirq.resolve_parameters(super(), param_resolver, recursive)) + def _check_for_confusion_matrix(self): + """Checks cirq Circuit for Measurement Gates with confusion matrices. + Returns: + Throws a runtime exception if a MeasurementGate with a confusion matrix is included in the circuit + """ + confusion_maps_on_measurement_gates = [ + op.gate.confusion_map + for _, op, _ in self.findall_operations_with_gate_type(cirq.MeasurementGate) + if op.gate.confusion_map + ] + for confusion_map in confusion_maps_on_measurement_gates: + for map_values in confusion_map.values(): + if map_values: + raise ValueError( + "Confusion Matrices are not currently supported in Qsim. " + "See https://github.com/quantumlib/Cirq/issues/6305 for latest status" + ) + def translate_cirq_to_qsim( self, qubit_order: cirq.QubitOrderOrList = cirq.QubitOrder.DEFAULT ) -> qsim.Circuit: diff --git a/qsimcirq/qsim_simulator.py b/qsimcirq/qsim_simulator.py index 1bf0a4d0..2513ba50 100644 --- a/qsimcirq/qsim_simulator.py +++ b/qsimcirq/qsim_simulator.py @@ -213,7 +213,7 @@ def _run( """Run a simulation, mimicking quantum hardware. Args: - program: The circuit to simulate. + circuit: The circuit to simulate. param_resolver: Parameters to run with the program. repetitions: Number of times to repeat the run. diff --git a/qsimcirq_tests/qsimcirq_test.py b/qsimcirq_tests/qsimcirq_test.py index e0d19fae..40d0c261 100644 --- a/qsimcirq_tests/qsimcirq_test.py +++ b/qsimcirq_tests/qsimcirq_test.py @@ -336,6 +336,19 @@ def test_numpy_params(): qsim_result = qsim_simulator.simulate_sweep(circuit, params=prs) +def test_confusion_matrix_exception(): + qubit = cirq.LineQubit(0) + cmap = {(0,): np.array([[0.8, 0.2], [0.2, 0.8]])} + circuit = cirq.Circuit() + circuit += cirq.X(qubit) + circuit += cirq.MeasurementGate(1, confusion_map=cmap)(qubit) + x, y = sympy.Symbol("x"), sympy.Symbol("y") + prs = [{x: np.int64(0), y: np.int64(1)}] + qsim_simulator = qsimcirq.QSimSimulator() + with pytest.raises(ValueError): + _ = qsim_simulator.simulate_sweep(circuit, params=prs) + + def test_invalid_params(): # Parameters must have numeric values. q0 = cirq.LineQubit(0) @@ -1219,7 +1232,6 @@ def test_multi_qubit_fusion(): @pytest.mark.parametrize("mode", ["noiseless", "noisy"]) def test_cirq_qsim_simulate_random_unitary(mode: str): - q0, q1 = cirq.LineQubit.range(2) options = qsimcirq.QSimOptions(cpu_threads=16, verbosity=0) qsimSim = qsimcirq.QSimSimulator(qsim_options=options) diff --git a/setup.py b/setup.py index c57a37a9..593620a6 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ import os import re import sys +import shutil import platform import subprocess @@ -63,6 +64,12 @@ def build_extension(self, ext): "-DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++", ] + if shutil.which("hipcc") is not None: + cmake_args += [ + "-DCMAKE_C_COMPILER=hipcc", + "-DCMAKE_CXX_COMPILER=hipcc", + ] + env = os.environ.copy() env["CXXFLAGS"] = '{} -DVERSION_INFO=\\"{}\\"'.format( env.get("CXXFLAGS", ""), self.distribution.get_version() @@ -111,6 +118,7 @@ def build_extension(self, ext): CMakeExtension("qsimcirq/qsim_cuda"), CMakeExtension("qsimcirq/qsim_custatevec"), CMakeExtension("qsimcirq/qsim_decide"), + CMakeExtension("qsimcirq/qsim_hip"), ], cmdclass=dict(build_ext=CMakeBuild), zip_safe=False, diff --git a/tests/Makefile b/tests/Makefile index f4a37278..6a81e70b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -14,6 +14,9 @@ CUDA_TARGETS := $(CUDA_TARGETS:%cuda_test.cu=%cuda_test.x) CUSTATEVEC_TARGETS = $(shell find . -maxdepth 1 -name "*custatevec_test.cu") CUSTATEVEC_TARGETS := $(CUSTATEVEC_TARGETS:%custatevec_test.cu=%custatevec_test.x) +HIP_TARGETS = $(shell find . -maxdepth 1 -name "*cuda_test.cu") +HIP_TARGETS := $(HIP_TARGETS:%cuda_test.cu=%hip_test.x) + GTEST_DIR = $(CURDIR)/googletest/googletest GMOCK_DIR = $(CURDIR)/googletest/googlemock @@ -30,6 +33,9 @@ cuda-tests: $(CUDA_TARGETS) .PHONY: custatevec-tests custatevec-tests: $(CUSTATEVEC_TARGETS) +.PHONY: hip-tests +hip-tests: $(HIP_TARGETS) + .PHONY: run-cxx-tests run-cxx-tests: cxx-tests for exe in $(CXX_TARGETS); do if ! ./$$exe; then exit 1; fi; done @@ -42,6 +48,10 @@ run-cuda-tests: cuda-tests run-custatevec-tests: custatevec-tests for exe in $(CUSTATEVEC_TARGETS); do if ! ./$$exe; then exit 1; fi; done +.PHONY: run-hip-tests +run-hip-tests: hip-tests + for exe in $(HIP_TARGETS); do if ! ./$$exe; then exit 1; fi; done + $(GTEST_DIR)/make: -git submodule update --init --recursive googletest mkdir -p $(GTEST_DIR)/make @@ -56,6 +66,9 @@ $(GTEST_DIR)/make: %custatevec_test.x: %custatevec_test.cu $(GTEST_DIR)/make $(NVCC) -o ./$@ $< $(TESTFLAGS) $(NVCCFLAGS) $(CUSTATEVECFLAGS) +%hip_test.x: %cuda_test.cu $(GTEST_DIR)/make + $(HIPCC) -o ./$@ $< $(TESTFLAGS) $(HIPCCFLAGS) + .PHONY: clean clean: -rm -f ./*.x ./*.a ./*.so ./*.mod diff --git a/tests/make.sh b/tests/make.sh index 890663da..b38742df 100755 --- a/tests/make.sh +++ b/tests/make.sh @@ -55,14 +55,22 @@ g++ -O3 -I$path_to_include -L$path_to_lib -fopenmp -o unitaryspace_basic_test.x g++ -O3 -I$path_to_include -L$path_to_lib -msse4 -fopenmp -o unitaryspace_sse_test.x unitaryspace_sse_test.cc -lgtest -lpthread g++ -O3 -I$path_to_include -L$path_to_lib -o vectorspace_test.x vectorspace_test.cc -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o hybrid_cuda_test.x hybrid_cuda_test.cu -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o qtrajectory_cuda_test.x qtrajectory_cuda_test.cu -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o simulator_cuda_test.x simulator_cuda_test.cu -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o statespace_cuda_test.x statespace_cuda_test.cu -lgtest -lpthread +if command -v nvcc &>/dev/null; then + nvcc -O3 -I$path_to_include -L$path_to_lib -o hybrid_cuda_test.x hybrid_cuda_test.cu -lgtest -lpthread + nvcc -O3 -I$path_to_include -L$path_to_lib -o qtrajectory_cuda_test.x qtrajectory_cuda_test.cu -lgtest -lpthread + nvcc -O3 -I$path_to_include -L$path_to_lib -o simulator_cuda_test.x simulator_cuda_test.cu -lgtest -lpthread + nvcc -O3 -I$path_to_include -L$path_to_lib -o statespace_cuda_test.x statespace_cuda_test.cu -lgtest -lpthread -# CUQUANTUM_ROOT should be set. -CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o hybrid_custatevec_test.x hybrid_custatevec_test.cu -lgtest -lpthread -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o qtrajectory_custatevec_test.x qtrajectory_custatevec_test.cu -lgtest -lpthread -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o simulator_custatevec_test.x simulator_custatevec_test.cu -lgtest -lpthread -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o statespace_custatevec_test.x statespace_custatevec_test.cu -lgtest -lpthread + if [ -n "$CUQUANTUM_ROOT" ]; then + CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o hybrid_custatevec_test.x hybrid_custatevec_test.cu -lgtest -lpthread + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o qtrajectory_custatevec_test.x qtrajectory_custatevec_test.cu -lgtest -lpthread + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o simulator_custatevec_test.x simulator_custatevec_test.cu -lgtest -lpthread + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o statespace_custatevec_test.x statespace_custatevec_test.cu -lgtest -lpthread + fi +elif command -v hipcc &>/dev/null; then + hipcc -O3 -I$path_to_include -L$path_to_lib -o hybrid_hip_test.x hybrid_cuda_test.cu -lgtest -lpthread + hipcc -O3 -I$path_to_include -L$path_to_lib -o qtrajectory_hip_test.x qtrajectory_cuda_test.cu -lgtest -lpthread + hipcc -O3 -I$path_to_include -L$path_to_lib -o simulator_hip_test.x simulator_cuda_test.cu -lgtest -lpthread + hipcc -O3 -I$path_to_include -L$path_to_lib -o statespace_hip_test.x statespace_cuda_test.cu -lgtest -lpthread +fi