diff --git a/Makefile b/Makefile
index f7796604..3e90be86 100644
--- a/Makefile
+++ b/Makefile
@@ -51,6 +51,8 @@ src/scs_version.o: src/scs_version.c $(INC_FILES)
 $(DIRSRC)/private.o: $(DIRSRC)/private.c $(DIRSRC)/private.h
 $(INDIRSRC)/indirect/private.o: $(INDIRSRC)/private.c $(INDIRSRC)/private.h
 $(MKLSRC)/private.o: $(MKLSRC)/private.c $(MKLSRC)/private.h
+$(HIPSRC)/private.o: $(HIPSRC)/private.c $(HIPSRC)/private.h
+	$(HIPCC) $(CFLAGS) $(HIPCFLAGS) -I$(HIPSRC) -c $< -o $@
 
 $(LINSYS)/scs_matrix.o: $(LINSYS)/scs_matrix.c $(LINSYS)/scs_matrix.h
 $(LINSYS)/csparse.o: $(LINSYS)/csparse.c $(LINSYS)/csparse.h
@@ -69,6 +71,11 @@ $(OUT)/libscsmkl.a: $(SCS_O) $(SCS_OBJECTS) $(MKLSRC)/private.o $(LINSYS)/scs_ma
 	$(ARCHIVE) $@ $^
 	- $(RANLIB) $@
 
+$(OUT)/libscship.a: $(SCS_O) $(SCS_OBJECTS) $(HIPSRC)/private.o $(LINSYS)/scs_matrix.o $(LINSYS)/csparse.o
+	mkdir -p $(OUT)
+	$(ARCHIVE) $@ $^
+	- $(RANLIB) $@
+
 $(OUT)/libscsdir.$(SHARED): $(SCS_O) $(SCS_OBJECTS) $(DIRSRC)/private.o $(AMD_OBJS) $(LDL_OBJS) $(LINSYS)/scs_matrix.o $(LINSYS)/csparse.o
 	mkdir -p $(OUT)
 	$(CC) $(CFLAGS) -shared -Wl,$(SONAME),$(@:$(OUT)/%=%) -o $@ $^ $(LDFLAGS) $(BLASLDFLAGS)
@@ -81,6 +88,10 @@ $(OUT)/libscsmkl.$(SHARED): $(SCS_O) $(SCS_OBJECTS) $(MKLSRC)/private.o $(LINSYS
 	mkdir -p $(OUT)
 	$(CC) $(CFLAGS) -shared -Wl,$(SONAME),$(@:$(OUT)/%=%) -o $@ $^ $(LDFLAGS) $(MKLFLAGS)
 
+$(OUT)/libscship.$(SHARED): $(SCS_O) $(SCS_OBJECTS) $(HIPSRC)/private.o $(LINSYS)/scs_matrix.o $(LINSYS)/csparse.o
+	mkdir -p $(OUT)
+	$(CC) $(CFLAGS) -shared -Wl,$(SONAME),$(@:$(OUT)/%=%) -o $@ $^ $(LDFLAGS) $(HIPLDFLAGS)
+
 $(OUT)/demo_socp_direct: test/random_socp_prob.c $(OUT)/libscsdir.a
 	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(BLASLDFLAGS)
 
@@ -90,6 +101,9 @@ $(OUT)/demo_socp_indirect: test/random_socp_prob.c $(OUT)/libscsindir.a
 $(OUT)/demo_socp_mkl: test/random_socp_prob.c $(OUT)/libscsmkl.a
 	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(MKLFLAGS)
 
+$(OUT)/demo_socp_hip: test/random_socp_prob.c $(OUT)/libscship.a
+	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(BLASLDFLAGS) $(HIPLDFLAGS)
+
 $(OUT)/run_from_file_direct: test/run_from_file.c $(OUT)/libscsdir.a
 	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(BLASLDFLAGS)
 
@@ -108,7 +122,8 @@ $(OUT)/run_tests_direct: test/run_tests.c $(OUT)/libscsdir.a
 	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(BLASLDFLAGS) -Itest
 $(OUT)/run_tests_mkl: test/run_tests.c $(OUT)/libscsmkl.a
 	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(MKLFLAGS) -Itest
-
+$(OUT)/run_tests_hip: test/run_tests.c $(OUT)/libscship.a
+	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(BLASLDFLAGS) $(HIPLDFLAGS) -Itest
 .PHONY: test_gpu
 test_gpu: $(OUT)/run_tests_gpu_indirect # $(OUT)/run_tests_gpu_direct
 
@@ -120,6 +135,9 @@ ifndef MKLROOT
 	$(error MKLROOT is undefined, set MKLROOT to the MKL install location)
 endif
 
+# HIP backend: static and shared libraries plus the test runner and SOCP demo.
+.PHONY: hip
+hip: $(OUT)/libscship.a $(OUT)/libscship.$(SHARED) $(OUT)/run_tests_hip $(OUT)/demo_socp_hip
 $(OUT)/run_tests_gpu_indirect: test/run_tests.c $(OUT)/libscsgpuindir.a
 	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(BLASLDFLAGS) $(CULDFLAGS) -Itest
 
diff --git a/scs.mk b/scs.mk
index 11ebd599..3c6885e2 100644
--- a/scs.mk
+++ b/scs.mk
@@ -9,6 +9,7 @@ endif
 #CC = i686-w64-mingw32-gcc -m32
 #CC = x86_64-w64-mingw32-gcc-4.8
 CUCC = $(CC) #Don't need to use nvcc, since using cuda blas APIs
+HIPCC = $(HIP_PATH)/bin/hipcc
 
 # For GPU must add cuda libs to path, e.g.
 # export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH
@@ -70,6 +71,7 @@ INDIRSRC = $(LINSYS)/cpu/indirect
 GPUDIR = $(LINSYS)/gpu/direct
 GPUINDIR = $(LINSYS)/gpu/indirect
 MKLSRC = $(LINSYS)/mkl/direct
+HIPSRC = $(LINSYS)/hip/direct
 
 EXTSRC = $(LINSYS)/external
 
@@ -135,6 +137,12 @@ endif
 # to work for all combinations of platform / compiler / threading options.
 MKLFLAGS = -L$(MKLROOT) -L$(MKLROOT)/lib -Wl,--no-as-needed -lmkl_rt -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -ldl
 
+# HIP backend: HIP_PATH defaults to /opt/rocm; set it in the environment or on the command line to override.
+HIP_PLATFORM = HIP_PLATFORM_AMD
+HIP_PATH ?= /opt/rocm
+HIPLDFLAGS = -L$(HIP_PATH)/lib -lamdhip64 -lhipsparse
+HIPCFLAGS = -D__$(HIP_PLATFORM)__ -I$(HIP_PATH)/include -Wno-extra-semi -Wno-strict-prototypes
+
 ############ OPENMP: ############
 # set USE_OPENMP = 1 to allow openmp (multi-threaded matrix multiplies):
 # set the number of threads to, for example, 4 by entering the command: