merged development with master

jaurentz · Jan 19, 2016 · 97c61eb · 97c61eb
2 parents 4c09031 + 01d64b3
commit 97c61eb
Show file tree

Hide file tree

Showing 120 changed files with 5,897 additions and 564 deletions.
diff --git a/LICENSE.md b/LICENSE.md
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2014 Jared L. Aurentz
+Copyright (c) 2015-2016 Jared L. Aurentz, Vassilis Kalantzis and Yousef Saad
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/Makefile b/Makefile
@@ -15,26 +15,21 @@ SRCS := $(wildcard ./src/*/*.cu)
 # CUCHEB individual objects
 OBJS := $(SRCS:.cu=.o)
 
-# CUCHEB individual tests
-TESTSRCS := $(wildcard ./tests/*/*.cu)
-TESTS := $(TESTSRCS:.cu=)
-
 all: lib$(LIBNAME).so.$(VERSION)
 
-tests: $(TESTS) $(TESTSRCS)
-	@$(MAKE) -C ./tests
-
-$(TESTSRCS):
-
-$(TESTS):
-
-lib$(LIBNAME).so.$(VERSION): $(OBJS)
-	$(CUC) --compiler-options '-fPIC' --shared -o $@ $^
+lib$(LIBNAME).so.$(VERSION): objects
+	@$(CUC) --compiler-options '-fPIC' --shared -o $@ $(OBJS)
 
-$(OBJS): $(SRCS)
+objects: FORCE
 	@$(MAKE) -C ./src
 
-$(SRCS):
+tests: FORCE
+	@$(MAKE) -C ./tests
+
+numex: FORCE
+	@$(MAKE) -C ./numex
+
+FORCE:
 
 install: lib$(LIBNAME).so.$(VERSION) 
 	@mkdir -p $(INSTALLDIR)/cucheb 
@@ -44,12 +39,13 @@ install: lib$(LIBNAME).so.$(VERSION)
 	@mv ./lib$(LIBNAME).so.$(VERSION) $(INSTALLDIR)/cucheb/lib/
 	@ln -s $(INSTALLDIR)/cucheb/lib/lib$(LIBNAME).so.$(VERSION) lib$(LIBNAME).so
 	@mv ./lib$(LIBNAME).so $(INSTALLDIR)/cucheb/lib/
-
+	
 uninstall: clean
 	@rm -rf $(INSTALLDIR)/cucheb
 
 clean:
 	@$(MAKE) clean -C ./src
 	@$(MAKE) clean -C ./tests
+	@$(MAKE) clean -C ./numex
 
 
diff --git a/README.md b/README.md
@@ -1,2 +1,42 @@
 # cucheb - CUDA accelerated large sparse eigensolvers #
-This is a package written in C++ for accelerating large sparse eigenvalue computations using NVIDIA GPUs. 
+Jared L. Aurentz and Vasileios Kalantzis, October 2015
+
+## Introduction ##
+__cucheb__ is a collection of C++ subroutines for accurately and 
+efficiently solving large sparse matrix eigenvalue problems using 
+NVIDIA brand GPUs. These methods are well suited for computing
+eigenvalues of 2D/3D discretization matrices that arise in 
+elliptic and parabolic PDEs.
+
+### Current features ###
+__cucheb-v0.1.0__ has the following features:
+ - double precision eigensolvers for real symmetric matrices
+
+## Installation ##
+__cucheb__ is built on top of the [NVIDIA CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit)
+and a small number of C++ standard libraries. You must have the toolkit
+installed before the library can be built.
+
+### Linux ###
+To install on a Linux machine simply move into the __cucheb__ root directory, 
+edit the file __make.inc__ to suit your system and type:
+```
+make install
+```
+This creates a shared object library __libcucheb.so._version___ and moves it,
+together with a __libcucheb.so__ symbolic link pointing to it, into the
+user-specified installation directory. It does not export any library paths.
+
+## Removing cucheb ##
+If the source directory has not been removed simply move into the __cucheb__ 
+root directory and type:
+```
+make uninstall
+```
+If the source directory has been removed the install directory will have to 
+be removed explicitly by the user.
+
+## Questions and issues ##
+If you have any questions or encounter any issues while using __cucheb__ 
+please file an issue on the
+[__cucheb__ issues](https://github.com/jaurentz/cucheb/issues) page of GitHub.
diff --git a/include/cucheb.h b/include/cucheb.h
@@ -1,35 +1,186 @@
-/** \mainpage My Personal Index Page
- *
- * \section intro_sec Introduction
- *
- * This is the introduction.
- *
- * \section install_sec Installation
- *
- * \subsection step1 Step 1: Opening the box
- *  
- * etc...
- */
-
-#ifndef __cucheb_h__ /* __cucheb_h__ */
-#define __cucheb_h__
+#include <cuchebdependencies.h>
 
-#include <iostream>
-#include <string>
-#include <sstream>
-#include <omp.h>
-//#include <cula_lapack.h>
-//#include <cula_lapack_device.h>
-#include <lapacke.h>
-using namespace std;
-#include <cucheberror.h>
+#include <cuchebstats.h>
 #include <cuchebpoly.h>
-#include <cuchebop.h>
-#include <cuchebsolve.h>
-#include <cuchebeigs.h>
+#include <cuchebmatrix.h>
+#include <cucheblanczos.h>
+
+/* header file for cucheb data type */
+#ifndef __cucheb_h__ 
+#define __cucheb_h__
+
+
+
+/* cuchebutils subroutines */
+/* rotation generator */
+int cuchebutils_rotation(const double a, const double b, double* c, double* s,
+                         double* nrm);
+
+/* function to perform banded symmetric bulge chase */
+int cuchebutils_chasebulge(int n, int bwidth, double* bands, int ldbands,
+                           double* bulge, double* vecs, int ldvecs);
+
+/* eigenvalues and eigenvectors of 2x2 symmetric matrix */
+int cuchebutils_2x2symeig(double a1, double a2, double b, double* e1, double* e2,
+                         double* c, double* s);
+
+/* reduce banded symmetric matrix to tridiagonal */
+int cuchebutils_bandsymred(int n, int bwidth, double* bands, int ldbands,
+                           double* vecs, int ldvecs);
+
+/* eigenvalues and eigenvectors of banded symmetric matrix via QR */
+int cuchebutils_bandsymqr(int n, int bwidth, double* bands, int ldbands,
+                           double* evals, double* vecs, int ldvecs);
+
+
+
+/* cuchebstats subroutines */
+/* standard print cuchebstats object */
+int cuchebstats_print(cuchebstats* ccs);
+
+/* print cuchebstats objects to file */
+int cuchebstats_fileprint(const string& fname, int nummats, string* matnames,
+                          cuchebstats* ccstats);
+
+
+
+/* cuchebpoly subroutines */
+/* instantiate cuchebpoly object */
+int cuchebpoly_init(cuchebpoly* ccp);
+
+/* destroy cuchebpoly object */
+int cuchebpoly_destroy(cuchebpoly* ccp);
+
+/* standard print cuchebpoly object */
+int cuchebpoly_print(cuchebpoly* ccp);
+
+/* long print cuchebpoly object */
+int cuchebpoly_printlong(cuchebpoly* ccp);
+
+/* second kind Chebyshev points */
+int cuchebpoly_points(double a, double b, cuchebpoly* ccp);
+
+/* convert values to coefficients */
+int cuchebpoly_coeffs(cuchebpoly* ccp);
+
+/* evaluate poly at scalar */
+double cuchebpoly_clenshaw(cuchebpoly* ccp, double x);
+
+/* threshold coefficients */
+int cuchebpoly_chop(cuchebpoly* ccp);
+
+/* routine for creating point filter */
+int cuchebpoly_pointfilter(double a, double b, double rho, int order, cuchebpoly* ccp);
+
+/* routine for creating step filter */
+int cuchebpoly_stepfilter(double a, double b, double c, double d, int order, cuchebpoly* ccp);
+
+/* routine for creating smart filter */
+int cuchebpoly_smartfilter(double a, double b, double c, double d, cuchebpoly* ccp);
+
+/* routine for creating gaussian filter */
+int cuchebpoly_gaussianfilter(double a, double b, double rho, double tau, cuchebpoly* ccp);
+
+
+
+/* cuchebmatrix subroutines */
+/* instantiate cuchebmatrix object */
+int cuchebmatrix_init(const string& mtxfile, cuchebmatrix* ccm);
+
+/* destroy cuchebmatrix object */
+int cuchebmatrix_destroy(cuchebmatrix* ccm);
+
+/* print cuchebmatrix object */
+int cuchebmatrix_print(cuchebmatrix* ccm);
+
+/* longprint cuchebmatrix object */
+int cuchebmatrix_printlong(cuchebmatrix* ccm);
+
+/* gpuprint cuchebmatrix object */
+int cuchebmatrix_gpuprint(cuchebmatrix* ccm);
+
+/* routine for sorting entries */
+int cuchebmatrix_sort(cuchebmatrix* ccm);
+
+/* routine for converting to csr format */
+int cuchebmatrix_csr(cuchebmatrix* ccm);
+
+/* routine for mv multiply on GPU */
+int cuchebmatrix_mv(cuchebmatrix* ccm, double* alpha, double* x, double* beta,
+                    double* y);
+
+/* routine for poly mv multiply on GPU */
+int cuchebmatrix_polymv(cuchebmatrix* ccm, cuchebpoly* ccp, double* x, double* y);
+
+/* routine for estimating spectral interval */
+int cuchebmatrix_specint(cuchebmatrix* ccm);
+
+/* routine for estimating spectral interval (overload taking a cucheblanczos object) */
+int cuchebmatrix_specint(cuchebmatrix* ccm, cucheblanczos *ccl);
+
+/* lanczos routine for intervals */ 
+int cuchebmatrix_lanczos(double lbnd, double ubnd,
+                         int bsize, int numvecs, int stepsize, 
+                         cuchebmatrix* ccm, cucheblanczos* ccl);
+
+/* lanczos routine for intervals with statistics variable */ 
+int cuchebmatrix_lanczos(double lbnd, double ubnd,
+                         int bsize, int numvecs, int stepsize, 
+                         cuchebmatrix* ccm, cucheblanczos* ccl, 
+                         cuchebstats* ccstats);
+
+/* filtered lanczos routine for interval */
+int cuchebmatrix_filteredlanczos(double lbnd, double ubnd, int bsize,
+                                 cuchebmatrix* ccm, cucheblanczos* ccl);
+
+/* same routine as above but with statistics variable */
+int cuchebmatrix_filteredlanczos(double lbnd, double ubnd, int bsize, 
+                                 cuchebmatrix* ccm, cucheblanczos* ccl, 
+                                 cuchebstats* ccs);
+
+/* expert lanczos routine for intervals */ 
+int cuchebmatrix_expertlanczos(double lbnd, double ubnd, int degree,
+                                 int bsize, int numvecs, int stepsize, 
+                                 cuchebmatrix* ccm, cucheblanczos* ccl, 
+                                 cuchebstats* ccstats);
+
+
+/* cucheblanczos subroutines */
+/* instantiate cucheblanczos object */
+int cucheblanczos_init(int bsize, int numvecs, cuchebmatrix* ccm, cucheblanczos* ccl);
+
+/* destroy cucheblanczos object */
+int cucheblanczos_destroy(cucheblanczos* ccl);
+
+/* print cucheblanczos object */
+int cucheblanczos_print(cucheblanczos* ccl);
+
+/* set cucheblanczos starting vectors */
+int cucheblanczos_startvecs(cucheblanczos* ccl);
+
+/* arnoldi run using cuchebmatrix */
+int cucheblanczos_arnoldi(int nsteps, cuchebmatrix* ccm, cucheblanczos* ccl,
+                          cuchebstats* ccstats);
+
+/* filtered arnoldi run using cuchebmatrix */
+int cucheblanczos_filteredarnoldi(int nsteps, cuchebmatrix* ccm, cuchebpoly* ccp,
+                                  cucheblanczos* ccl, cuchebstats* ccstats);
+
+/* compute ritz values */
+int cucheblanczos_ritz(cuchebmatrix* ccm, cucheblanczos* ccl);
+
+/* compute rayleigh quotients */
+int cucheblanczos_rayleigh(cuchebmatrix* ccm, cucheblanczos* ccl);
+
+/* sort evals in interval */
+int cucheblanczos_sort(double lb, double ub, cucheblanczos* ccl);
+
+/* sort evals by largest modulus */
+int cucheblanczos_sort(cucheblanczos* ccl);
+
+/* check convergence */
+int cucheblanczos_checkconvergence(cucheblanczos* ccl);
 
-cuchebStatus_t cuchebSetGridBlocks(int n, dim3 *blockSize, dim3 *gridSize);
 
-cuchebStatus_t cuchebDinit(int n,double *x,int incx,double val);
 
 #endif /* __cucheb_h__ */
diff --git a/include/cuchebdependencies.h b/include/cuchebdependencies.h
@@ -0,0 +1,18 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <iomanip>
+#include <algorithm>
+#include <vector>
+using namespace std;
+
+#include <mmio.h>
+#include <cuda.h>
+#include <cublas_v2.h>
+#include <cusparse.h>
+#include <cufft.h>
+