Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support MSA SIMD for MIPS #244

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ NEON_LIBS = dft/simd/neon/libdft_neon_codelets.la \
rdft/simd/neon/librdft_neon_codelets.la
endif

if HAVE_MSA
MSA_LIBS = dft/simd/msa/libdft_msa_codelets.la \
rdft/simd/msa/librdft_msa_codelets.la
endif

if HAVE_GENERIC_SIMD128
GENERIC_SIMD128_LIBS = dft/simd/generic-simd128/libdft_generic_simd128_codelets.la \
rdft/simd/generic-simd128/librdft_generic_simd128_codelets.la
Expand Down Expand Up @@ -125,7 +130,7 @@ libfftw3@PREC_SUFFIX@_la_LIBADD = \
reodft/libreodft.la \
api/libapi.la \
$(SIMD_LIBS) $(SSE2_LIBS) $(AVX_LIBS) $(AVX_128_FMA_LIBS) \
$(AVX2_LIBS) $(ALTIVEC_LIBS) \
$(AVX2_LIBS) $(ALTIVEC_LIBS) $(MSA_LIBS) \
$(VSX_LIBS) $(NEON_LIBS) $(KCVI_LIBS) $(AVX512_LIBS) \
$(GENERIC_SIMD128_LIBS) $(GENERIC_SIMD256_LIBS) \
$(COMBINED_THREADLIBS)
Expand Down
4 changes: 4 additions & 0 deletions api/version.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ const char X(version)[] = PACKAGE "-" PACKAGE_VERSION
"-neon"
#endif

#if HAVE_MSA
"-msa"
#endif

#if defined(HAVE_GENERIC_SIMD128)
"-generic_simd128"
#endif
Expand Down
3 changes: 3 additions & 0 deletions cmake.config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,9 @@
/* Define to enable ARM NEON optimizations. */
/* #undef HAVE_NEON */

/* Define to enable MIPS MSA optimizations. */
/* #undef HAVE_MSA */

/* Define if OpenMP is enabled */
#cmakedefine HAVE_OPENMP

Expand Down
20 changes: 18 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,11 @@ if test "$have_generic_simd256" = "yes"; then
fi
AM_CONDITIONAL(HAVE_GENERIC_SIMD256, test "$have_generic_simd256" = "yes")

AC_ARG_ENABLE(msa, [AC_HELP_STRING([--enable-msa],[enable MIPS MSA optimizations])], have_msa=$enableval, have_msa=no)
if test "$have_msa" = "yes"; then
AC_DEFINE(HAVE_MSA,1,[Define to enable MIPS MSA optimizations.])
fi
AM_CONDITIONAL(HAVE_MSA, test "$have_msa" = "yes")

dnl FIXME:
dnl AC_ARG_ENABLE(mips-ps, [AC_HELP_STRING([--enable-mips-ps],[enable MIPS pair-single optimizations])], have_mips_ps=$enableval, have_mips_ps=no)
Expand Down Expand Up @@ -359,9 +364,12 @@ case "${ax_cv_c_compiler_vendor}" in
fi

# AVX2
# gcc-4.8 works with -march=core-avx2, but -mavx2 is not enough.
# Later versions seem to be happy with -mavx2, so try the arch one first.
if test "$have_avx2" = "yes" -a "x$AVX2_CFLAGS" = x; then
AX_CHECK_COMPILER_FLAGS(-mavx2, [AVX2_CFLAGS="-mavx2"],
[AC_MSG_ERROR([Need a version of gcc with -mavx2])])
AX_CHECK_COMPILER_FLAGS(-march=core-avx2, [AVX2_CFLAGS="-march=core-avx2"],
[AX_CHECK_COMPILER_FLAGS(-mavx2, [AVX2_CFLAGS="-mavx2"],
[AC_MSG_ERROR([Need a version of gcc with either -march=core-avx2 or -mavx2])])])
AX_CHECK_COMPILER_FLAGS(-mfma, [AVX2_CFLAGS="$AVX2_CFLAGS -mfma"],
[AC_MSG_WARN([Need a version of gcc with -mfma (harmless for icc)])])
fi
Expand Down Expand Up @@ -411,6 +419,11 @@ case "${ax_cv_c_compiler_vendor}" in
[AC_MSG_ERROR([Need a version of gcc with -mvsx])])
fi

if test "$have_msa" = "yes" -a "x$MSA_CFLAGS" = x; then
AX_CHECK_COMPILER_FLAGS(-mmsa, [MSA_CFLAGS="-mmsa"],
[AC_MSG_ERROR([Need a version of gcc with -mmsa])])
fi

dnl FIXME:
dnl elif test "$have_mips_ps" = "yes"; then
dnl # Just punt here and use only new 4.2 compiler :(
Expand Down Expand Up @@ -471,6 +484,7 @@ AC_SUBST(KCVI_CFLAGS)
AC_SUBST(ALTIVEC_CFLAGS)
AC_SUBST(VSX_CFLAGS)
AC_SUBST(NEON_CFLAGS)
AC_SUBST(MSA_CFLAGS)

dnl add stack alignment CFLAGS if so requested
if test "$with_incoming_stack_boundary"x != "no"x; then
Expand Down Expand Up @@ -766,6 +780,7 @@ AC_CONFIG_FILES([
dft/simd/altivec/Makefile
dft/simd/vsx/Makefile
dft/simd/neon/Makefile
dft/simd/msa/Makefile
dft/simd/generic-simd128/Makefile
dft/simd/generic-simd256/Makefile

Expand All @@ -786,6 +801,7 @@ AC_CONFIG_FILES([
rdft/simd/altivec/Makefile
rdft/simd/vsx/Makefile
rdft/simd/neon/Makefile
rdft/simd/msa/Makefile
rdft/simd/generic-simd128/Makefile
rdft/simd/generic-simd256/Makefile

Expand Down
1 change: 1 addition & 0 deletions dft/codelet-dft.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ extern const solvtab X(solvtab_dft_kcvi);
extern const solvtab X(solvtab_dft_altivec);
extern const solvtab X(solvtab_dft_vsx);
extern const solvtab X(solvtab_dft_neon);
extern const solvtab X(solvtab_dft_msa);
extern const solvtab X(solvtab_dft_generic_simd128);
extern const solvtab X(solvtab_dft_generic_simd256);

Expand Down
4 changes: 4 additions & 0 deletions dft/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ void X(dft_conf_standard)(planner *p)
if (X(have_simd_neon)())
X(solvtab_exec)(X(solvtab_dft_neon), p);
#endif
#if HAVE_MSA
if (X(have_simd_msa)())
X(solvtab_exec)(X(solvtab_dft_msa), p);
#endif
#if HAVE_GENERIC_SIMD128
X(solvtab_exec)(X(solvtab_dft_generic_simd128), p);
#endif
Expand Down
2 changes: 1 addition & 1 deletion dft/simd/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AM_CPPFLAGS = -I $(top_srcdir)
SUBDIRS = common sse2 avx avx-128-fma avx2 avx2-128 avx512 kcvi altivec vsx neon generic-simd128 generic-simd256
SUBDIRS = common sse2 avx avx-128-fma avx2 avx2-128 avx512 kcvi altivec vsx neon msa generic-simd128 generic-simd256
EXTRA_DIST = n1b.h n1f.h n2b.h n2f.h n2s.h q1b.h q1f.h t1b.h t1bu.h \
t1f.h t1fu.h t2b.h t2f.h t3b.h t3f.h ts.h codlist.mk simd.mk
13 changes: 13 additions & 0 deletions dft/simd/msa/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Build description for the MSA flavour of the DFT SIMD codelets.
# Everything in this directory is compiled with the flags that configure
# stored in MSA_CFLAGS.
AM_CFLAGS = $(MSA_CFLAGS)
# SIMD support header for this flavour (presumably consumed by the
# included simd.mk -- confirm).
SIMD_HEADER=simd-support/simd-msa.h

# Shared fragments from the parent dft/simd directory.
include $(top_srcdir)/dft/simd/codlist.mk
include $(top_srcdir)/dft/simd/simd.mk

# The codelet library is only built when the HAVE_MSA automake
# conditional is true.
if HAVE_MSA

BUILT_SOURCES = $(EXTRA_DIST)
noinst_LTLIBRARIES = libdft_msa_codelets.la
libdft_msa_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
1 change: 1 addition & 0 deletions doc/install.texi
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ of the time). @xref{Cycle Counters}.
@code{--enable-altivec} (single),
@code{--enable-vsx} (single, double),
@code{--enable-neon} (single, double on aarch64),
@code{--enable-msa} (single, double on mips),
@code{--enable-generic-simd128},
and
@code{--enable-generic-simd256}:
Expand Down
3 changes: 1 addition & 2 deletions doc/intro.texi
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ transform (DFT) and various special cases thereof.

@item FFTW supports arbitrary multi-dimensional data.

@item FFTW supports the SSE, SSE2, AVX, AVX2, AVX512, KCVI, Altivec, VSX, and
NEON vector instruction sets.
@item FFTW supports the SSE, SSE2, AVX, AVX2, AVX512, KCVI, Altivec, VSX, NEON, and MSA vector instruction sets.

@item FFTW includes parallel (multi-threaded) transforms
for shared-memory systems.
Expand Down
5 changes: 3 additions & 2 deletions doc/other.texi
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ special operations supported by some processors to perform a single
operation on several numbers (usually 2 or 4) simultaneously. SIMD
floating-point instructions are available on several popular CPUs:
SSE/SSE2/AVX/AVX2/AVX512/KCVI on some x86/x86-64 processors, AltiVec and
VSX on some POWER/PowerPCs, NEON on some ARM models. FFTW can be
compiled to support the SIMD instructions on any of these systems.
VSX on some POWER/PowerPCs, NEON on some ARM models, and MSA on some MIPS
models. FFTW can be compiled to support the SIMD instructions on any of
these systems.
@cindex SIMD
@cindex SSE
@cindex SSE2
Expand Down
3 changes: 2 additions & 1 deletion kernel/ifftw.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ extern void X(extract_reim)(int sign, R *c, R **r, R **i);
defined(HAVE_AVX2) || defined(HAVE_AVX512) || \
defined(HAVE_KCVI) || \
defined(HAVE_ALTIVEC) || defined(HAVE_VSX) || \
defined(HAVE_MIPS_PS) || \
defined(HAVE_MIPS_PS) || defined(HAVE_MSA) || \
defined(HAVE_GENERIC_SIMD128) || defined(HAVE_GENERIC_SIMD256)
#define HAVE_SIMD 1
#else
Expand All @@ -119,6 +119,7 @@ extern int X(have_simd_avx512)(void);
extern int X(have_simd_altivec)(void);
extern int X(have_simd_vsx)(void);
extern int X(have_simd_neon)(void);
extern int X(have_simd_msa)(void);

/* forward declarations */
typedef struct problem_s problem;
Expand Down
1 change: 1 addition & 0 deletions rdft/codelet-rdft.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ extern const solvtab X(solvtab_rdft_kcvi);
extern const solvtab X(solvtab_rdft_altivec);
extern const solvtab X(solvtab_rdft_vsx);
extern const solvtab X(solvtab_rdft_neon);
extern const solvtab X(solvtab_rdft_msa);
extern const solvtab X(solvtab_rdft_generic_simd128);
extern const solvtab X(solvtab_rdft_generic_simd256);

Expand Down
4 changes: 4 additions & 0 deletions rdft/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ void X(rdft_conf_standard)(planner *p)
if (X(have_simd_neon)())
X(solvtab_exec)(X(solvtab_rdft_neon), p);
#endif
#if HAVE_MSA
if (X(have_simd_msa)())
X(solvtab_exec)(X(solvtab_rdft_msa), p);
#endif
#if HAVE_GENERIC_SIMD128
X(solvtab_exec)(X(solvtab_rdft_generic_simd128), p);
#endif
Expand Down
2 changes: 1 addition & 1 deletion rdft/simd/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

AM_CPPFLAGS = -I $(top_srcdir)
SUBDIRS = common sse2 avx avx-128-fma avx2 avx2-128 avx512 kcvi altivec vsx neon generic-simd128 generic-simd256
SUBDIRS = common sse2 avx avx-128-fma avx2 avx2-128 avx512 kcvi altivec vsx neon msa generic-simd128 generic-simd256
EXTRA_DIST = hc2cbv.h hc2cfv.h codlist.mk simd.mk
13 changes: 13 additions & 0 deletions rdft/simd/msa/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Build description for the MSA flavour of the RDFT SIMD codelets.
# Everything in this directory is compiled with the flags that configure
# stored in MSA_CFLAGS.
AM_CFLAGS = $(MSA_CFLAGS)
# SIMD support header for this flavour (presumably consumed by the
# included simd.mk -- confirm).
SIMD_HEADER=simd-support/simd-msa.h

# Shared fragments from the parent rdft/simd directory.
include $(top_srcdir)/rdft/simd/codlist.mk
include $(top_srcdir)/rdft/simd/simd.mk

# The codelet library is only built when the HAVE_MSA automake
# conditional is true.
if HAVE_MSA

noinst_LTLIBRARIES = librdft_msa_codelets.la
BUILT_SOURCES = $(EXTRA_DIST)
librdft_msa_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
1 change: 1 addition & 0 deletions simd-support/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ avx512.c simd-avx512.h \
kcvi.c simd-kcvi.h \
altivec.c simd-altivec.h vsx.c simd-vsx.h \
neon.c simd-neon.h \
msa.c simd-msa.h \
simd-generic128.h simd-generic256.h

73 changes: 73 additions & 0 deletions simd-support/msa.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/


#include "kernel/ifftw.h"

#if HAVE_MSA

/* check for an environment where signals are known to work */
#if defined(unix) || defined(linux)
# include <signal.h>
# include <setjmp.h>

static jmp_buf jb;

static void sighandler(int x)
{
UNUSED(x);
longjmp(jb, 1);
}

static int msa_works(void)
{
void (*oldsig)(int);
oldsig = signal(SIGILL, sighandler);
if (setjmp(jb)) {
signal(SIGILL, oldsig);
return 0;
} else {
/* asm volatile ("xor.v $w0, $w0, $w0"); */
asm volatile (".long 0x7860001e");
signal(SIGILL, oldsig);
return 1;
}
}

/*
 * Report whether MSA instructions can be used on the running CPU.
 *
 * The SIGILL-based probe (msa_works) is executed on the first call only;
 * its result is kept in a function-local static and returned unchanged
 * by every later call.
 */
int X(have_simd_msa)(void)
{
     static int probed = 0;
     static int available;

     if (probed)
          return available;

     available = msa_works();
     probed = 1;
     return available;
}

#else
/* Fallback used when neither `unix' nor `linux' is defined: we don't know
   how to autodetect MSA there, so no run-time probe is attempted and MSA
   is unconditionally reported as present. */
int X(have_simd_msa)(void)
{
return 1;
}
#endif

#endif
3 changes: 3 additions & 0 deletions simd-support/simd-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@
# define ALIGNMENT 16 /* Alignment for the LD/ST macros */
# endif
# define ALIGNMENTA 64 /* Alignment for the LDA/STA macros */
#elif defined(HAVE_MSA)
# define ALIGNMENT 16 /* Alignment for the LD/ST macros */
# define ALIGNMENTA 16 /* Alignment for the LDA/STA macros */
#elif defined(HAVE_GENERIC_SIMD256)
# if defined(FFTW_SINGLE)
# define ALIGNMENT 8
Expand Down
Loading