Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

This change updates support for target-specific code. #615

Merged
merged 1 commit into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 1 addition & 48 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,49 +128,6 @@ else()
# libm is not available or not needed.
endif()

if(PNG_HARDWARE_OPTIMIZATIONS)

# Set definitions and sources for ARM.
set(libpng_arm_sources
arm/arm_init.c
arm/filter_neon_intrinsics.c
arm/palette_neon_intrinsics.c)

# Set definitions and sources for PowerPC.
set(libpng_powerpc_sources
powerpc/powerpc_init.c
powerpc/filter_vsx_intrinsics.c)

# Set definitions and sources for Intel.
set(libpng_intel_sources
intel/intel_init.c
intel/filter_sse2_intrinsics.c)

# Set definitions and sources for MIPS.
set(libpng_mips_sources
mips/mips_init.c
mips/filter_msa_intrinsics.c
mips/filter_mmi_inline_assembly.c)

# Set definitions and sources for LoongArch.
set(libpng_loongarch_sources
loongarch/loongarch_lsx_init.c
loongarch/filter_lsx_intrinsics.c)

else(PNG_HARDWARE_OPTIMIZATIONS)

# Disable opt for all arches
add_definitions(
-DPNG_ARM_NEON_OPT=0
-DPNG_POWERPC_VSX_OPT=0
-DPNG_INTEL_SSE_OPT=0
-DPNG_MIPS_MSA_OPT=0
-DPNG_MIPS_MMI_OPT=0
-DPNG_LOONGARCH_LSX_OPT=0
)

endif(PNG_HARDWARE_OPTIMIZATIONS)

option(ld-version-script "Enable linker version script" ON)
if(ld-version-script AND NOT ANDROID AND NOT APPLE)
# Check if LD supports linker scripts.
Expand Down Expand Up @@ -481,11 +438,7 @@ set(libpng_sources
pngwrite.c
pngwtran.c
pngwutil.c
${libpng_arm_sources}
${libpng_intel_sources}
${libpng_mips_sources}
${libpng_powerpc_sources}
${libpng_loongarch_sources}
pngsimd.c
)
set(pngtest_sources
pngtest.c
Expand Down
10 changes: 2 additions & 8 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,8 @@ lib_LTLIBRARIES=libpng@PNGLIB_MAJOR@@[email protected]
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\
pngget.c pngmem.c pngpread.c pngread.c pngrio.c pngrtran.c pngrutil.c\
pngset.c pngtrans.c pngwio.c pngwrite.c pngwtran.c pngwutil.c\
png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa\
arm/arm_init.c arm/filter_neon_intrinsics.c\
arm/palette_neon_intrinsics.c\
intel/intel_init.c intel/filter_sse2_intrinsics.c\
loongarch/loongarch_lsx_init.c loongarch/filter_lsx_intrinsics.c\
mips/mips_init.c mips/filter_msa_intrinsics.c\
mips/filter_mmi_inline_assembly.c\
powerpc/powerpc_init.c powerpc/filter_vsx_intrinsics.c
pngsimd.c\
png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa

nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h

Expand Down
294 changes: 194 additions & 100 deletions arm/arm_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,113 +9,27 @@
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#define png_target_impl "arm-neon"

/* This module requires POSIX 1003.1 functions. */
#define _POSIX_SOURCE 1

#include "../pngpriv.h"

#ifdef PNG_READ_SUPPORTED

#if PNG_ARM_NEON_OPT > 0
#ifdef PNG_ARM_NEON_CHECK_SUPPORTED /* Do run-time checks */
/* WARNING: it is strongly recommended that you do not build libpng with
* run-time checks for CPU features if at all possible. In the case of the ARM
* NEON instructions there is no processor-specific way of detecting the
* presence of the required support, therefore run-time detection is extremely
* OS specific.
*
* You may set the macro PNG_ARM_NEON_FILE to the file name of file containing
* a fragment of C source code which defines the png_have_neon function. There
* are a number of implementations in contrib/arm-neon, but the only one that
* has partial support is contrib/arm-neon/linux.c - a generic Linux
* implementation which reads /proc/cpufino.
*/
#include <signal.h> /* for sig_atomic_t */

#ifndef PNG_ARM_NEON_FILE
# if defined(__aarch64__) || defined(_M_ARM64)
/* ARM Neon is expected to be unconditionally available on ARM64. */
# error "PNG_ARM_NEON_CHECK_SUPPORTED must not be defined on ARM64"
# elif defined(__ARM_NEON__) || defined(__ARM_NEON)
/* ARM Neon is expected to be available on the target CPU architecture. */
# error "PNG_ARM_NEON_CHECK_SUPPORTED must not be defined on this CPU arch"
# elif defined(__linux__)
# define PNG_ARM_NEON_FILE "contrib/arm-neon/linux.c"
# else
# error "No support for run-time ARM Neon checking; use compile-time options"
# endif
#endif

static int png_have_neon(png_structp png_ptr);
#ifdef PNG_ARM_NEON_FILE
# include PNG_ARM_NEON_FILE
#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
# include <arm64_neon.h>
#else
# include <arm_neon.h>
#endif
#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */

#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
#endif
/* Obtain the definitions of the actual filter functions: */
#include "filter_neon_intrinsics.c"

void
static void
png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
{
/* The switch statement is compiled in for ARM_NEON_API, the call to
* png_have_neon is compiled in for ARM_NEON_CHECK. If both are defined
* the check is only performed if the API has not set the NEON option on
* or off explicitly. In this case the check controls what happens.
*
* If the CHECK is not compiled in and the option is UNSET the behavior prior
* to 1.6.7 was to use the NEON code - this was a bug caused by having the
* wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF,
* as documented in png.h
*/
png_debug(1, "in png_init_filter_functions_neon");
#ifdef PNG_ARM_NEON_API_SUPPORTED
switch ((pp->options >> PNG_ARM_NEON) & 3)
{
case PNG_OPTION_UNSET:
/* Allow the run-time check to execute if it has been enabled -
* thus both API and CHECK can be turned on. If it isn't supported
* this case will fall through to the 'default' below, which just
* returns.
*/
#endif /* PNG_ARM_NEON_API_SUPPORTED */
#ifdef PNG_ARM_NEON_CHECK_SUPPORTED
{
static volatile sig_atomic_t no_neon = -1; /* not checked */

if (no_neon < 0)
no_neon = !png_have_neon(pp);

if (no_neon)
return;
}
#ifdef PNG_ARM_NEON_API_SUPPORTED
break;
#endif
#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */

#ifdef PNG_ARM_NEON_API_SUPPORTED
default: /* OFF or INVALID */
return;

case PNG_OPTION_ON:
/* Option turned on */
break;
}
#endif

/* IMPORTANT: any new external functions used here must be declared using
* PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the
* 'prefix' option to configure works:
*
* ./configure --with-libpng-prefix=foobar_
/* IMPORTANT: DO NOT DEFINE EXTERNAL FUNCTIONS HERE
*
* Verify you have got this right by running the above command, doing a build
* and examining pngprefix.h; it must contain a #define for every external
* function you add. (Notice that this happens automatically for the
* initialization function.)
* This is because external functions must be declared with
* PNG_INTERNAL_FUNCTION in pngpriv.h; without this the PNG_PREFIX option to
* the build will not work (it will not know about these symbols).
*/
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;

Expand All @@ -135,5 +49,185 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
png_read_filter_row_paeth4_neon;
}
}
#endif /* PNG_ARM_NEON_OPT > 0 */
#endif /* READ */

#define png_target_init_filter_functions_impl png_init_filter_functions_neon

#ifdef PNG_TARGET_STORES_DATA
/* png_target_free_data_impl
* Must be defined if the implementation stores data in
* png_struct::target_data. Need not be defined otherwise.
*/
static void
png_target_free_data_arm(png_structrp pp)
{
png_voidp ptr = pp->target_data;
pp->target_data = NULL;
png_free(pp, ptr);
}
#define png_target_free_data_impl png_target_free_data_arm
#endif /* TARGET_STORES_DATA */

#ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE
/* png_target_do_expand_palette_impl [flag: png_target_expand_palette]
* static function
* OPTIONAL
* Handles the transform. Need not be defined, only called if the
* state contains png_target_<transform>, may set this flag to zero, may
* return false to indicate that the transform was not done (so the
* C implementation must then execute).
*/
#include "palette_neon_intrinsics.c"

static int
png_target_do_expand_palette_neon(png_structrp png_ptr, png_row_infop row_info,
png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha,
int num_trans)
{
/* NOTE: it is important that this is done. row_info->width is not a CSE
* because the pointer is not declared with the 'restrict' parameter, this
* makes it a CSE but then it is very important that no one changes it in
* this function, hence the const.
*/
const png_uint_32 row_width = row_info->width;

/* NOTE: this is pretty much the original code:
*
* 1) The original code only works when the original PNG has 8-bits per
* palette. This test was in pngrtran.c and is now here.
*
* 2) The original code starts at the end and works backward but then stops
* when it is within 16 bytes of the start. It then left the remainder to
* the original code in pngrtran.c That code is now here.
*
* 3) The original code takes pointers to the end of the input and the end of
* the output; this is the way png_do_expand_palette works becuase it
* has to copy down from the end (otherwise it would overwrite the input
* data before it read it). Note that the row buffer is aliased by
* these two pointers.
*
* A consequence of passing pointers is that the row pointers (input and
* output) are forced into memory (they can't be in registers). This
* could be fixed and some compilers may be able to handle this but
* no changes have been made to the original ARM code at this point.
*/
if (row_info->color_type == PNG_COLOR_TYPE_PALETTE &&
row_info->bit_depth == 8 /* <8 requires a bigger "riffled" palette */)
{
png_const_bytep sp = row + (row_width - 1); /* 8 bit palette index */
if (num_trans > 0)
{
/* This case needs a "riffled" palette. In this implementation the
* initialization is done here, on demand.
*/
if (png_ptr->target_data == NULL)
{
/* Initialize the accelerated palette expansion.
*
* The data is now allocated using png_malloc_warn so the code
* does not error out on OOM.
*/
png_ptr->target_data = png_malloc_warn(png_ptr, 256 * 4);

/* On allocation error it is essential to clear the flag or a
* massive number of warnings will be output.
*/
if (png_ptr->target_data != NULL)
png_riffle_palette_neon(png_ptr->target_data, palette,
trans_alpha, num_trans);
else
goto clear_flag;
}

/* This is the general convention in the core transform code; when
* expanding the number of bytes in the row copy down (necessary) and
* pass a pointer to the last byte, not the first.
*
* It does not have to be preserved here but maybe it is better this
* way despite the fact that the comments in the neon palette code
* obfuscate what is happening.
*/
png_bytep dp = row + (4/*RGBA*/*row_width - 1);

/* Cosmin Truta: "Sometimes row_info->bit_depth has been changed to 8.
* In these cases, the palette hasn't been riffled."
*
* John Bowler: Explanation: The code in png_do_palette_expand
* *invariably* changes the bit depth to 8. So low palette bit depth
* gets expanded to 8 and png_row_info is adjusted to reflect this (see
* png_do_palette_expand), however the "riffle" initialization code
* checked the original png_ptr bit depth, so it didn't know this would
* happen...
*
* This could be changed; the original bit depth is irrelevant to the
* initialization code.
*/
png_uint_32 i = png_target_do_expand_palette_rgba8_neon(
png_ptr->target_data, row_info->width, &sp, &dp);

if (i == 0) /* nothing was done */
return 0; /* Return here: interlaced images start out narrow */

/* Now 'i' make not have reached row_width.
* NOTE: [i] is not the index into the row buffer, rather than is
* [row_width-i], this is the way it is done in the original
* png_do_expand_palette.
*/
for (; i < row_width; i++)
{
if ((int)(*sp) >= num_trans)
*dp-- = 0xff;
else
*dp-- = trans_alpha[*sp];
*dp-- = palette[*sp].blue;
*dp-- = palette[*sp].green;
*dp-- = palette[*sp].red;
sp--;
}

/* Finally update row_info to reflect the expanded output: */
row_info->bit_depth = 8;
row_info->pixel_depth = 32;
row_info->rowbytes = row_width * 4;
row_info->color_type = 6;
row_info->channels = 4;
return 1;
}
else
{
/* No tRNS chunk (num_trans == 0), expand to RGB not RGBA. */
png_bytep dp = row + (3/*RGB*/*row_width - 1);

png_uint_32 i = png_target_do_expand_palette_rgb8_neon(palette,
row_info->width, &sp, &dp);

if (i == 0)
return 0; /* Return here: interlaced images start out narrow */

/* Finish the last bytes: */
for (; i < row_width; i++)
{
*dp-- = palette[*sp].blue;
*dp-- = palette[*sp].green;
*dp-- = palette[*sp].red;
sp--;
}

row_info->bit_depth = 8;
row_info->pixel_depth = 24;
row_info->rowbytes = row_width * 3;
row_info->color_type = 2;
row_info->channels = 3;
return 1;
}
}

clear_flag:
/* Here on malloc failure and on an inapplicable image. */
png_ptr->target_state &= ~png_target_expand_palette;
return 0;
}

#define png_target_do_expand_palette_impl png_target_do_expand_palette_neon
/* EXPAND_PALETTE */

#endif /*TODO*/
Loading