diff --git a/mupdf/Makefile b/mupdf/Makefile index 8297eb7f8b5c..22926298259f 100755 --- a/mupdf/Makefile +++ b/mupdf/Makefile @@ -454,6 +454,7 @@ incdir ?= $(prefix)/include mandir ?= $(prefix)/share/man docdir ?= $(prefix)/share/doc/mupdf pydir ?= $(shell python3 -c "import sysconfig; print(sysconfig.get_path('platlib'))") +SO_INSTALL_MODE ?= 644 third: $(THIRD_LIB) extra-libs: $(THIRD_GLUT_LIB) @@ -612,13 +613,17 @@ $(error OUT=$(OUT) does not contain shared) endif # C++, Python and C# shared libraries. +# +# To disable automatic use of a venv, use `make VENV_FLAG= ...` or `VENV_FLAG= +# make ...`. +# +VENV_FLAG ?= --venv c++-%: shared-% - ./scripts/mupdfwrap.py --venv -d $(OUT) -b 01 + ./scripts/mupdfwrap.py $(VENV_FLAG) -d $(OUT) -b 01 python-%: c++-% - ./scripts/mupdfwrap.py --venv -d $(OUT) -b 23 + ./scripts/mupdfwrap.py $(VENV_FLAG) -d $(OUT) -b 23 csharp-%: c++-% - ./scripts/mupdfwrap.py --venv -d $(OUT) -b --csharp 23 - + ./scripts/mupdfwrap.py $(VENV_FLAG) -d $(OUT) -b --csharp 23 # Installs of C, C++, Python and C# shared libraries # @@ -632,21 +637,21 @@ endif install-shared-c: install-shared-check shared install-headers install -d $(DESTDIR)$(libdir) - install -m 644 $(OUT)/libmupdf.$(SO)$(SO_VERSION) $(DESTDIR)$(libdir)/ + install -m $(SO_INSTALL_MODE) $(OUT)/libmupdf.$(SO)$(SO_VERSION) $(DESTDIR)$(libdir)/ ifneq ($(OS),OpenBSD) ln -sf libmupdf.$(SO)$(SO_VERSION) $(DESTDIR)$(libdir)/libmupdf.$(SO) endif install-shared-c++: install-shared-c c++ install -m 644 platform/c++/include/mupdf/*.h $(DESTDIR)$(incdir)/mupdf - install -m 644 $(OUT)/libmupdfcpp.$(SO)$(SO_VERSION) $(DESTDIR)$(libdir)/ + install -m $(SO_INSTALL_MODE) $(OUT)/libmupdfcpp.$(SO)$(SO_VERSION) $(DESTDIR)$(libdir)/ ifneq ($(OS),OpenBSD) ln -sf libmupdfcpp.$(SO)$(SO_VERSION) $(DESTDIR)$(libdir)/libmupdfcpp.$(SO) endif install-shared-python: install-shared-c++ python install -d $(DESTDIR)$(pydir)/mupdf - install -m 644 $(OUT)/_mupdf.$(SO) $(DESTDIR)$(pydir)/mupdf + install -m 
$(SO_INSTALL_MODE) $(OUT)/_mupdf.$(SO) $(DESTDIR)$(pydir)/mupdf install -m 644 $(OUT)/mupdf.py $(DESTDIR)$(pydir)/mupdf/__init__.py else diff --git a/mupdf/Makelists b/mupdf/Makelists index 9a7c7bf607e6..8857d58ceed9 100644 --- a/mupdf/Makelists +++ b/mupdf/Makelists @@ -586,6 +586,7 @@ TESSERACT_SRC += thirdparty/tesseract/src/api/pdfrenderer.cpp TESSERACT_SRC += thirdparty/tesseract/src/api/renderer.cpp TESSERACT_SRC += thirdparty/tesseract/src/api/wordstrboxrenderer.cpp TESSERACT_SRC += thirdparty/tesseract/src/arch/dotproduct.cpp +TESSERACT_SRC += thirdparty/tesseract/src/arch/dotproductneon.cpp TESSERACT_SRC += thirdparty/tesseract/src/arch/intsimdmatrix.cpp TESSERACT_SRC += thirdparty/tesseract/src/arch/simddetect.cpp TESSERACT_SRC += thirdparty/tesseract/src/ccmain/applybox.cpp diff --git a/mupdf/include/mupdf/fitz/string-util.h b/mupdf/include/mupdf/fitz/string-util.h index d5edec11ce87..e6280a271c65 100644 --- a/mupdf/include/mupdf/fitz/string-util.h +++ b/mupdf/include/mupdf/fitz/string-util.h @@ -249,6 +249,12 @@ const char *fz_runeptr(const char *str, int idx); */ int fz_utflen(const char *s); +/* + Convert a wchar string into a new heap allocated utf8 one. +*/ +char *fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s); + + /** Locale-independent decimal to binary conversion. On overflow return (-)INFINITY and set errno to ERANGE. 
On underflow return diff --git a/mupdf/include/mupdf/fitz/system.h b/mupdf/include/mupdf/fitz/system.h index 3d9823187ef5..57202bf985db 100644 --- a/mupdf/include/mupdf/fitz/system.h +++ b/mupdf/include/mupdf/fitz/system.h @@ -165,9 +165,6 @@ static __inline int signbit(double x) #ifdef _WIN32 -char *fz_utf8_from_wchar(const wchar_t *s); -wchar_t *fz_wchar_from_utf8(const char *s); - /* really a FILE* but we don't want to include stdio.h here */ void *fz_fopen_utf8(const char *name, const char *mode); int fz_remove_utf8(const char *name); @@ -184,6 +181,8 @@ void fz_free_argv(int argc, char **argv); int64_t fz_stat_ctime(const char *path); int64_t fz_stat_mtime(const char *path); +int fz_mkdir(char *path); + /* inline is standard in C++. For some compilers we can enable it within * C too. Some compilers think they know better than we do about when diff --git a/mupdf/platform/gl/gl-main.c b/mupdf/platform/gl/gl-main.c index e6e2e5e9d4b0..c246b84ec0a3 100644 --- a/mupdf/platform/gl/gl-main.c +++ b/mupdf/platform/gl/gl-main.c @@ -499,25 +499,6 @@ static void save_history(void) js_freestate(J); } -static int -fz_mkdir(char *path) -{ -#ifdef _WIN32 - int ret; - wchar_t *wpath = fz_wchar_from_utf8(path); - - if (wpath == NULL) - return -1; - - ret = _wmkdir(wpath); - - free(wpath); - - return ret; -#else - return mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO); -#endif -} static int create_accel_path(char outname[], size_t len, int create, const char *absname, ...) 
{ diff --git a/mupdf/platform/win32/libtesseract.vcxproj b/mupdf/platform/win32/libtesseract.vcxproj index cc1b2b16f24d..e6dc927265f9 100644 --- a/mupdf/platform/win32/libtesseract.vcxproj +++ b/mupdf/platform/win32/libtesseract.vcxproj @@ -565,6 +565,7 @@ NotUsing pch.h ..\..\include;..\..\thirdparty\leptonica\src;..\..\thirdparty\tesseract\include;..\..\thirdparty\tesseract\src\api;..\..\thirdparty\tesseract\src\arch;..\..\thirdparty\tesseract\src\ccmain;..\..\thirdparty\tesseract\src\ccstruct;..\..\thirdparty\tesseract\src\ccutil;..\..\thirdparty\tesseract\src\classify;..\..\thirdparty\tesseract\src\dict;..\..\thirdparty\tesseract\src\lstm;..\..\thirdparty\tesseract\src\textord;..\..\thirdparty\tesseract\src\viewer;..\..\thirdparty\tesseract\src\wordrec;..\..\thirdparty\tesseract\src\cutil;..\..\scripts\tesseract + stdcpp17 MultiThreadedDLL @@ -650,4 +651,4 @@ - \ No newline at end of file + diff --git a/mupdf/platform/x11/pdfapp.c b/mupdf/platform/x11/pdfapp.c index f81faaeaad4a..434ec09a0f63 100644 --- a/mupdf/platform/x11/pdfapp.c +++ b/mupdf/platform/x11/pdfapp.c @@ -25,26 +25,6 @@ #define MAX(a,b) ((a) > (b) ? (a) : (b)) #endif -static int -fz_mkdir(char *path) -{ -#ifdef _WIN32 - int ret; - wchar_t *wpath = fz_wchar_from_utf8(path); - - if (wpath == NULL) - return -1; - - ret = _wmkdir(wpath); - - free(wpath); - - return ret; -#else - return mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO); -#endif -} - static int create_accel_path(fz_context *ctx, char outname[], size_t len, int create, const char *absname, ...) { va_list args; diff --git a/mupdf/scripts/pipcl.py b/mupdf/scripts/pipcl.py index e9da93f03ad4..4eb69645a65d 100644 --- a/mupdf/scripts/pipcl.py +++ b/mupdf/scripts/pipcl.py @@ -348,8 +348,8 @@ def __init__(self, assumed to be relative to `root`. `to_` identifies what the file should be called within a wheel - or when installing. If `to_` ends with `/`, the leaf of `from_` - is appended to it. + or when installing. 
If `to_` is '' or ends with `/`, the leaf
+                of `from_` is appended to it.
 
                 Initial `$dist-info/` in `_to` is replaced by
                 `{name}-{version}.dist-info/`; this is useful for license files
@@ -1541,7 +1541,7 @@ def build_extension(
         if command_was_run and darwin():
             # We need to patch up references to shared libraries in `libs`.
             sublibraries = list()
-            for lib in libs:
+            for lib in () if libs is None else libs:
                 for libpath in libpaths:
                     found = list()
                     for suffix in '.so', '.dylib':
@@ -1745,6 +1745,7 @@ def run( command, capture=False, check=1):
     else:
         return (cp.returncode, cp.stdout) if capture else cp.returncode
 
+
 def darwin():
     return sys.platform.startswith( 'darwin')
 
@@ -1760,6 +1761,9 @@ def pyodide():
 def linux():
     return platform.system() == 'Linux'
 
+def openbsd():
+    return platform.system() == 'OpenBSD'
+
 class PythonFlags:
     '''
     Compile/link flags for the current python, for example the include path
@@ -1875,6 +1879,8 @@ def macos_patch( library, *sublibraries):
     log2( f'macos_patch(): library={library} sublibraries={sublibraries}')
     if not darwin():
         return
+    if not sublibraries:
+        return
     subprocess.run( f'otool -L {library}', shell=1, check=1)
     command = 'install_name_tool'
     names = []
@@ -2172,6 +2178,47 @@ def _so_suffix():
     return sysconfig.get_config_var('EXT_SUFFIX')
 
 
+def get_soname(path):
+    '''
+    Returns the shared library file that should be used in place of `path`.
+
+    On Linux, if `path` is a softlink that points to a shared library for
+    which `objdump -p` contains 'SONAME', returns the pointee. Useful if
+    Linux shared libraries have been created with `-Wl,-soname,...`, where
+    we need to embed the versioned library.
+
+    On OpenBSD, returns the most recently modified file whose name is
+    `path` plus a version suffix such as `.1.2`.
+
+    Otherwise, including when no such file is found, returns `path`.
+    '''
+    log1(f'{path=} {os.path.abspath(path)=}.')
+    if linux() and os.path.islink(path):
+        path2 = os.path.realpath(path)
+        log1(f'Is link: {path} -> {path2}.')
+        if subprocess.run(f'objdump -p {path2}|grep SONAME', shell=1, check=0).returncode == 0:
+            log1(f'SONAME, returning {path2=}.')
+            return path2
+        log1(f'Not SONAME')
+    elif openbsd():
+        # Return newest .so with version suffix.
+        sos = glob.glob(f'{path}.*')
+        log1(f'{sos=}')
+        sos2 = list()
+        for so in sos:
+            suffix = so[len(path):]
+            if not suffix or re.match('^[.][0-9.]*[0-9]$', suffix):
+                sos2.append(so)
+        sos2.sort(key=lambda p: os.path.getmtime(p))
+        log1(f'{sos2=}')
+        if sos2:
+            return sos2[-1]
+        # Nothing matched; fall through to return `path` rather than
+        # raising IndexError on the empty list.
+    return path
+
+
+
 def install_dir(root=None):
     '''
     Returns install directory used by `install()`.
diff --git a/mupdf/scripts/wrap/__main__.py b/mupdf/scripts/wrap/__main__.py
index 855c3fdc41b5..04570ea8e7c3 100644
--- a/mupdf/scripts/wrap/__main__.py
+++ b/mupdf/scripts/wrap/__main__.py
@@ -1211,6 +1211,7 @@ def _get_m_command( build_dirs, j=None, make=None, m_target=None, m_vars=None):
             make_args += f' {m_vars}'
         suffix = None
         build_prefix = ''
+        build_suffix = ''
         in_prefix = True
         for i, flag in enumerate( flags):
             if flag in ('x32', 'x64') or re.match('py[0-9]', flag):
@@ -1219,6 +1220,7 @@ def _get_m_command( build_dirs, j=None, make=None, m_target=None, m_vars=None):
                 # when creating wheels; we need to ignore
                 # them.
                 jlib.log('Ignoring {flag=}')
+                build_suffix += f'-{flag}'
             else:
                 if 0: pass # lgtm [py/unreachable-statement]
                 elif flag == 'debug':
@@ -1259,6 +1261,8 @@ def _get_m_command( build_dirs, j=None, make=None, m_target=None, m_vars=None):
         assert suffix, f'Leaf must contain "shared-" or "fpic-": build_dirs.dir_so={build_dirs.dir_so}'
         if build_prefix:
             make_args += f' build_prefix={build_prefix}'
+        if build_suffix:
+            make_args += f' build_suffix={build_suffix}'
         if m_target:
             make_args += f' {m_target}'
         command = f'cd {build_dirs.dir_mupdf} &&'
@@ -1456,7 +1460,11 @@ def link_l_flags(sos):
     if state.state_.pyodide:
         # Don't add '-Wl,-rpath*' etc if building for Pyodide.
ld_origin = False - return jlib.link_l_flags( sos, ld_origin) + ret = jlib.link_l_flags( sos, ld_origin) + r = os.environ.get('LDFLAGS') + if r: + ret += f' {r}' + return ret def build( build_dirs, swig_command, args, vs_upgrade, make_command): diff --git a/mupdf/setup.py b/mupdf/setup.py index 49e02508ef25..ba84d366fc33 100644 --- a/mupdf/setup.py +++ b/mupdf/setup.py @@ -309,30 +309,30 @@ def build(): if windows(): infix = '' if sys.maxsize == 2**31 - 1 else '64' names = [ - f'mupdfcpp{infix}.dll', # C and C++. - '_mupdf.pyd', # Python internals. - 'mupdf.py', # Python. + f'{build_dir()}/mupdfcpp{infix}.dll', # C and C++. + f'{build_dir()}/_mupdf.pyd', # Python internals. + f'{build_dir()}/mupdf.py', # Python. ] elif macos(): log( f'Contents of {build_dir()} are:') for leaf in os.listdir(build_dir()): log( f' {leaf}') names = [ - 'libmupdf.dylib', # C. - 'libmupdfcpp.so', # C++. - '_mupdf.so', # Python internals. - 'mupdf.py', # Python. + f'{build_dir()}/libmupdf.dylib', # C. + f'{build_dir()}/libmupdfcpp.so', # C++. + f'{build_dir()}/_mupdf.so', # Python internals. + f'{build_dir()}/mupdf.py', # Python. ] else: names = [ - 'libmupdf.so', # C. - 'libmupdfcpp.so', # C++. - '_mupdf.so', # Python internals. - 'mupdf.py', # Python. + pipcl.get_soname(f'{build_dir()}/libmupdf.so'), # C. + pipcl.get_soname(f'{build_dir()}/libmupdfcpp.so'), # C++. + f'{build_dir()}/_mupdf.so', # Python internals. + f'{build_dir()}/mupdf.py', # Python. 
]
     paths = []
     for name in names:
-        paths.append((f'{build_dir()}/{name}', name))
+        paths.append((name, ''))
 
     log(f'build(): returning: {paths}')
     return paths
diff --git a/mupdf/source/fitz/string.c b/mupdf/source/fitz/string.c
index dec96c32a557..6b2d34862b62 100755
--- a/mupdf/source/fitz/string.c
+++ b/mupdf/source/fitz/string.c
@@ -938,3 +938,39 @@ void *fz_memmem(const void *h0, size_t k, const void *n0, size_t l)
 
 	return twoway_memmem(h, h+k, n, l);
 }
+
+/*
+	Convert a wchar string into a new heap allocated utf8 one.
+
+	Returns NULL if s is NULL. The result is allocated with fz_malloc;
+	the caller releases it with fz_free.
+*/
+char *
+fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s)
+{
+	const wchar_t *src = s;
+	char *d;
+	char *dst;
+	int len = 1;
+
+	/* Nothing to convert: return NULL instead of dereferencing a
+	 * NULL pointer in the loops below. */
+	if (s == NULL)
+		return NULL;
+
+	while (*src)
+	{
+		len += fz_runelen(*src++);
+	}
+
+	d = Memento_label(fz_malloc(ctx, len), "utf8_from_wchar");
+	dst = d;
+	src = s;
+	while (*src)
+	{
+		dst += fz_runetochar(dst, *src++);
+	}
+	*dst = 0;
+
+	return d;
+}
diff --git a/mupdf/source/fitz/time.c b/mupdf/source/fitz/time.c
index cfe9ac363582..9ff1efa835fb 100644
--- a/mupdf/source/fitz/time.c
+++ b/mupdf/source/fitz/time.c
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include /* for mkdir */
 
 #ifdef _MSC_VER
 #ifndef _WINRT
@@ -62,8 +63,8 @@ int gettimeofday(struct timeval *tv, struct timezone *tz)
 #endif /* !_WINRT */
 #endif /* _MSC_VER */
 
-char *
-fz_utf8_from_wchar(const wchar_t *s)
+static char *
+utf8_from_wchar(const wchar_t *s)
 {
 	const wchar_t *src = s;
 	char *d;
@@ -89,8 +90,8 @@ fz_utf8_from_wchar(const wchar_t *s)
 	return d;
 }
 
-wchar_t *
-fz_wchar_from_utf8(const char *s)
+static wchar_t *
+wchar_from_utf8(const char *s)
 {
 	wchar_t *d, *r;
 	int c;
@@ -115,13 +116,13 @@ fz_fopen_utf8(const char *name, const char *mode)
 	wchar_t *wname, *wmode;
 	FILE *file;
 
-	wname = fz_wchar_from_utf8(name);
+	wname = wchar_from_utf8(name);
 	if (wname == NULL)
 	{
 		return NULL;
 	}
 
-	wmode = fz_wchar_from_utf8(mode);
+	wmode = wchar_from_utf8(mode);
 	if (wmode == NULL)
 	{
 		free(wname);
@@ -141,7 +142,7 @@ fz_remove_utf8(const char *name)
 	wchar_t *wname;
 	int n;
 
-	wname = fz_wchar_from_utf8(name);
+	wname = wchar_from_utf8(name);
 	if (wname ==
NULL)
 	{
 		errno = ENOMEM;
@@ -169,7 +170,7 @@ fz_argv_from_wargv(int argc, wchar_t **wargv)
 
 	for (i = 0; i < argc; i++)
 	{
-		argv[i] = Memento_label(fz_utf8_from_wchar(wargv[i]), "fz_arg");
+		argv[i] = Memento_label(utf8_from_wchar(wargv[i]), "fz_arg");
 		if (argv[i] == NULL)
 		{
 			fprintf(stderr, "Out of memory while processing command line args!\n");
@@ -195,7 +196,7 @@ fz_stat_ctime(const char *path)
 	struct _stat info;
 	wchar_t *wpath;
 
-	wpath = fz_wchar_from_utf8(path);
+	wpath = wchar_from_utf8(path);
 	if (wpath == NULL)
 		return 0;
 
@@ -214,7 +215,7 @@ fz_stat_mtime(const char *path)
 	struct _stat info;
 	wchar_t *wpath;
 
-	wpath = fz_wchar_from_utf8(path);
+	wpath = wchar_from_utf8(path);
 	if (wpath == NULL)
 		return 0;
 
@@ -227,6 +228,26 @@ fz_stat_mtime(const char *path)
 	return info.st_mtime;
 }
 
+/*
+	Create a directory from a utf8 encoded path, using _wmkdir.
+	Returns -1 if the path cannot be converted to wchar.
+*/
+int
+fz_mkdir(char *path)
+{
+	int ret;
+	wchar_t *wpath = wchar_from_utf8(path);
+
+	if (wpath == NULL)
+		return -1;
+
+	ret = _wmkdir(wpath);
+
+	free(wpath);
+
+	return ret;
+}
+
 #else
 
 int64_t
@@ -247,4 +268,14 @@ fz_stat_mtime(const char *path)
 	return info.st_mtime;
 }
 
+/*
+	Create a directory with read, write and execute permission bits
+	requested for user, group and others (S_IRWXU | S_IRWXG | S_IRWXO).
+*/
+int
+fz_mkdir(char *path)
+{
+	return mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO);
+}
+
 #endif /* _WIN32 */
diff --git a/mupdf/source/fitz/unlibarchive.c b/mupdf/source/fitz/unlibarchive.c
index dbc753bd17c9..456c0667daf6 100644
--- a/mupdf/source/fitz/unlibarchive.c
+++ b/mupdf/source/fitz/unlibarchive.c
@@ -369,6 +369,8 @@ fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
 {
 	fz_libarchive_archive *arch = fz_new_derived_archive(ctx, file, fz_libarchive_archive);
 	int r;
+	int free_path = 0;
+	const char *path = NULL;
 
 	fz_seek(ctx, file, 0, SEEK_SET);
 
@@ -386,6 +388,9 @@ fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
 	arch->super.open_entry = open_libarchive_entry;
 	arch->super.drop_archive = drop_libarchive_archive;
 
+	fz_var(free_path);
+	fz_var(path);
+
 	fz_try(ctx)
 	{
 		arch->ctx = ctx;
@@ -393,7 +398,6 @@ fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
 		do
 		{
 			struct archive_entry *entry;
-			const char *path;
 			size_t z;
 
 			r = archive_read_next_header(arch->archive, &entry);
@@ -403,7 +407,19 @@ fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
 			if (r != ARCHIVE_OK)
 				fz_throw(ctx, FZ_ERROR_LIBRARY, "Corrupt archive");
 
+			free_path = 0;
 			path = archive_entry_pathname_utf8(entry);
+			if (!path)
+			{
+				/* No utf8 pathname: fall back to converting the wide
+				 * pathname, but only if there is one to convert. */
+				const wchar_t *wpath = archive_entry_pathname_w(entry);
+				if (wpath)
+				{
+					path = fz_utf8_from_wchar(ctx, wpath);
+					free_path = 1;
+				}
+			}
 			if (!path)
 				continue;
 
@@ -420,6 +436,12 @@ fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
 			z = strlen(path);
 			arch->entries[arch->entries_len] = fz_malloc(ctx, sizeof(entry_t) - 32 + z + 1);
 			memcpy(&arch->entries[arch->entries_len]->name[0], path, z+1);
+			if (free_path)
+			{
+				/* free_path is only set when path came from fz_utf8_from_wchar. */
+				fz_free(ctx, (void *)path);
+				free_path = 0;
+			}
 			arch->entries[arch->entries_len]->len = archive_entry_size(entry);
 
 			arch->entries_len++;
@@ -428,6 +450,11 @@ fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
 
 		libarchive_reset(ctx, arch);
 	}
+	fz_always(ctx)
+	{
+		if (free_path)
+			fz_free(ctx, (void *)path);
+	}
 	fz_catch(ctx)
 	{
 		arch->ctx = NULL;
diff --git a/mupdf/source/pdf/pdf-op-run.c b/mupdf/source/pdf/pdf-op-run.c
index 3af5545ec321..d31c8fd90aa8 100644
--- a/mupdf/source/pdf/pdf-op-run.c
+++ b/mupdf/source/pdf/pdf-op-run.c
@@ -219,6 +219,8 @@ begin_softmask(fz_context *ctx, pdf_run_processor *pr, softmask_save *save)
 	int saved_blendmode;
 	fz_function *tr = NULL;
 
+	fz_var(tr);
+
 	save->softmask = softmask;
 	if (softmask == NULL)
 		return gstate;
@@ -2385,7 +2387,10 @@ static void pdf_run_gs_SMask(fz_context *ctx, pdf_processor *proc, pdf_obj *smas
 		gstate->softmask = pdf_keep_obj(ctx, smask);
 		gstate->softmask_resources = pdf_keep_obj(ctx, pr->rstack->resources);
 		if (tr)
+		{
+			pdf_drop_obj(ctx, gstate->softmask_tr);
 			gstate->softmask_tr = pdf_keep_obj(ctx, tr);
+		}
 		for (i = 0; i < cs_n; ++i)
 			gstate->softmask_bc[i] = bc[i];
 		gstate->luminosity = luminosity;
diff --git a/mupdf/source/tools/mudraw.c b/mupdf/source/tools/mudraw.c
index c4ba1edbb006..93bf2f138f2a 100644 --- a/mupdf/source/tools/mudraw.c +++ b/mupdf/source/tools/mudraw.c @@ -1898,26 +1898,6 @@ static void apply_layer_config(fz_context *ctx, fz_document *doc, const char *lc #endif } -static int -fz_mkdir(char *path) -{ -#ifdef _WIN32 - int ret; - wchar_t *wpath = fz_wchar_from_utf8(path); - - if (wpath == NULL) - return -1; - - ret = _wmkdir(wpath); - - free(wpath); - - return ret; -#else - return mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO); -#endif -} - static int create_accel_path(fz_context *ctx, char outname[], size_t len, int create, const char *absname, ...) { va_list args;