From fc6800387c53978500c791e1002e4a0f63d414f0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kowalczyk Date: Wed, 8 Nov 2023 21:57:27 -0500 Subject: [PATCH] update mupdf --- mupdf/include/mupdf/fitz/filter.h | 9 ++ mupdf/source/fitz/unlibarchive.c | 250 ++++++++++++++++++++++++++++-- mupdf/source/html/html-parse.c | 7 +- mupdf/source/xps/xps-link.c | 32 ++-- mupdf/source/xps/xps-path.c | 8 +- 5 files changed, 283 insertions(+), 23 deletions(-) diff --git a/mupdf/include/mupdf/fitz/filter.h b/mupdf/include/mupdf/fitz/filter.h index cae8ef45ef82..e5630f371749 100644 --- a/mupdf/include/mupdf/fitz/filter.h +++ b/mupdf/include/mupdf/fitz/filter.h @@ -155,6 +155,15 @@ fz_stream *fz_open_faxd(fz_context *ctx, fz_stream *chain, fz_stream *fz_open_flated(fz_context *ctx, fz_stream *chain, int window_bits); /** + libarchived filter performs generic compressed decoding of data + in any format understood by libarchive from the chained filter. + + This will throw an exception if libarchive is not built in, or + if the compression format is not recognised. +*/ +fz_stream *fz_open_libarchived(fz_context *ctx, fz_stream *chain); + +/** lzwd filter performs LZW decoding of data read from the chained filter. 
diff --git a/mupdf/source/fitz/unlibarchive.c b/mupdf/source/fitz/unlibarchive.c index 36ac1b551c6d..5668bf1d95cc 100644 --- a/mupdf/source/fitz/unlibarchive.c +++ b/mupdf/source/fitz/unlibarchive.c @@ -109,6 +109,7 @@ libarchive_read(struct archive *a, void *client_data, const void **buf) fz_catch(ctx) { /* Ignore error */ + archive_set_error(a, ARCHIVE_FATAL, "%s", fz_caught_message(ctx)); return -1; } @@ -131,6 +132,7 @@ libarchive_skip(struct archive *a, void *client_data, la_int64_t skip) fz_catch(ctx) { /* Ignore error */ + archive_set_error(a, ARCHIVE_FATAL, "%s", fz_caught_message(ctx)); return -1; } @@ -152,6 +154,7 @@ libarchive_seek(struct archive *a, void *client_data, la_int64_t offset, int whe fz_catch(ctx) { /* Ignore error */ + archive_set_error(a, ARCHIVE_FATAL, "%s", fz_caught_message(ctx)); return -1; } @@ -161,8 +164,6 @@ libarchive_seek(struct archive *a, void *client_data, la_int64_t offset, int whe static int libarchive_close(struct archive *a, void *client_data) { - fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data; - /* Nothing to do. Stream is dropped when the fz_archive is closed. 
*/ return ARCHIVE_OK; } @@ -236,7 +237,7 @@ lookup_archive_entry(fz_context *ctx, fz_libarchive_archive *arch, const char *n for (idx = 0; idx < arch->entries_len; idx++) { - if (!strcmp(name, arch->entries[idx]->name)) + if (!strcmp(name, (const char *)arch->entries[idx]->name)) return idx; } @@ -254,7 +255,7 @@ static const char *list_libarchive_entry(fz_context *ctx, fz_archive *arch_, int fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_; if (idx < 0 || idx >= arch->entries_len) return NULL; - return arch->entries[idx]->name; + return (const char *)arch->entries[idx]->name; } static int count_libarchive_entries(fz_context *ctx, fz_archive *arch_) @@ -267,10 +268,10 @@ static fz_buffer * read_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name) { fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_; - fz_stream *file = arch->super.file; fz_buffer *ubuf = NULL; int idx; struct archive_entry *entry; + la_ssize_t ret; size_t size; idx = lookup_archive_entry(ctx, arch, name); @@ -303,9 +304,11 @@ read_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name) size = arch->entries[idx]->len; ubuf = fz_new_buffer(ctx, size); - ubuf->len = archive_read_data(arch->archive, ubuf->data, size); - if (ubuf->len != size) - fz_warn(ctx, "Premature end of data reading archive entry data (%z vs %z)", (size_t)ubuf->len, (size_t)size); + ret = archive_read_data(arch->archive, ubuf->data, size); + if (ret < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read archive data"); + if ((size_t)ret != size) + fz_warn(ctx, "Premature end of data reading archive entry data (%zu vs %zu)", (size_t)ubuf->len, (size_t)size); } fz_always(ctx) arch->ctx = NULL; @@ -321,7 +324,6 @@ read_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name) static fz_stream * open_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name) { - fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_; fz_buffer *buf = 
read_libarchive_entry(ctx, arch_, name); fz_stream *stm = NULL; @@ -374,6 +376,10 @@ fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file) if (r != ARCHIVE_OK) fz_throw(ctx, FZ_ERROR_GENERIC, "Corrupt archive"); + path = archive_entry_pathname_utf8(entry); + if (!path) + continue; + if (arch->entries_len == arch->entries_max) { int new_max = arch->entries_max * 2; @@ -384,8 +390,6 @@ fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file) arch->entries_max = new_max; } - path = archive_entry_pathname_utf8(entry); - z = strlen(path); arch->entries[arch->entries_len] = fz_malloc(ctx, sizeof(entry_t) - 32 + z + 1); memcpy(&arch->entries[arch->entries_len]->name[0], path, z+1); @@ -425,6 +429,222 @@ fz_open_libarchive_archive(fz_context *ctx, const char *filename) return tar; } + +/* Universal decomp stream */ + +typedef struct +{ + fz_stream *chain; + fz_context *ctx; /* Safe as not persistent. */ + struct archive *archive; + struct archive_entry *entry; + uint8_t block[4096]; +} fz_libarchived_state; + +static la_ssize_t +libarchived_read(struct archive *a, void *client_data, const void **buf) +{ + fz_libarchived_state *state = (fz_libarchived_state *)client_data; + size_t z; + uint8_t *p; + size_t left; + fz_context *ctx = state->ctx; + la_ssize_t ret = 0; + + fz_try(ctx) + { + z = fz_available(ctx, state->chain, 1024); + + /* If we're at the EOF, can't read anything! */ + if (z == 0) + break; + + /* If we have at least 1K, then just return the pointer to that + * directly. */ + if (z >= 1024) + { + *buf = state->chain->rp; + state->chain->rp += z; + ret = (la_ssize_t)z; + break; + } + + /* If not, let's pull a large enough lump out. 
*/ + + left = sizeof(state->block); + p = state->block; + do + { + memcpy(p, state->chain->rp, z); + p += z; + state->chain->rp += z; + left -= z; + if (left) + { + z = fz_available(ctx, state->chain, left); + if (z > left) + z = left; + if (z == 0) + break; + } + } + while (left != 0); + + ret = p - state->block; + *buf = state->block; + } + fz_catch(ctx) + { + /* Ignore error */ + archive_set_error(a, ARCHIVE_FATAL, "%s", fz_caught_message(ctx)); + return -1; + } + + return ret; +} + +static la_int64_t +libarchived_skip(struct archive *a, void *client_data, la_int64_t skip) +{ + fz_libarchived_state *state = (fz_libarchived_state *)client_data; + int64_t pos; + fz_context *ctx = state->ctx; + + fz_try(ctx) + { + pos = fz_tell(state->ctx, state->chain); + fz_seek(state->ctx, state->chain, pos + skip, SEEK_SET); + pos = fz_tell(state->ctx, state->chain) - pos; + } + fz_catch(ctx) + { + /* Ignore error */ + archive_set_error(a, ARCHIVE_FATAL, "%s", fz_caught_message(ctx)); + return -1; + } + + return pos; +} + +static la_int64_t +libarchived_seek(struct archive *a, void *client_data, la_int64_t offset, int whence) +{ + fz_libarchived_state *state = (fz_libarchived_state *)client_data; + fz_context *ctx = state->ctx; + int64_t pos; + + fz_try(ctx) + { + fz_seek(ctx, state->chain, offset, whence); + pos = fz_tell(ctx, state->chain); + } + fz_catch(ctx) + { + /* Ignore error */ + archive_set_error(a, ARCHIVE_FATAL, "%s", fz_caught_message(ctx)); + return -1; + } + + return pos; +} + +static int +libarchived_close(struct archive *a, void *client_data) +{ + /* Nothing to do. Stream is dropped when the fz_stream is dropped. 
*/ + return ARCHIVE_OK; +} + +static int +next_libarchived(fz_context *ctx, fz_stream *stm, size_t required) +{ + fz_libarchived_state *state = stm->state; + la_ssize_t z; + + if (stm->eof) + return EOF; + + z = archive_read_data(state->archive, state->block, sizeof(state->block)); + if (z < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read compressed data"); + if (z == 0) + { + stm->eof = 1; + return EOF; + } + + stm->rp = state->block; + stm->wp = state->block + z; + + return *stm->rp++; +} + +static void +close_libarchived(fz_context *ctx, void *state_) +{ + fz_libarchived_state *state = (fz_libarchived_state *)state_; + int code; + + state->ctx = ctx; + code = archive_read_free(state->archive); + state->ctx = NULL; + if (code != ARCHIVE_OK) + fz_warn(ctx, "libarchive error: archive_read_free: %d", code); + + fz_drop_stream(ctx, state->chain); + fz_free(ctx, state); +} + +fz_stream * +fz_open_libarchived(fz_context *ctx, fz_stream *chain) +{ + fz_libarchived_state *state; + int r; + + state = fz_malloc_struct(ctx, fz_libarchived_state); + + state->chain = fz_keep_stream(ctx, chain); + state->archive = archive_read_new(); + archive_read_support_filter_all(state->archive); + archive_read_support_format_raw(state->archive); + + state->ctx = ctx; + r = archive_read_set_seek_callback(state->archive, libarchived_seek); + if (r == ARCHIVE_OK) + r = archive_read_open2(state->archive, state, NULL, libarchived_read, libarchived_skip, libarchived_close); + if (r != ARCHIVE_OK) + { + archive_read_free(state->archive); + state->ctx = NULL; + fz_drop_stream(ctx, state->chain); + fz_free(ctx, state); + fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to open archive"); + } + + r = archive_filter_code(state->archive, 0); + if (r == ARCHIVE_FILTER_NONE) + { + archive_read_free(state->archive); + state->ctx = NULL; + fz_drop_stream(ctx, state->chain); + fz_free(ctx, state); + fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to open archive"); + } + + /* This is the one we want. 
*/ + r = archive_read_next_header(state->archive, &state->entry); + if (r != ARCHIVE_OK) + { + archive_read_free(state->archive); + state->ctx = NULL; + fz_drop_stream(ctx, state->chain); + fz_free(ctx, state); + fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to open archive"); + } + + return fz_new_stream(ctx, state, next_libarchived, close_libarchived); +} + #else int @@ -457,4 +677,12 @@ fz_open_libarchive_archive(fz_context *ctx, const char *filename) return NULL; } +fz_stream * +fz_open_libarchived(fz_context *ctx, fz_stream *chain) +{ + fz_throw(ctx, FZ_ERROR_GENERIC, "libarchive support not included"); + + return NULL; +} + #endif diff --git a/mupdf/source/html/html-parse.c b/mupdf/source/html/html-parse.c index b57de2f3e273..9801cd8b71c5 100644 --- a/mupdf/source/html/html-parse.c +++ b/mupdf/source/html/html-parse.c @@ -2454,7 +2454,7 @@ fz_txt_buffer_to_html(fz_context *ctx, fz_buffer *in) outbuf = fz_new_buffer(ctx, 1024); out = fz_new_output_with_buffer(ctx, outbuf); - fz_write_string(ctx, out, "
");
+		fz_write_string(ctx, out, "
");
 
 		if (encoding == ENCODING_UTF16_LE || encoding == ENCODING_UTF16_BE)
 		{
@@ -2502,6 +2502,11 @@ fz_txt_buffer_to_html(fz_context *ctx, fz_buffer *in)
 				while (n--)
 					fz_write_byte(ctx, out, ' ');
 			}
+			else if (c == 12)
+			{
+				col = -1;
+				fz_write_string(ctx, out, "</pre><pre>\n");
+			}
 			else if (c == '<')
 				fz_write_string(ctx, out, "&lt;");
 			else if (c == '>')
diff --git a/mupdf/source/xps/xps-link.c b/mupdf/source/xps/xps-link.c
index 4edf5463a596..879e47563aae 100644
--- a/mupdf/source/xps/xps-link.c
+++ b/mupdf/source/xps/xps-link.c
@@ -68,8 +68,12 @@ xps_load_links_in_path(fz_context *ctx, xps_document *doc, fz_matrix ctm,
 			path = xps_parse_path_geometry(ctx, doc, dict, data_tag, 0, &fill_rule);
 		if (path)
 		{
-			area = fz_bound_path(ctx, path, NULL, ctm);
-			fz_drop_path(ctx, path);
+			fz_try(ctx)
+				area = fz_bound_path(ctx, path, NULL, ctm);
+			fz_always(ctx)
+				fz_drop_path(ctx, path);
+			fz_catch(ctx)
+				fz_rethrow(ctx);
 			xps_add_link(ctx, doc, area, base_uri, navigate_uri_att, link);
 		}
 	}
@@ -98,7 +102,7 @@ xps_load_links_in_glyphs(fz_context *ctx, xps_document *doc, fz_matrix ctm,
 		int is_sideways = 0;
 		int bidi_level = 0;
 		fz_font *font;
-		fz_text *text;
+		fz_text *text = NULL;
 		fz_rect area;
 
 		xps_resolve_resource_reference(ctx, doc, dict, &transform_att, &transform_tag, NULL);
@@ -113,12 +117,22 @@ xps_load_links_in_glyphs(fz_context *ctx, xps_document *doc, fz_matrix ctm,
 		font = xps_lookup_font(ctx, doc, base_uri, font_uri_att, style_att);
 		if (!font)
 			return;
-		text = xps_parse_glyphs_imp(ctx, doc, ctm, font, fz_atof(font_size_att),
-				fz_atof(origin_x_att), fz_atof(origin_y_att),
-				is_sideways, bidi_level, indices_att, unicode_att);
-		area = fz_bound_text(ctx, text, NULL, ctm);
-		fz_drop_text(ctx, text);
-		fz_drop_font(ctx, font);
+
+		fz_var(text);
+		fz_try(ctx)
+		{
+			text = xps_parse_glyphs_imp(ctx, doc, ctm, font, fz_atof(font_size_att),
+					fz_atof(origin_x_att), fz_atof(origin_y_att),
+					is_sideways, bidi_level, indices_att, unicode_att);
+			area = fz_bound_text(ctx, text, NULL, ctm);
+		}
+		fz_always(ctx)
+		{
+			fz_drop_text(ctx, text);
+			fz_drop_font(ctx, font);
+		}
+		fz_catch(ctx)
+			fz_rethrow(ctx);
 
 		xps_add_link(ctx, doc, area, base_uri, navigate_uri_att, link);
 	}
diff --git a/mupdf/source/xps/xps-path.c b/mupdf/source/xps/xps-path.c
index d3c9a95f48d3..516a590091dc 100644
--- a/mupdf/source/xps/xps-path.c
+++ b/mupdf/source/xps/xps-path.c
@@ -802,8 +802,12 @@ xps_clip(fz_context *ctx, xps_document *doc, fz_matrix ctm, xps_resource *dict,
 		path = xps_parse_path_geometry(ctx, doc, dict, clip_tag, 0, &fill_rule);
 	else
 		path = fz_new_path(ctx);
-	fz_clip_path(ctx, dev, path, fill_rule == 0, ctm, fz_infinite_rect);
-	fz_drop_path(ctx, path);
+	fz_try(ctx)
+		fz_clip_path(ctx, dev, path, fill_rule == 0, ctm, fz_infinite_rect);
+	fz_always(ctx)
+		fz_drop_path(ctx, path);
+	fz_catch(ctx)
+		fz_rethrow(ctx);
 }
 
 void