diff --git a/src/xz/args.c b/src/xz/args.c index b3743ceaf..37708ca5c 100644 --- a/src/xz/args.c +++ b/src/xz/args.c @@ -21,6 +21,7 @@ bool opt_stdout = false; bool opt_force = false; bool opt_keep_original = false; +bool opt_synchronous = false; bool opt_robot = false; bool opt_ignore_check = false; @@ -217,6 +218,7 @@ parse_real(args_info *args, int argc, char **argv) OPT_LZMA1, OPT_LZMA2, + OPT_SYNCHRONOUS, OPT_SINGLE_STREAM, OPT_NO_SPARSE, OPT_FILES, @@ -249,6 +251,7 @@ parse_real(args_info *args, int argc, char **argv) { "force", no_argument, NULL, 'f' }, { "stdout", no_argument, NULL, 'c' }, { "to-stdout", no_argument, NULL, 'c' }, + { "synchronous", no_argument, NULL, OPT_SYNCHRONOUS }, { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM }, { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, { "suffix", required_argument, NULL, 'S' }, @@ -655,6 +658,10 @@ parse_real(args_info *args, int argc, char **argv) optarg, 0, UINT64_MAX); break; + case OPT_SYNCHRONOUS: + opt_synchronous = true; + break; + default: message_try_help(); tuklib_exit(E_ERROR, E_ERROR, false); diff --git a/src/xz/args.h b/src/xz/args.h index e693ecd62..7fdf37f14 100644 --- a/src/xz/args.h +++ b/src/xz/args.h @@ -34,7 +34,7 @@ typedef struct { extern bool opt_stdout; extern bool opt_force; extern bool opt_keep_original; -// extern bool opt_recursive; +extern bool opt_synchronous; extern bool opt_robot; extern bool opt_ignore_check; diff --git a/src/xz/coder.c b/src/xz/coder.c index c9899abbe..fda7f765c 100644 --- a/src/xz/coder.c +++ b/src/xz/coder.c @@ -1244,6 +1244,12 @@ coder_normal(file_pair *pair) if (coder_write_output(pair)) break; + // If --flush-timeout is combined with + // --synchronous, each flush is also + // synced to permanent storage. + if (opt_synchronous && io_sync_dest(pair)) + break; + // Mark that we haven't seen any new input // since the previous flush. pair->src_has_seen_input = false; diff --git a/src/xz/file_io.c b/src/xz/file_io.c index d8a7597ba..76698474a 100644 --- a/src/xz/file_io.c +++ b/src/xz/file_io.c @@ -17,6 +17,7 @@ # include #else # include +# include static bool warn_fchown; #endif @@ -56,6 +57,10 @@ static bool warn_fchown; # define S_ISREG(m) (((m) & _S_IFMT) == _S_IFREG) #endif +#if defined(_WIN32) && !defined(__CYGWIN__) +# define fsync _commit +#endif + #ifndef O_BINARY # define O_BINARY 0 #endif @@ -64,6 +69,14 @@ static bool warn_fchown; # define O_NOCTTY 0 #endif +#ifndef O_SEARCH +# define O_SEARCH O_RDONLY +#endif + +#ifndef O_DIRECTORY +# define O_DIRECTORY 0 +#endif + // Using this macro to silence a warning from gcc -Wlogical-op. #if EAGAIN == EWOULDBLOCK # define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN) @@ -450,6 +463,63 @@ io_copy_attrs(const file_pair *pair) } +extern bool +io_sync_dest(file_pair *pair) +{ + assert(pair->dest_fd != -1); + + if (fsync(pair->dest_fd)) { + // If dest_fd is STDOUT_FILENO, we might be writing to + // something which cannot be synced and thus syncing will + // fail with EINVAL. Ignore the error in that case and + // return successfully. + // + // FIXME? On Windows, fsync() is actually _commit() due + // to the #define at the top of this file. _commit() sets + // errno to EBADF instead of EINVAL when stdout is console + // or nul. Ignoring EBADF wouldn't be good though because + // it might be that _commit() uses EBADF for *all* errors. + // It feels safest to leave this broken for cases where + // stdout isn't a regular file. + if (errno == EINVAL) { + assert(pair->dest_fd == STDOUT_FILENO); + return false; + } + + message_error(_("%s: Synchronizing the file failed: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); + return true; + } + +#ifndef TUKLIB_DOSLIKE + // If we have a file descriptor of the directory that contains + // the file, try to sync the directory. + if (pair->dir_fd != -1) { + if (fsync(pair->dir_fd)) { + message_error(_("%s: Synchronizing the directory of " + "the file failed: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); + return true; + } + + // With the combination of --flush-timeout and --synchronous, + // this function may be called multiple times for the same + // file. The directory needs to be synced only once though. + // + // NOTE: This is correct but weird. A typical use case of + // --flush-timeout writes to stdout, and then dir_fd isn't + // available. + (void)close(pair->dir_fd); + pair->dir_fd = -1; + } +#endif + + return false; +} + + /// Opens the source file. Returns false on success, true on error. static bool io_open_src_real(file_pair *pair) @@ -717,6 +787,9 @@ io_open_src(const char *src_name) .dest_name = NULL, .src_fd = -1, .dest_fd = -1, +#ifndef TUKLIB_DOSLIKE + .dir_fd = -1, +#endif .src_eof = false, .src_has_seen_input = false, .flush_needed = false, @@ -819,6 +892,46 @@ io_open_dest_real(file_pair *pair) if (pair->dest_name == NULL) return true; +#ifndef TUKLIB_DOSLIKE + // If --synchronous is used, open also the directory + // so that we can sync it. + if (opt_synchronous) { + char *buf = xstrdup(pair->dest_name); + const char *dir_name = dirname(buf); + + // O_NOCTTY and O_NONBLOCK are there in case + // O_DIRECTORY is 0 and dir_name doesn't refer + // to a directory. (We opened the source file + // already but directories might have been renamed + // after the source file was opened.) + pair->dir_fd = open(dir_name, O_SEARCH | O_DIRECTORY + | O_NOCTTY | O_NONBLOCK); + if (pair->dir_fd == -1) { + // Since we did open the source file + // successfully, we should rarely get here. + // Perhaps something has been renamed or + // had its permissions changed. + // + // In an odd case, the directory has write + // and search permissions but not read + // permission (d-wx------), and O_SEARCH is + // actually O_RDONLY. Then we would be able + // to create a new file and only the directory + // syncing would be impossible. But if user + // specifies --synchronous, let's be strict + // about it. + message_error(_("%s: Opening the directory " + "failed: %s"), + tuklib_mask_nonprint(dir_name), + strerror(errno)); + free(buf); + goto error; + } + + free(buf); + } +#endif + #ifdef __DJGPP__ struct stat st; if (stat(pair->dest_name, &st) == 0) { @@ -828,8 +941,7 @@ io_open_dest_real(file_pair *pair) "a DOS special file", tuklib_mask_nonprint( pair->dest_name)); - free(pair->dest_name); - return true; + goto error; } // Check that we aren't overwriting the source file. @@ -839,8 +951,7 @@ io_open_dest_real(file_pair *pair) "as the input file", tuklib_mask_nonprint( pair->dest_name)); - free(pair->dest_name); - return true; + goto error; } } #endif @@ -850,8 +961,7 @@ io_open_dest_real(file_pair *pair) message_error(_("%s: Cannot remove: %s"), tuklib_mask_nonprint(pair->dest_name), strerror(errno)); - free(pair->dest_name); - return true; + goto error; } // Open the file. @@ -867,9 +977,12 @@ io_open_dest_real(file_pair *pair) message_error(_("%s: %s"), tuklib_mask_nonprint(pair->dest_name), strerror(errno)); - free(pair->dest_name); - return true; + goto error; } + + // If using --synchronous, we could sync dir_fd now and + // close it. However, performance can be better if this is + // delayed until dest_fd has been synced in io_sync_dest(). } if (fstat(pair->dest_fd, &pair->dest_st)) { @@ -901,9 +1014,7 @@ io_open_dest_real(file_pair *pair) // dest_fd needs to be reset to -1 to keep io_close() working. (void)close(pair->dest_fd); pair->dest_fd = -1; - - free(pair->dest_name); - return true; + goto error; } #elif !defined(TUKLIB_DOSLIKE) else if (try_sparse && opt_mode == MODE_DECOMPRESS) { @@ -975,6 +1086,18 @@ io_open_dest_real(file_pair *pair) #endif return false; + +error: +#ifndef TUKLIB_DOSLIKE + // io_close() closes pair->dir_fd but let's do it here anyway. + if (pair->dir_fd != -1) { + (void)close(pair->dir_fd); + pair->dir_fd = -1; + } +#endif + + free(pair->dest_name); + return true; } @@ -1074,6 +1197,19 @@ io_close(file_pair *pair, bool success) if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) io_copy_attrs(pair); + // Synchronize the file (and possibly its directory) if requested. + if (opt_synchronous) { + if (success && pair->dest_fd != -1) + success = !io_sync_dest(pair); + +#ifndef TUKLIB_DOSLIKE + // If io_sync_dest() was successfully called, it + // already closed dir_fd. Otherwise we do it here. + if (pair->dir_fd != -1) + (void)close(pair->dir_fd); +#endif + } + // Close the destination first. If it fails, we must not remove // the source file! if (io_close_dest(pair, success)) diff --git a/src/xz/file_io.h b/src/xz/file_io.h index f7b2f49e6..dc4f48619 100644 --- a/src/xz/file_io.h +++ b/src/xz/file_io.h @@ -55,6 +55,12 @@ typedef struct { /// File descriptor of the target file int dest_fd; +#ifndef TUKLIB_DOSLIKE + /// File descriptor of the directory of the target file (which is + /// also the directory of the source file) + int dir_fd; +#endif + /// True once end of the source file has been detected. bool src_eof; @@ -180,3 +186,12 @@ extern bool io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos); /// \return On success, false is returned. On error, error message /// is printed and true is returned. extern bool io_write(file_pair *pair, const io_buf *buf, size_t size); + + +/// \brief Synchronizes the destination file to permanent storage +/// +/// \param pair File pair having the destination file open for writing +/// +/// \return On success, false is returned. On error, error message +/// is printed and true is returned. +extern bool io_sync_dest(file_pair *pair); diff --git a/src/xz/message.c b/src/xz/message.c index 1ff592ec4..12e7552da 100644 --- a/src/xz/message.c +++ b/src/xz/message.c @@ -1069,7 +1069,8 @@ message_help(bool long_help) e |= tuklib_wrapf(stdout, &wrap2, " --block-size=%s\v%s\r" " --block-list=%s\v%s\r" - " --flush-timeout=%s\v%s", + " --flush-timeout=%s\v%s\r" + " --synchronous\v%s", _("SIZE"), W_("start a new .xz block after every SIZE bytes " "of input; use this to set the block size " @@ -1084,7 +1085,10 @@ message_help(bool long_help) W_("when compressing, if more than NUM " "milliseconds has passed since the previous " "flush and reading more input would block, " - "all pending data is flushed out")); + "all pending data is flushed out"), + W_("synchronize the output file to the storage device " + "after successful compression, " + "decompression, or flushing")); e |= tuklib_wrapf(stdout, &wrap2, " --memlimit-compress=%1$s\n" diff --git a/src/xz/sandbox.c b/src/xz/sandbox.c index 5a12f69b6..9cea6a419 100644 --- a/src/xz/sandbox.c +++ b/src/xz/sandbox.c @@ -226,7 +226,9 @@ sandbox_init(void) // rights because files are created with open() using O_EXCL and // without O_TRUNC. // - // LANDLOCK_ACCESS_FS_READ_DIR is included here to get a clear error + // LANDLOCK_ACCESS_FS_READ_DIR is required when using --synchronous. + // + // LANDLOCK_ACCESS_FS_READ_DIR is also helpful to show a clear error // message if xz is given a directory name. Without this permission // the message would be "Permission denied" but with this permission // it's "Is a directory, skipping". It could be worked around with diff --git a/src/xz/xz.1 b/src/xz/xz.1 index 5b880e81e..710949bc8 100644 --- a/src/xz/xz.1 +++ b/src/xz/xz.1 @@ -1039,6 +1039,41 @@ is unsuitable for decompressing the stream in real time due to how .B xz does buffering. .TP +.B \-\-synchronous +After successful compression or decompression, +synchronize the target file to the storage device. +The synchronization is done before possibly removing the source file, +thus at least one of the two files should exist after a system crash. +.IP "" +On POSIX systems, +also the directory containing the file is synchronized +.BR "except when writing to standard output". +If the directory isn't synchronized, +it's possible that the directory entry for the file won't exist after a crash +even if the file itself has been synchronized. +A workaround is to create an empty file first and synchronize the directory. +An example using +.BR sync (1) +from GNU coreutils: +.RS +.IP "" +.nf +.ft CR +touch foo/bar/target.xz +sync foo/bar +sometool | xz --synchronous > foo/bar/target.xz +.ft R +.fi +.RE +.IP "" +If +.B \-\-synchronous +is used together with +.BR \-\-flush\-timeout , +the file is also synchronized after every flush. +This way all the data until the most recent flush +should be recoverable after a system crash. +.TP .BI \-\-memlimit\-compress= limit Set a memory usage limit for compression. If this option is specified multiple times,