Skip to content

Commit

Permalink
utils: compare logical extent offset in rm_util_link_type
Browse files Browse the repository at this point in the history
Holes are handled by ignoring them when they are after an extent and
skipping them when they are before one. Since we were only comparing the
current file position, a file with a hole could match a file without one
as long as the physical offsets match and the extents logically end at
the same place. This led to --is-reflink false-positives.

Fix this by comparing the logical offset of each extent.

Fixes sahib#611
  • Loading branch information
cebtenzzre committed Feb 24, 2023
1 parent 2479f4e commit b64f89f
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 14 deletions.
38 changes: 25 additions & 13 deletions lib/utilities.c
Original file line number Diff line number Diff line change
Expand Up @@ -1013,13 +1013,14 @@ static struct fiemap *rm_offset_get_fiemap(int fd, const int n_extents,
return fm;
}

/* Return physical (disk) offset of the beginning of the file extent containing the
/* Return physical (disk) offset of the beginning of the file extent found after the
* specified logical file_offset.
* If a pointer to file_offset_next is provided then read fiemap extents until
* the next non-contiguous extent (fragment) is encountered and writes the corresponding
* file offset to &file_offset_next.
* If a pointer to file_offset_next is provided then read fiemap extents until the next
* non-contiguous extent (fragment) is encountered and writes the corresponding file
* offset to *file_offset_next.
* The logical offset of the found extent is written to *logical_offset.
* */
RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, bool *is_last) {
RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, RmOff *logical_offset, bool *is_last) {
RmOff result = 0;
bool done = FALSE;
bool first = TRUE;
Expand Down Expand Up @@ -1054,7 +1055,10 @@ RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next,
if (first) {
/* remember disk location of start of data */
result = fm_ext.fe_physical;
first=FALSE;
if(logical_offset != NULL) {
*logical_offset = fm_ext.fe_logical;
}
first = FALSE;
} else {
/* check if subsequent extents are contiguous */
if(fm_ext.fe_physical != expected) {
Expand Down Expand Up @@ -1100,15 +1104,15 @@ RmOff rm_offset_get_from_path(const char *path, RmOff file_offset,
rm_log_info("Error opening %s in rm_offset_get_from_path\n", path);
return 0;
}
RmOff result = rm_offset_get_from_fd(fd, file_offset, file_offset_next, NULL);
RmOff result = rm_offset_get_from_fd(fd, file_offset, file_offset_next, NULL, NULL);
rm_sys_close(fd);
return result;
}

#else /* Probably FreeBSD */

RmOff rm_offset_get_from_fd(_UNUSED int fd, _UNUSED RmOff file_offset,
_UNUSED RmOff *file_offset_next, _UNUSED bool *is_last) {
RmOff rm_offset_get_from_fd(_UNUSED int fd, _UNUSED RmOff file_offset, _UNUSED RmOff *file_offset_next,
_UNUSED RmOff *logical_offset, _UNUSED bool *is_last) {
return 0;
}

Expand Down Expand Up @@ -1247,11 +1251,11 @@ RmLinkType rm_util_link_type(char *path1, char *path2) {
bool is_last_2 = false;

while(!rm_session_was_aborted()) {
RmOff logical_next_1 = 0;
RmOff logical_next_2 = 0;
RmOff logical_1 = 0, logical_2 = 0;
RmOff logical_next_1 = 0, logical_next_2 = 0;

RmOff physical_1 = rm_offset_get_from_fd(fd1, logical_current, &logical_next_1, &is_last_1);
RmOff physical_2 = rm_offset_get_from_fd(fd2, logical_current, &logical_next_2, &is_last_2);
RmOff physical_1 = rm_offset_get_from_fd(fd1, logical_current, &logical_next_1, &logical_1, &is_last_1);
RmOff physical_2 = rm_offset_get_from_fd(fd2, logical_current, &logical_next_2, &logical_2, &is_last_2);

if(is_last_1 != is_last_2) {
RM_RETURN(RM_LINK_NONE);
Expand All @@ -1262,6 +1266,14 @@ RmLinkType rm_util_link_type(char *path1, char *path2) {
rm_log_debug_line("Physical offsets differ at byte %" G_GUINT64_FORMAT
": %"G_GUINT64_FORMAT "<> %" G_GUINT64_FORMAT,
logical_current, physical_1, physical_2);
#endif
RM_RETURN(RM_LINK_NONE);
}
if(logical_1 != logical_2) {
#if _RM_OFFSET_DEBUG
rm_log_debug_line("Logical offsets differ after %" G_GUINT64_FORMAT
" bytes: %" G_GUINT64_FORMAT "<> %" G_GUINT64_FORMAT,
logical_current, logical_1, logical_2);
#endif
RM_RETURN(RM_LINK_NONE);
}
Expand Down
2 changes: 1 addition & 1 deletion lib/utilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ bool rm_mounts_can_reflink(RmMountTable *self, dev_t source, dev_t dest);
*
* @return the physical offset starting from the disk.
*/
RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, bool *is_last);
RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, RmOff *logical_offset, bool *is_last);

/**
* @brief Lookup the physical offset of a file path at any given offset.
Expand Down
31 changes: 31 additions & 0 deletions tests/test_mains/test_is_reflink.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,34 @@ def test_last_extent_differs():
@with_setup(usual_setup_func, usual_teardown_func)
def test_reflink_ends_with_hole():
_make_reflink_testcase(extents=1, hole_extents=1)


def _copy_file_range(src, dst, count, offset_src, offset_dst):
bytes_copied = os.copy_file_range(src, dst, count, offset_src, offset_dst)
if bytes_copied < count:
raise RuntimeError('copy_file_range only copied {} bytes (expected {})'.format(bytes_copied, count))


# GitHub issue #611: Make sure holes can be detected when the physical offsets and logical
# extent ends are otherwise the same.
@needs_reflink_fs
@with_setup(usual_setup_func, usual_teardown_func)
def test_hole_before_extent():
path_a = os.path.join(TESTDIR_NAME, 'a')
path_b = os.path.join(TESTDIR_NAME, 'b')

def kb(n): return 1024 * n

_run_dd_urandom(path_a, '8K', 2)
with open(path_b, 'wb') as f:
os.truncate(f.fileno(), kb(16)) # same-sized file with no extents

with open(path_a, 'rb') as fsrc, open(path_b, 'wb') as fdst:
infd, outfd = fsrc.fileno(), fdst.fileno()
# copy first half of first extent with 4K offset
_copy_file_range(infd, outfd, kb(4), kb(0), kb(4))
# copy second extent
_copy_file_range(infd, outfd, kb(8), kb(8), kb(8))

# expect RM_LINK_NONE
check_is_reflink_status(1, path_a, path_b)

0 comments on commit b64f89f

Please sign in to comment.