From 340f585d100e4e8cca82e620b0d44e7da314e4cb Mon Sep 17 00:00:00 2001
From: Andriy Gapon <andriy.gapon@clusterhq.com>
Date: Fri, 30 Oct 2015 13:30:30 +0200
Subject: [PATCH 1/2] add lzc_receive_with_header() and a helper function
 receive_header()

These functions make it possible to examine the stream metadata that is
included in the stream's begin record.  In particular, we will be able
to see the snapshot name used on the sending side, so that we can use
the same snapshot name on the receiving side.

See:
- https://clusterhq.atlassian.net/browse/ZFS-20
- https://reviews.csiden.org/r/256/
- https://reviews.csiden.org/r/256/bugs/6051/
---
 libzfs_core/__init__.py              |   4 +
 libzfs_core/_libzfs_core.py          | 119 +++++++++++++++++++++++++++
 libzfs_core/bindings/libzfs_core.py  |  38 ++++++++-
 libzfs_core/test/test_libzfs_core.py |  39 +++++----
 4 files changed, 181 insertions(+), 19 deletions(-)

diff --git a/libzfs_core/__init__.py b/libzfs_core/__init__.py
index 54c2969..de00bdd 100644
--- a/libzfs_core/__init__.py
+++ b/libzfs_core/__init__.py
@@ -45,6 +45,7 @@
     lzc_send,
     lzc_send_space,
     lzc_receive,
+    lzc_receive_with_header,
     lzc_recv,
     lzc_exists,
     is_supported,
@@ -56,6 +57,7 @@
     lzc_get_props,
     lzc_list_children,
     lzc_list_snaps,
+    receive_header,
 )
 
 __all__ = [
@@ -78,6 +80,7 @@
     'lzc_send',
     'lzc_send_space',
     'lzc_receive',
+    'lzc_receive_with_header',
     'lzc_recv',
     'lzc_exists',
     'is_supported',
@@ -89,6 +92,7 @@
     'lzc_get_props',
     'lzc_list_children',
     'lzc_list_snaps',
+    'receive_header',
 ]
 
 # vim: softtabstop=4 tabstop=4 expandtab shiftwidth=4
diff --git a/libzfs_core/_libzfs_core.py b/libzfs_core/_libzfs_core.py
index 2dbdaed..67d890a 100644
--- a/libzfs_core/_libzfs_core.py
+++ b/libzfs_core/_libzfs_core.py
@@ -652,6 +652,125 @@ def lzc_receive(snapname, fd, force=False, origin=None, props=None):
 lzc_recv = lzc_receive
 
 
+def lzc_receive_with_header(snapname, fd, header, force=False, origin=None, props=None):
+    '''
+    Like :func:`lzc_receive`, but allows the caller to read the begin record
+    and then to pass it in.
+
+    That could be useful if the caller wants to derive, for example,
+    the snapname or the origin parameters based on the information contained in
+    the begin record.
+    :func:`receive_header` can be used to receive the begin record from the file
+    descriptor.
+
+    :param bytes snapname: the name of the snapshot to create.
+    :param int fd: the file descriptor from which to read the stream.
+    :param header: the stream's begin header.
+    :type header: ``cffi`` `CData` representing the header structure.
+    :param bool force: whether to roll back or destroy the target filesystem
+                       if that is required to receive the stream.
+    :param origin: the optional origin snapshot name if the stream is for a clone.
+    :type origin: bytes or None
+    :param props: the properties to set on the snapshot as *received* properties.
+    :type props: dict of bytes : Any
+
+    :raises IOError: if an input / output error occurs while reading from the ``fd``.
+    :raises DatasetExists: if the snapshot named ``snapname`` already exists.
+    :raises DatasetExists: if the stream is a full stream and the destination filesystem already exists.
+    :raises DatasetExists: if ``force`` is `True` but the destination filesystem could not
+                           be rolled back to a matching snapshot because a newer snapshot
+                           exists and it is an origin of a cloned filesystem.
+    :raises StreamMismatch: if an incremental stream is received and the latest
+                            snapshot of the destination filesystem does not match
+                            the source snapshot of the stream.
+    :raises StreamMismatch: if a full stream is received and the destination
+                            filesystem already exists and it has at least one snapshot,
+                            and ``force`` is `False`.
+    :raises StreamMismatch: if an incremental clone stream is received but the specified
+                            ``origin`` is not the actual received origin.
+    :raises DestinationModified: if an incremental stream is received and the destination
+                                 filesystem has been modified since the last snapshot
+                                 and ``force`` is `False`.
+    :raises DestinationModified: if a full stream is received and the destination
+                                 filesystem already exists and it does not have any
+                                 snapshots, and ``force`` is `False`.
+    :raises DatasetNotFound: if the destination filesystem and its parent do not exist.
+    :raises DatasetNotFound: if the ``origin`` is not `None` and does not exist.
+    :raises DatasetBusy: if ``force`` is `True` but the destination filesystem could not
+                         be rolled back to a matching snapshot because a newer snapshot
+                         is held and could not be destroyed.
+    :raises DatasetBusy: if another receive operation is being performed on the
+                         destination filesystem.
+    :raises BadStream: if the stream is corrupt or it is not recognized or it is
+                       a compound stream or it is a clone stream, but ``origin``
+                       is `None`.
+    :raises BadStream: if a clone stream is received and the destination filesystem
+                       already exists.
+    :raises StreamFeatureNotSupported: if the stream has a feature that is not
+                                       supported on this side.
+    :raises PropertyInvalid: if one or more of the specified properties is invalid
+                             or has an invalid type or value.
+    :raises NameInvalid: if the name of either snapshot is invalid.
+    :raises NameTooLong: if the name of either snapshot is too long.
+    '''
+
+    if origin is not None:
+        c_origin = origin
+    else:
+        c_origin = _ffi.NULL
+    if props is None:
+        props = {}
+    nvlist = nvlist_in(props)
+    ret = _lib.lzc_receive_with_header(snapname, nvlist, c_origin, force,
+                                       False, fd, _ffi.addressof(header))
+    errors.lzc_receive_translate_error(ret, snapname, fd, force, origin, props)
+
+
+def receive_header(fd):
+    '''
+    Read the begin record of the ZFS backup stream from the given file descriptor.
+
+    This is a helper function for :func:`lzc_receive_with_header`.
+
+    :param int fd: the file descriptor from which to read the stream.
+    :return: a tuple with two elements where the first one is a Python `dict` representing
+             the fields of the begin record and the second one is an opaque object
+             suitable for passing to :func:`lzc_receive_with_header`.
+    :raises IOError: if an input / output error occurs while reading from the ``fd``.
+
+    At present the following fields can be of interest in the header:
+
+    drr_toname : bytes
+        the name of the snapshot for which the stream has been created
+    drr_toguid : integer
+        the GUID of the snapshot for which the stream has been created
+    drr_fromguid : integer
+        the GUID of the starting snapshot in the case the stream is incremental,
+        zero otherwise
+    drr_flags : integer
+        the flags describing the stream's properties
+    drr_type : integer
+        the type of the dataset for which the stream has been created
+        (volume, filesystem)
+    '''
+    # read sizeof(dmu_replay_record_t) bytes directly into the memory backing 'record'
+    record = _ffi.new("dmu_replay_record_t *")
+    _ffi.buffer(record)[:] = os.read(fd, _ffi.sizeof(record[0]))
+    # get drr_begin member and its representation as a Pythn dict
+    c_header = record.drr_u.drr_begin
+    header = {}
+    for field, descr in _ffi.typeof(c_header).fields:
+        if descr.type.kind == 'primitive':
+            header[field] = getattr(c_header, field)
+        elif descr.type.kind == 'enum':
+            header[field] = getattr(c_header, field)
+        elif descr.type.kind == 'array' and descr.type.item.cname == 'char':
+            header[field] = _ffi.string(getattr(c_header, field))
+        else:
+            raise TypeError('Unexpected field type in drr_begin: ' + str(descr.type))
+    return (header, c_header)
+
+
 def lzc_exists(name):
     '''
     Check if a dataset (a filesystem, or a volume, or a snapshot)
diff --git a/libzfs_core/bindings/libzfs_core.py b/libzfs_core/bindings/libzfs_core.py
index afb3931..686e518 100644
--- a/libzfs_core/bindings/libzfs_core.py
+++ b/libzfs_core/bindings/libzfs_core.py
@@ -20,6 +20,40 @@
         DMU_OST_NUMTYPES
     } dmu_objset_type_t;
 
+    #define MAXNAMELEN 256
+
+    struct drr_begin {
+        uint64_t drr_magic;
+        uint64_t drr_versioninfo; /* was drr_version */
+        uint64_t drr_creation_time;
+        dmu_objset_type_t drr_type;
+        uint32_t drr_flags;
+        uint64_t drr_toguid;
+        uint64_t drr_fromguid;
+        char drr_toname[MAXNAMELEN];
+    };
+
+    typedef struct zio_cksum {
+        uint64_t	zc_word[4];
+    } zio_cksum_t;
+
+    typedef struct dmu_replay_record {
+        enum {
+            DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
+            DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
+            DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES
+        } drr_type;
+        uint32_t drr_payloadlen;
+        union {
+            struct drr_begin drr_begin;
+            /* ... */
+            struct drr_checksum {
+                uint64_t drr_pad[34];
+                zio_cksum_t drr_checksum;
+            } drr_checksum;
+        } drr_u;
+    } dmu_replay_record_t;
+
     int libzfs_core_init(void);
     void libzfs_core_fini(void);
 
@@ -38,8 +72,10 @@
     int lzc_get_holds(const char *, nvlist_t **);
 
     int lzc_send(const char *, const char *, int, enum lzc_send_flags);
-    int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
     int lzc_send_space(const char *, const char *, uint64_t *);
+    int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
+    int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t,
+        boolean_t, int, const struct drr_begin *);
 
     boolean_t lzc_exists(const char *);
 
diff --git a/libzfs_core/test/test_libzfs_core.py b/libzfs_core/test/test_libzfs_core.py
index 364c2e5..e5b474f 100644
--- a/libzfs_core/test/test_libzfs_core.py
+++ b/libzfs_core/test/test_libzfs_core.py
@@ -1800,24 +1800,6 @@ def test_recv_incremental(self):
             self.assertTrue(
                 filecmp.cmp(os.path.join(mnt1, name), os.path.join(mnt2, name), False))
 
-    # This test case fails unless unless a patch from
-    # https://clusterhq.atlassian.net/browse/ZFS-20
-    # is applied to libzfs_core, otherwise it succeeds.
-    @unittest.skip("fails with unpatched libzfs_core")
-    def test_recv_without_explicit_snap_name(self):
-        srcfs = ZFSTest.pool.makeName("fs1")
-        src1 = srcfs + "@snap1"
-        src2 = srcfs + "@snap2"
-        dstfs = ZFSTest.pool.makeName("fs2/received-100")
-        dst1 = dstfs + '@snap1'
-        dst2 = dstfs + '@snap2'
-
-        with streams(srcfs, src1, src2) as (_, (full, incr)):
-            lzc.lzc_receive(dstfs, full.fileno())
-            lzc.lzc_receive(dstfs, incr.fileno())
-        self.assertExists(dst1)
-        self.assertExists(dst2)
-
     def test_recv_clone(self):
         orig_src = ZFSTest.pool.makeName("fs2@send-origin")
         clone = ZFSTest.pool.makeName("fs1/fs/send-clone")
@@ -2432,6 +2414,27 @@ def test_recv_incremental_into_cloned_fs(self):
         self.assertExists(dst1)
         self.assertNotExists(dst2)
 
+    def test_recv_with_header_full(self):
+        src = ZFSTest.pool.makeName("fs1@snap")
+        dst = ZFSTest.pool.makeName("fs2/received")
+
+        with temp_file_in_fs(ZFSTest.pool.makeName("fs1")) as name:
+            lzc.lzc_snapshot([src])
+
+        with tempfile.TemporaryFile(suffix='.ztream') as stream:
+            lzc.lzc_send(src, None, stream.fileno())
+            stream.seek(0)
+
+            (header, c_header) = lzc.receive_header(stream.fileno())
+            self.assertEqual(src, header['drr_toname'])
+            snap = header['drr_toname'].split('@', 1)[1]
+            lzc.lzc_receive_with_header(dst + '@' + snap, stream.fileno(), c_header)
+
+        name = os.path.basename(name)
+        with zfs_mount(src) as mnt1, zfs_mount(dst) as mnt2:
+            self.assertTrue(
+                filecmp.cmp(os.path.join(mnt1, name), os.path.join(mnt2, name), False))
+
     def test_send_full_across_clone_branch_point(self):
         origfs = ZFSTest.pool.makeName("fs2")
 

From 15ba755a84b76199d574e67cac404b51c5a6f7ff Mon Sep 17 00:00:00 2001
From: Andriy Gapon <andriy.gapon@clusterhq.com>
Date: Tue, 3 Nov 2015 10:59:38 +0200
Subject: [PATCH 2/2] lzc_receive_with_header now takes dmu_replay_record
 argument

---
 libzfs_core/_libzfs_core.py         | 14 +++++++-------
 libzfs_core/bindings/libzfs_core.py |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libzfs_core/_libzfs_core.py b/libzfs_core/_libzfs_core.py
index 67d890a..a21a2eb 100644
--- a/libzfs_core/_libzfs_core.py
+++ b/libzfs_core/_libzfs_core.py
@@ -722,7 +722,7 @@ def lzc_receive_with_header(snapname, fd, header, force=False, origin=None, prop
         props = {}
     nvlist = nvlist_in(props)
     ret = _lib.lzc_receive_with_header(snapname, nvlist, c_origin, force,
-                                       False, fd, _ffi.addressof(header))
+                                       False, fd, header)
     errors.lzc_receive_translate_error(ret, snapname, fd, force, origin, props)
 
 
@@ -757,18 +757,18 @@ def receive_header(fd):
     record = _ffi.new("dmu_replay_record_t *")
     _ffi.buffer(record)[:] = os.read(fd, _ffi.sizeof(record[0]))
     # get drr_begin member and its representation as a Pythn dict
-    c_header = record.drr_u.drr_begin
+    drr_begin = record.drr_u.drr_begin
     header = {}
-    for field, descr in _ffi.typeof(c_header).fields:
+    for field, descr in _ffi.typeof(drr_begin).fields:
         if descr.type.kind == 'primitive':
-            header[field] = getattr(c_header, field)
+            header[field] = getattr(drr_begin, field)
         elif descr.type.kind == 'enum':
-            header[field] = getattr(c_header, field)
+            header[field] = getattr(drr_begin, field)
         elif descr.type.kind == 'array' and descr.type.item.cname == 'char':
-            header[field] = _ffi.string(getattr(c_header, field))
+            header[field] = _ffi.string(getattr(drr_begin, field))
         else:
             raise TypeError('Unexpected field type in drr_begin: ' + str(descr.type))
-    return (header, c_header)
+    return (header, record)
 
 
 def lzc_exists(name):
diff --git a/libzfs_core/bindings/libzfs_core.py b/libzfs_core/bindings/libzfs_core.py
index 686e518..1e7fd12 100644
--- a/libzfs_core/bindings/libzfs_core.py
+++ b/libzfs_core/bindings/libzfs_core.py
@@ -75,7 +75,7 @@
     int lzc_send_space(const char *, const char *, uint64_t *);
     int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
     int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t,
-        boolean_t, int, const struct drr_begin *);
+        boolean_t, int, const struct dmu_replay_record *);
 
     boolean_t lzc_exists(const char *);