Skip to content

Commit

Permalink
add serialization methods to host buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 committed Nov 14, 2024
1 parent 658ba08 commit 89cb0c3
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 7 deletions.
14 changes: 7 additions & 7 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ import pickle
from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

import pylibcudf

import cudf
from cudf.core.buffer import acquire_spill_lock, as_buffer

from cudf.core.abc import Serializable
from cudf._lib.column cimport Column

from cudf._lib.scalar import as_device_scalar
Expand Down Expand Up @@ -331,7 +330,7 @@ def get_element(Column input_column, size_type index):
)


class PackedColumns:
class PackedColumns(Serializable):
"""
A packed representation of a Frame, with all columns residing
in a single GPU memory buffer.
Expand Down Expand Up @@ -366,6 +365,7 @@ class PackedColumns:

header["column-names"] = self.column_names
header["index-names"] = self.index_names
header["metadata"] = self._metadata.obj
for name, dtype in self.column_dtypes.items():
dtype_header, dtype_frames = dtype.serialize()
self.column_dtypes[name] = (
Expand All @@ -375,7 +375,7 @@ class PackedColumns:
frames.extend(dtype_frames)
header["column-dtypes"] = self.column_dtypes
header["type-serialized"] = pickle.dumps(type(self))
return header, ((self._metadata, self._gpu_data), frames)
return header, frames

@classmethod
def deserialize(cls, header, frames):
Expand All @@ -384,12 +384,12 @@ class PackedColumns:
dtype_header, (start, stop) = dtype
column_dtypes[name] = pickle.loads(
dtype_header["type-serialized"]
).deserialize(dtype_header, frames[1][start:stop])
packed_metadata, packed_gpu_data = frames[0]
).deserialize(dtype_header, frames[start:stop])
return cls(
plc.contiguous_split.pack(
plc.contiguous_split.unpack_from_memoryviews(
packed_metadata, packed_gpu_data
memoryview(header["metadata"]),
plc.gpumemoryview(frames[0]),
)
),
header["column-names"],
Expand Down
16 changes: 16 additions & 0 deletions python/pylibcudf/pylibcudf/contiguous_split.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,22 @@ cdef class HostBuffer:
def __releasebuffer__(self, Py_buffer *buffer):
pass

def __getstate__(self):
    """Return the buffer contents as ``bytes`` for pickling.

    The object is viewed through the buffer protocol and its contents
    are copied into a new ``bytes`` object, which is what
    ``__setstate__`` consumes when the object is restored.
    """
    host_view = memoryview(self)
    return host_view.tobytes()

def __setstate__(self, state):
    """Rebuild the underlying byte vector from pickled ``state``.

    Parameters
    ----------
    state
        A sized iterable of byte values, e.g. the ``bytes`` object
        returned by ``__getstate__``.

    A fresh ``vector[uint8_t]`` is filled from ``state`` and moved into
    ``self.c_obj``; ``nbytes``, ``shape[0]`` and ``strides[0]`` are then
    updated to describe the new contents.
    """
    cdef unique_ptr[vector[uint8_t]] restored = unique_ptr[vector[uint8_t]](
        new vector[uint8_t]()
    )
    # Raw pointer is only used while ``restored`` still owns the vector.
    cdef vector[uint8_t]* storage = restored.get()

    # Reserve once so the element-wise appends below do not reallocate.
    storage.reserve(len(state))
    # NOTE(review): elements are copied one at a time via Python
    # iteration; a bulk copy may be worth considering for large buffers.
    for value in state:
        storage.push_back(<uint8_t>value)

    # Transfer ownership to the instance, then refresh the size metadata.
    self.c_obj = move(restored)
    self.nbytes = dereference(self.c_obj).size()
    self.shape[0] = self.nbytes
    self.strides[0] = 1


cdef class PackedColumns:
"""Column data in a serialized format.
Expand Down

0 comments on commit 89cb0c3

Please sign in to comment.