From fbec76ec7d3985dbb1f1becb4463267a8b15718b Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Sat, 28 Dec 2019 10:07:24 -0600 Subject: [PATCH] Static methods to make empty arrays of a given type. (#38) In the end, they weren't static methods on `Content` objects, but normal methods on `Type` objects. * [WIP] Static methods to make empty arrays of a given type. * Added Type::empty to all types. * Using it in EmptyArray::astype. We're done. --- VERSION_INFO | 2 +- include/awkward/type/ArrayType.h | 1 + include/awkward/type/ListType.h | 1 + include/awkward/type/OptionType.h | 1 + include/awkward/type/PrimitiveType.h | 1 + include/awkward/type/RecordType.h | 1 + include/awkward/type/RegularType.h | 1 + include/awkward/type/Type.h | 3 +++ include/awkward/type/UnionType.h | 1 + include/awkward/type/UnknownType.h | 1 + src/libawkward/array/EmptyArray.cpp | 2 +- src/libawkward/type/ArrayType.cpp | 7 +++++ src/libawkward/type/ListType.cpp | 8 ++++++ src/libawkward/type/OptionType.cpp | 5 ++++ src/libawkward/type/PrimitiveType.cpp | 31 ++++++++++++++++++++++ src/libawkward/type/RecordType.cpp | 9 +++++++ src/libawkward/type/RegularType.cpp | 6 +++++ src/libawkward/type/UnionType.cpp | 8 ++++++ src/libawkward/type/UnknownType.cpp | 5 ++++ src/pyawkward.cpp | 24 ++++++++--------- tests/test_PR038_emptyarray_astype.py | 37 +++++++++++++++++++++++++++ 21 files changed, 141 insertions(+), 14 deletions(-) create mode 100644 tests/test_PR038_emptyarray_astype.py diff --git a/VERSION_INFO b/VERSION_INFO index 9f42295fc2..5786113b6a 100644 --- a/VERSION_INFO +++ b/VERSION_INFO @@ -1 +1 @@ -0.1.37 +0.1.38 diff --git a/include/awkward/type/ArrayType.h b/include/awkward/type/ArrayType.h index 933fe3be53..8d6045d5d8 100644 --- a/include/awkward/type/ArrayType.h +++ b/include/awkward/type/ArrayType.h @@ -18,6 +18,7 @@ namespace awkward { const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; const std::vector keys() const override; + const std::shared_ptr empty() const override; const std::shared_ptr type() const; int64_t length() const; diff --git a/include/awkward/type/ListType.h b/include/awkward/type/ListType.h index e2cc38980a..6a1f231397 100644 --- a/include/awkward/type/ListType.h +++ b/include/awkward/type/ListType.h @@ -18,6 +18,7 @@ namespace awkward { const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; const std::vector keys() const override; + const std::shared_ptr empty() const override; const std::shared_ptr type() const; diff --git a/include/awkward/type/OptionType.h b/include/awkward/type/OptionType.h index f2cb2723ad..6886e2ae04 100644 --- a/include/awkward/type/OptionType.h +++ b/include/awkward/type/OptionType.h @@ -18,6 +18,7 @@ namespace awkward { const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; const std::vector keys() const override; + const std::shared_ptr empty() const override; const std::shared_ptr type() const; diff --git a/include/awkward/type/PrimitiveType.h b/include/awkward/type/PrimitiveType.h index 80645e7952..e647bc30b9 100644 --- a/include/awkward/type/PrimitiveType.h +++ b/include/awkward/type/PrimitiveType.h @@ -33,6 +33,7 @@ namespace awkward { const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; const std::vector keys() const override; + const std::shared_ptr empty() const override; const DType dtype() const; diff --git a/include/awkward/type/RecordType.h b/include/awkward/type/RecordType.h index 6071372d8f..3513181e0c 100644 --- a/include/awkward/type/RecordType.h +++ b/include/awkward/type/RecordType.h @@ -29,6 +29,7 @@ namespace awkward { const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; const std::vector keys() const override; + const std::shared_ptr empty() const override; const std::shared_ptr field(int64_t fieldindex) const; const std::shared_ptr field(const std::string& key) const; diff --git a/include/awkward/type/RegularType.h b/include/awkward/type/RegularType.h index 20fde5d707..92774dff18 100644 --- a/include/awkward/type/RegularType.h +++ b/include/awkward/type/RegularType.h @@ -20,6 +20,7 @@ namespace awkward { const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; const std::vector keys() const override; + const std::shared_ptr empty() const override; const std::shared_ptr type() const; int64_t size() const; diff --git a/include/awkward/type/Type.h b/include/awkward/type/Type.h index d985b62f78..d273a08bac 100644 --- a/include/awkward/type/Type.h +++ b/include/awkward/type/Type.h @@ -10,6 +10,8 @@ #include "awkward/cpu-kernels/util.h" namespace awkward { + class Content; + class Type { public: typedef std::map Parameters; @@ -27,6 +29,7 @@ namespace awkward { virtual const std::string key(int64_t fieldindex) const = 0; virtual bool haskey(const std::string& key) const = 0; virtual const std::vector keys() const = 0; + virtual const std::shared_ptr empty() const = 0; const Parameters parameters() const; void setparameters(const Parameters& parameters); diff --git a/include/awkward/type/UnionType.h b/include/awkward/type/UnionType.h index 6107348a07..757c9117e5 100644 --- a/include/awkward/type/UnionType.h +++ b/include/awkward/type/UnionType.h @@ -20,6 +20,7 @@ namespace awkward { const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; const std::vector keys() const override; + const std::shared_ptr empty() const override; int64_t numtypes() const; const std::vector> types() const; diff --git a/include/awkward/type/UnknownType.h b/include/awkward/type/UnknownType.h index 1836ce5867..244ecea49c 100644 --- a/include/awkward/type/UnknownType.h +++ b/include/awkward/type/UnknownType.h @@ -18,6 +18,7 @@ namespace awkward { const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; const std::vector keys() const override; + const std::shared_ptr empty() const override; private: }; diff --git a/src/libawkward/array/EmptyArray.cpp b/src/libawkward/array/EmptyArray.cpp index eeef6d54ab..80dd5712a9 100644 --- a/src/libawkward/array/EmptyArray.cpp +++ b/src/libawkward/array/EmptyArray.cpp @@ -40,7 +40,7 @@ namespace awkward { } const std::shared_ptr EmptyArray::astype(const std::shared_ptr& type) const { - return std::make_shared(id_, type); + return type.get()->empty(); } const std::string EmptyArray::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { diff --git a/src/libawkward/type/ArrayType.cpp b/src/libawkward/type/ArrayType.cpp index 975e7492c9..c0fa4a80d3 100644 --- a/src/libawkward/type/ArrayType.cpp +++ b/src/libawkward/type/ArrayType.cpp @@ -55,6 +55,13 @@ namespace awkward { return type_.get()->keys(); } + const std::shared_ptr ArrayType::empty() const { + if (length_ != 0) { + throw std::invalid_argument(std::string("ArrayType with length ") + std::to_string(length_) + std::string(" does not describe an empty array")); + } + return type_.get()->empty(); + } + int64_t ArrayType::length() const { return length_; } diff --git a/src/libawkward/type/ListType.cpp b/src/libawkward/type/ListType.cpp index 2548a5b031..81697ce6a7 100644 --- a/src/libawkward/type/ListType.cpp +++ b/src/libawkward/type/ListType.cpp @@ -3,6 +3,7 @@ #include #include +#include "awkward/array/ListOffsetArray.h" #include "awkward/type/UnknownType.h" #include "awkward/type/OptionType.h" @@ -65,6 +66,13 @@ namespace awkward { return type_.get()->keys(); } + const std::shared_ptr ListType::empty() const { + Index64 offsets(1); + offsets.ptr().get()[0] = 0; + std::shared_ptr content = type_.get()->empty(); + return std::make_shared(Identity::none(), Type::none(), offsets, content); + } + const std::shared_ptr ListType::type() const { return type_; } diff --git a/src/libawkward/type/OptionType.cpp b/src/libawkward/type/OptionType.cpp index edeebcd39e..365ea482c2 100644 --- a/src/libawkward/type/OptionType.cpp +++ b/src/libawkward/type/OptionType.cpp @@ -72,6 +72,11 @@ namespace awkward { return type_.get()->keys(); } + const std::shared_ptr OptionType::empty() const { + std::shared_ptr content = type_.get()->empty(); + throw std::runtime_error("OptionType::empty() needs OptionArray"); + } + const std::shared_ptr OptionType::type() const { std::shared_ptr out = type_; while (OptionType* t = dynamic_cast(out.get())) { diff --git a/src/libawkward/type/PrimitiveType.cpp b/src/libawkward/type/PrimitiveType.cpp index e2f9fce0ac..2a4bc8a4ce 100644 --- a/src/libawkward/type/PrimitiveType.cpp +++ b/src/libawkward/type/PrimitiveType.cpp @@ -3,6 +3,7 @@ #include #include +#include "awkward/array/NumpyArray.h" #include "awkward/type/UnknownType.h" #include "awkward/type/OptionType.h" @@ -80,6 +81,36 @@ namespace awkward { throw std::invalid_argument("type contains no Records"); } + const std::shared_ptr PrimitiveType::empty() const { + std::shared_ptr ptr(new uint8_t[0], awkward::util::array_deleter()); + std::vector shape({ 0 }); + std::vector strides({ 0 }); + ssize_t itemsize; + std::string format; + switch (dtype_) { + case boolean: itemsize = 1; format = "?"; break; + case int8: itemsize = 1; format = "b"; break; + case uint8: itemsize = 1; format = "B"; break; + case int16: itemsize = 2; format = "h"; break; + case uint16: itemsize = 2; format = "H"; break; +#ifdef _MSC_VER + case int32: itemsize = 4; format = "l"; break; + case uint32: itemsize = 4; format = "L"; break; + case int64: itemsize = 8; format = "q"; break; + case uint64: itemsize = 8; format = "Q"; break; +#else + case int32: itemsize = 4; format = "i"; break; + case uint32: itemsize = 4; format = "I"; break; + case int64: itemsize = 8; format = "l"; break; + case uint64: itemsize = 8; format = "L"; break; +#endif + case float32: itemsize = 4; format = "f"; break; + case float64: itemsize = 8; format = "d"; break; + default: throw std::runtime_error(std::string("unexpected dtype: ") + std::to_string(dtype_)); + } + return std::make_shared(Identity::none(), Type::none(), ptr, shape, strides, 0, itemsize, format); + } + const PrimitiveType::DType PrimitiveType::dtype() const { return dtype_; } diff --git a/src/libawkward/type/RecordType.cpp b/src/libawkward/type/RecordType.cpp index e0a6350f60..dcbbc57e3d 100644 --- a/src/libawkward/type/RecordType.cpp +++ b/src/libawkward/type/RecordType.cpp @@ -3,6 +3,7 @@ #include #include +#include "awkward/array/RecordArray.h" #include "awkward/type/UnknownType.h" #include "awkward/type/OptionType.h" #include "awkward/util.h" @@ -160,6 +161,14 @@ namespace awkward { return util::keys(recordlookup_, numfields()); } + const std::shared_ptr RecordType::empty() const { + std::vector> contents; + for (auto type : types_) { + contents.push_back(type.get()->empty()); + } + return std::make_shared(Identity::none(), Type::none(), contents, recordlookup_); + } + const std::shared_ptr RecordType::field(int64_t fieldindex) const { if (fieldindex >= numfields()) { throw std::invalid_argument(std::string("fieldindex ") + std::to_string(fieldindex) + std::string(" for record with only " + std::to_string(numfields()) + std::string(" fields"))); diff --git a/src/libawkward/type/RegularType.cpp b/src/libawkward/type/RegularType.cpp index 08cc383efd..49c5260111 100644 --- a/src/libawkward/type/RegularType.cpp +++ b/src/libawkward/type/RegularType.cpp @@ -3,6 +3,7 @@ #include #include +#include "awkward/array/RegularArray.h" #include "awkward/type/UnknownType.h" #include "awkward/type/OptionType.h" @@ -66,6 +67,11 @@ namespace awkward { return type_.get()->keys(); } + const std::shared_ptr RegularType::empty() const { + std::shared_ptr content = type_.get()->empty(); + return std::make_shared(Identity::none(), Type::none(), content, size_); + } + const std::shared_ptr RegularType::type() const { return type_; } diff --git a/src/libawkward/type/UnionType.cpp b/src/libawkward/type/UnionType.cpp index 173f7b6f01..82a4bc6ab5 100644 --- a/src/libawkward/type/UnionType.cpp +++ b/src/libawkward/type/UnionType.cpp @@ -86,6 +86,14 @@ namespace awkward { return types_; } + const std::shared_ptr UnionType::empty() const { + std::vector> contents; + for (auto type : types_) { + contents.push_back(type.get()->empty()); + } + throw std::runtime_error("UnionType::empty() needs UnionArray"); + } + const std::shared_ptr UnionType::type(int64_t index) const { return types_[(size_t)index]; } diff --git a/src/libawkward/type/UnknownType.cpp b/src/libawkward/type/UnknownType.cpp index 4ddf1088da..f17e3ed34e 100644 --- a/src/libawkward/type/UnknownType.cpp +++ b/src/libawkward/type/UnknownType.cpp @@ -3,6 +3,7 @@ #include #include +#include "awkward/array/EmptyArray.h" #include "awkward/type/UnknownType.h" namespace awkward { @@ -60,4 +61,8 @@ namespace awkward { const std::vector UnknownType::keys() const { throw std::invalid_argument("type contains no Records"); } + + const std::shared_ptr UnknownType::empty() const { + return std::make_shared(Identity::none(), Type::none()); + } } diff --git a/src/pyawkward.cpp b/src/pyawkward.cpp index 7b250aced7..2ba2fd4752 100644 --- a/src/pyawkward.cpp +++ b/src/pyawkward.cpp @@ -49,18 +49,6 @@ class pyobject_deleter { PyObject* pyobj_; }; -py::class_> make_Type(py::handle m, std::string name) { - return (py::class_>(m, name.c_str()) - .def("__eq__", [](std::shared_ptr self, std::shared_ptr other) -> bool { - return self.get()->equal(other, true); - }) - .def("__ne__", [](std::shared_ptr self, std::shared_ptr other) -> bool { - return !self.get()->equal(other, true); - }) - ); -} - - py::object box(std::shared_ptr t) { if (ak::ArrayType* raw = dynamic_cast(t.get())) { return py::cast(*raw); @@ -799,6 +787,17 @@ void setparameters(T& self, py::object parameters) { self.setparameters(dict2parameters(parameters)); } +py::class_> make_Type(py::handle m, std::string name) { + return (py::class_>(m, name.c_str()) + .def("__eq__", [](std::shared_ptr self, std::shared_ptr other) -> bool { + return self.get()->equal(other, true); + }) + .def("__ne__", [](std::shared_ptr self, std::shared_ptr other) -> bool { + return !self.get()->equal(other, true); + }) + ); +} + template py::class_ type_methods(py::class_, ak::Type>& x) { return x.def("__repr__", &T::tostring) @@ -816,6 +815,7 @@ py::class_ type_methods(py::class_, ak::Type> .def("key", &T::key) .def("haskey", &T::haskey) .def("keys", &T::keys) + .def("empty", &T::empty) ; } diff --git a/tests/test_PR038_emptyarray_astype.py b/tests/test_PR038_emptyarray_astype.py new file mode 100644 index 0000000000..3305641624 --- /dev/null +++ b/tests/test_PR038_emptyarray_astype.py @@ -0,0 +1,37 @@ +# BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +import sys + +import pytest +import numpy + +import awkward1 + +def test_typeempty(): + assert numpy.asarray(awkward1.layout.PrimitiveType("bool").empty()).dtype == numpy.dtype("bool") + assert numpy.asarray(awkward1.layout.PrimitiveType("int8").empty()).dtype == numpy.dtype("int8") + assert numpy.asarray(awkward1.layout.PrimitiveType("int16").empty()).dtype == numpy.dtype("int16") + assert numpy.asarray(awkward1.layout.PrimitiveType("int32").empty()).dtype == numpy.dtype("int32") + assert numpy.asarray(awkward1.layout.PrimitiveType("int64").empty()).dtype == numpy.dtype("int64") + assert numpy.asarray(awkward1.layout.PrimitiveType("uint8").empty()).dtype == numpy.dtype("uint8") + assert numpy.asarray(awkward1.layout.PrimitiveType("uint16").empty()).dtype == numpy.dtype("uint16") + assert numpy.asarray(awkward1.layout.PrimitiveType("uint32").empty()).dtype == numpy.dtype("uint32") + assert numpy.asarray(awkward1.layout.PrimitiveType("uint64").empty()).dtype == numpy.dtype("uint64") + assert numpy.asarray(awkward1.layout.PrimitiveType("float32").empty()).dtype == numpy.dtype("float32") + assert numpy.asarray(awkward1.layout.PrimitiveType("float64").empty()).dtype == numpy.dtype("float64") + assert type(awkward1.layout.UnknownType().empty()) is awkward1.layout.EmptyArray + assert type(awkward1.layout.ArrayType(awkward1.layout.UnknownType(), 0).empty()) is awkward1.layout.EmptyArray + assert type(awkward1.layout.RegularType(awkward1.layout.UnknownType(), 5).empty()) is awkward1.layout.RegularArray + assert type(awkward1.layout.ListType(awkward1.layout.UnknownType()).empty()) is awkward1.layout.ListOffsetArray64 + array = awkward1.layout.RecordType({"one": awkward1.layout.PrimitiveType("float64"), "two": awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64"))}).empty() + assert type(array) is awkward1.layout.RecordArray + assert type(array["one"]) is awkward1.layout.NumpyArray + assert numpy.asarray(array["one"]).dtype == numpy.dtype("float64") + assert type(array["two"]) is awkward1.layout.ListOffsetArray64 + +def test_astype(): + empty = awkward1.layout.EmptyArray() + assert numpy.asarray(empty.astype(awkward1.layout.PrimitiveType("bool"))).dtype == numpy.dtype("bool") + assert numpy.asarray(empty.astype(awkward1.layout.PrimitiveType("uint8"))).dtype == numpy.dtype("uint8") + assert numpy.asarray(empty.astype(awkward1.layout.PrimitiveType("float64"))).dtype == numpy.dtype("float64") + assert type(empty.astype(awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64")))) is awkward1.layout.ListOffsetArray64