From 2e20719b91a1300b6383553e571fd65dfce1e202 Mon Sep 17 00:00:00 2001 From: itholic Date: Mon, 15 Feb 2021 16:45:08 +0900 Subject: [PATCH 1/6] Add tests --- databricks/koalas/series.py | 56 ++ .../koalas/tests/test_ops_on_diff_frames.py | 677 ++++++++++++++++++ 2 files changed, 733 insertions(+) diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py index 93b818d2f1..21ba0cd3c1 100644 --- a/databricks/koalas/series.py +++ b/databricks/koalas/series.py @@ -452,6 +452,10 @@ def spark_type(self): def add(self, other) -> "Series": return self + other + def __add__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__add__(self, other) + add.__doc__ = _flex_doc_SERIES.format( desc="Addition", op_name="+", @@ -463,6 +467,10 @@ def add(self, other) -> "Series": def radd(self, other) -> "Series": return other + self + def __radd__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__radd__(self, other) + radd.__doc__ = _flex_doc_SERIES.format( desc="Reverse Addition", op_name="+", @@ -498,6 +506,10 @@ def rdiv(self, other) -> "Series": def truediv(self, other) -> "Series": return self / other + def __truediv__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__truediv__(self, other) + truediv.__doc__ = _flex_doc_SERIES.format( desc="Floating division", op_name="/", @@ -509,6 +521,10 @@ def truediv(self, other) -> "Series": def rtruediv(self, other) -> "Series": return other / self + def __rtruediv__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__rtruediv__(self, other) + rtruediv.__doc__ = _flex_doc_SERIES.format( desc="Reverse Floating division", op_name="/", @@ -520,6 +536,10 @@ def rtruediv(self, other) -> "Series": def mul(self, other) -> "Series": 
return self * other + def __mul__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__mul__(self, other) + mul.__doc__ = _flex_doc_SERIES.format( desc="Multiplication", op_name="*", @@ -533,6 +553,10 @@ def mul(self, other) -> "Series": def rmul(self, other) -> "Series": return other * self + def __rmul__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__rmul__(self, other) + rmul.__doc__ = _flex_doc_SERIES.format( desc="Reverse Multiplication", op_name="*", @@ -544,6 +568,10 @@ def rmul(self, other) -> "Series": def sub(self, other) -> "Series": return self - other + def __sub__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__sub__(self, other) + sub.__doc__ = _flex_doc_SERIES.format( desc="Subtraction", op_name="-", @@ -557,6 +585,10 @@ def sub(self, other) -> "Series": def rsub(self, other) -> "Series": return other - self + def __rsub__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__rsub__(self, other) + rsub.__doc__ = _flex_doc_SERIES.format( desc="Reverse Subtraction", op_name="-", @@ -568,6 +600,10 @@ def rsub(self, other) -> "Series": def mod(self, other) -> "Series": return self % other + def __mod__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__mod__(self, other) + mod.__doc__ = _flex_doc_SERIES.format( desc="Modulo", op_name="%", @@ -579,6 +615,10 @@ def mod(self, other) -> "Series": def rmod(self, other) -> "Series": return other % self + def __rmod__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__rmod__(self, other) + rmod.__doc__ = _flex_doc_SERIES.format( desc="Reverse Modulo", 
op_name="%", @@ -590,6 +630,10 @@ def rmod(self, other) -> "Series": def pow(self, other) -> "Series": return self ** other + def __pow__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__pow__(self, other) + pow.__doc__ = _flex_doc_SERIES.format( desc="Exponential power of series", op_name="**", @@ -601,6 +645,10 @@ def pow(self, other) -> "Series": def rpow(self, other) -> "Series": return other ** self + def __rpow__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__rpow__(self, other) + rpow.__doc__ = _flex_doc_SERIES.format( desc="Reverse Exponential power", op_name="**", @@ -612,6 +660,10 @@ def rpow(self, other) -> "Series": def floordiv(self, other) -> "Series": return self // other + def __floordiv__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__floordiv__(self, other) + floordiv.__doc__ = _flex_doc_SERIES.format( desc="Integer division", op_name="//", @@ -623,6 +675,10 @@ def floordiv(self, other) -> "Series": def rfloordiv(self, other) -> "Series": return other // self + def __rfloordiv__(self, other): + other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other + return IndexOpsMixin.__rfloordiv__(self, other) + rfloordiv.__doc__ = _flex_doc_SERIES.format( desc="Reverse Integer division", op_name="//", diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index 36eacc3d5a..65bd7636f7 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -1528,6 +1528,595 @@ def test_pow_and_rpow(self): self.assert_eq(pser ** pser_other, (kser ** kser_other).sort_index()) self.assert_eq(pser.rpow(pser_other), kser.rpow(kser_other).sort_index()) + def 
test_series_add_and_radd(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.add(pandas_other), kser.add(koalas_other).sort_index()) + self.assert_eq(pser + pandas_other, (kser + koalas_other).sort_index()) + self.assert_eq(pser.radd(pandas_other), kser.radd(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.add(pandas_other), kser.add(koalas_other).sort_index()) + self.assert_eq(pser + pandas_other, (kser + koalas_other).sort_index()) + self.assert_eq(pser.radd(pandas_other), kser.radd(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.add(other), kser.add(other).sort_index()) + self.assert_eq(pser + other, (kser + other).sort_index()) + self.assert_eq(pser.radd(other), kser.radd(other).sort_index()) + self.assert_eq(other + pser, (other + kser).sort_index()) + else: + self.assert_eq(pser.add(other).rename("x"), kser.add(other).sort_index()) + self.assert_eq((pser + other).rename("x"), (kser + other).sort_index()) + self.assert_eq(pser.radd(other).rename("x"), kser.radd(other).sort_index()) + self.assert_eq((other + pser).rename("x"), (other + kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.add(other), kser.add(other).sort_index()) + self.assert_eq(pser + other, (kser + other).sort_index()) + self.assert_eq(pser.radd(other), kser.radd(other).sort_index()) + self.assert_eq(other + pser, (other + kser).sort_index()) + else: + self.assert_eq(pser.add(other).rename("x"), 
kser.add(other).sort_index()) + self.assert_eq((pser + other).rename("x"), (kser + other).sort_index()) + self.assert_eq(pser.radd(other).rename("x"), kser.radd(other).sort_index()) + self.assert_eq((other + pser).rename("x"), (other + kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.add(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser + other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.radd(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other + kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.add(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser + other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.radd(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other + kser + + def test_series_sub_and_rsub(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.sub(pandas_other), kser.sub(koalas_other).sort_index()) + self.assert_eq(pser - pandas_other, (kser - koalas_other).sort_index()) + self.assert_eq(pser.rsub(pandas_other), kser.rsub(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], 
name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.sub(pandas_other), kser.sub(koalas_other).sort_index()) + self.assert_eq(pser - pandas_other, (kser - koalas_other).sort_index()) + self.assert_eq(pser.rsub(pandas_other), kser.rsub(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.sub(other), kser.sub(other).sort_index()) + self.assert_eq(pser - other, (kser - other).sort_index()) + # self.assert_eq(pser.rsub(other), kser.rsub(other).sort_index()) + # self.assert_eq(other - pser, (other - kser).sort_index()) + else: + self.assert_eq(pser.sub(other).rename("x"), kser.sub(other).sort_index()) + self.assert_eq((pser - other).rename("x"), (kser - other).sort_index()) + # self.assert_eq(pser.rsub(other).rename("x"), kser.rsub(other).sort_index()) + # self.assert_eq((other - pser).rename("x"), (other - kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.sub(other), kser.sub(other).sort_index()) + self.assert_eq(pser - other, (kser - other).sort_index()) + # self.assert_eq(pser.rsub(other), kser.rsub(other).sort_index()) + # self.assert_eq(other - pser, (other - kser).sort_index()) + else: + self.assert_eq(pser.sub(other).rename("x"), kser.sub(other).sort_index()) + self.assert_eq((pser - other).rename("x"), (kser - other).sort_index()) + # self.assert_eq(pser.rsub(other).rename("x"), kser.rsub(other).sort_index()) + # self.assert_eq((other - pser).rename("x"), (other - kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.sub(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser - other + with 
self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rsub(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other - kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.sub(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser - other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rsub(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other - kser + + def test_series_mul_and_rmul(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.mul(pandas_other), kser.mul(koalas_other).sort_index()) + self.assert_eq(pser * pandas_other, (kser * koalas_other).sort_index()) + self.assert_eq(pser.rmul(pandas_other), kser.rmul(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.mul(pandas_other), kser.mul(koalas_other).sort_index()) + self.assert_eq(pser * pandas_other, (kser * koalas_other).sort_index()) + self.assert_eq(pser.rmul(pandas_other), kser.rmul(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.mul(other), kser.mul(other).sort_index()) + self.assert_eq(pser * other, (kser * other).sort_index()) + self.assert_eq(pser.rmul(other), kser.rmul(other).sort_index()) 
+ self.assert_eq(other * pser, (other * kser).sort_index()) + else: + self.assert_eq(pser.mul(other).rename("x"), kser.mul(other).sort_index()) + self.assert_eq((pser * other).rename("x"), (kser * other).sort_index()) + self.assert_eq(pser.rmul(other).rename("x"), kser.rmul(other).sort_index()) + self.assert_eq((other * pser).rename("x"), (other * kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.mul(other), kser.mul(other).sort_index()) + self.assert_eq(pser * other, (kser * other).sort_index()) + self.assert_eq(pser.rmul(other), kser.rmul(other).sort_index()) + self.assert_eq(other * pser, (other * kser).sort_index()) + else: + self.assert_eq(pser.mul(other).rename("x"), kser.mul(other).sort_index()) + self.assert_eq((pser * other).rename("x"), (kser * other).sort_index()) + self.assert_eq(pser.rmul(other).rename("x"), kser.rmul(other).sort_index()) + self.assert_eq((other * pser).rename("x"), (other * kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.mul(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser * other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rmul(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other * kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.mul(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser * other + with self.assertRaisesRegex( + ValueError, "operands 
could not be broadcast together with shapes" + ): + kser.rmul(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other * kser + + def test_series_pow_and_rpow(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.pow(pandas_other), kser.pow(koalas_other).sort_index()) + self.assert_eq(pser ** pandas_other, (kser ** koalas_other).sort_index()) + self.assert_eq(pser.rpow(pandas_other), kser.rpow(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.pow(pandas_other), kser.pow(koalas_other).sort_index()) + self.assert_eq(pser ** pandas_other, (kser ** koalas_other).sort_index()) + self.assert_eq(pser.rpow(pandas_other), kser.rpow(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.pow(other), kser.pow(other).sort_index()) + self.assert_eq(pser ** other, (kser ** other).sort_index()) + self.assert_eq(pser.rpow(other), kser.rpow(other).sort_index()) + self.assert_eq(other ** pser, (other ** kser).sort_index()) + else: + self.assert_eq(pser.pow(other).rename("x"), kser.pow(other).sort_index()) + self.assert_eq((pser ** other).rename("x"), (kser ** other).sort_index()) + self.assert_eq(pser.rpow(other).rename("x"), kser.rpow(other).sort_index()) + self.assert_eq((other ** pser).rename("x"), (other ** kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.pow(other), kser.pow(other).sort_index()) + self.assert_eq(pser ** other, (kser ** 
other).sort_index()) + self.assert_eq(pser.rpow(other), kser.rpow(other).sort_index()) + self.assert_eq(other ** pser, (other ** kser).sort_index()) + else: + self.assert_eq(pser.pow(other).rename("x"), kser.pow(other).sort_index()) + self.assert_eq((pser ** other).rename("x"), (kser ** other).sort_index()) + self.assert_eq(pser.rpow(other).rename("x"), kser.rpow(other).sort_index()) + self.assert_eq((other ** pser).rename("x"), (other ** kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.pow(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser ** other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rpow(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other ** kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.pow(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser ** other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rpow(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other ** kser + + def test_series_mod_and_rmod(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.mod(pandas_other), kser.mod(koalas_other).sort_index()) + self.assert_eq(pser % 
pandas_other, (kser % koalas_other).sort_index()) + self.assert_eq(pser.rmod(pandas_other), kser.rmod(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.mod(pandas_other), kser.mod(koalas_other).sort_index()) + self.assert_eq(pser % pandas_other, (kser % koalas_other).sort_index()) + self.assert_eq(pser.rmod(pandas_other), kser.rmod(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.mod(other), kser.mod(other).sort_index()) + self.assert_eq(pser % other, (kser % other).sort_index()) + self.assert_eq(pser.rmod(other), kser.rmod(other).sort_index()) + self.assert_eq(other % pser, (other % kser).sort_index()) + else: + self.assert_eq(pser.mod(other).rename("x"), kser.mod(other).sort_index()) + self.assert_eq((pser % other).rename("x"), (kser % other).sort_index()) + self.assert_eq(pser.rmod(other).rename("x"), kser.rmod(other).sort_index()) + self.assert_eq((other % pser).rename("x"), (other % kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.mod(other), kser.mod(other).sort_index()) + self.assert_eq(pser % other, (kser % other).sort_index()) + self.assert_eq(pser.rmod(other), kser.rmod(other).sort_index()) + self.assert_eq(other % pser, (other % kser).sort_index()) + else: + self.assert_eq(pser.mod(other).rename("x"), kser.mod(other).sort_index()) + self.assert_eq((pser % other).rename("x"), (kser % other).sort_index()) + self.assert_eq(pser.rmod(other).rename("x"), kser.rmod(other).sort_index()) + self.assert_eq((other % pser).rename("x"), (other % kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with 
self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.mod(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser % other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rmod(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other % kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.mod(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser % other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rmod(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other % kser + + def test_series_div_and_rdiv(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.div(pandas_other), kser.div(koalas_other).sort_index()) + self.assert_eq(pser / pandas_other, (kser / koalas_other).sort_index()) + self.assert_eq(pser.rdiv(pandas_other), kser.rdiv(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.div(pandas_other), kser.div(koalas_other).sort_index()) + self.assert_eq(pser / pandas_other, (kser / koalas_other).sort_index()) + self.assert_eq(pser.rdiv(pandas_other), kser.rdiv(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if 
LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.div(other), kser.div(other).sort_index()) + self.assert_eq(pser / other, (kser / other).sort_index()) + self.assert_eq(pser.rdiv(other), kser.rdiv(other).sort_index()) + self.assert_eq(other / pser, (other / kser).sort_index()) + else: + self.assert_eq(pser.div(other).rename("x"), kser.div(other).sort_index()) + self.assert_eq((pser / other).rename("x"), (kser / other).sort_index()) + self.assert_eq(pser.rdiv(other).rename("x"), kser.rdiv(other).sort_index()) + self.assert_eq((other / pser).rename("x"), (other / kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.div(other), kser.div(other).sort_index()) + self.assert_eq(pser / other, (kser / other).sort_index()) + self.assert_eq(pser.rdiv(other), kser.rdiv(other).sort_index()) + self.assert_eq(other / pser, (other / kser).sort_index()) + else: + self.assert_eq(pser.div(other).rename("x"), kser.div(other).sort_index()) + self.assert_eq((pser / other).rename("x"), (kser / other).sort_index()) + self.assert_eq(pser.rdiv(other).rename("x"), kser.rdiv(other).sort_index()) + self.assert_eq((other / pser).rename("x"), (other / kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.div(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser / other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rdiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other / kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands 
could not be broadcast together with shapes" + ): + kser.div(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser / other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rdiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other / kser + + def test_series_floordiv_and_rfloordiv(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.floordiv(pandas_other), kser.floordiv(koalas_other).sort_index()) + self.assert_eq(pser // pandas_other, (kser // koalas_other).sort_index()) + self.assert_eq(pser.rfloordiv(pandas_other), kser.rfloordiv(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.floordiv(pandas_other), kser.floordiv(koalas_other).sort_index()) + self.assert_eq(pser // pandas_other, (kser // koalas_other).sort_index()) + self.assert_eq(pser.rfloordiv(pandas_other), kser.rfloordiv(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.floordiv(other), kser.floordiv(other).sort_index()) + self.assert_eq(pser // other, (kser // other).sort_index()) + self.assert_eq(pser.rfloordiv(other), kser.rfloordiv(other).sort_index()) + self.assert_eq(other // pser, (other // kser).sort_index()) + else: + self.assert_eq(pser.floordiv(other).rename("x"), kser.floordiv(other).sort_index()) + self.assert_eq((pser // other).rename("x"), (kser // other).sort_index()) + self.assert_eq(pser.rfloordiv(other).rename("x"), 
kser.rfloordiv(other).sort_index()) + self.assert_eq((other // pser).rename("x"), (other // kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.floordiv(other), kser.floordiv(other).sort_index()) + self.assert_eq(pser // other, (kser // other).sort_index()) + self.assert_eq(pser.rfloordiv(other), kser.rfloordiv(other).sort_index()) + self.assert_eq(other // pser, (other // kser).sort_index()) + else: + self.assert_eq(pser.floordiv(other).rename("x"), kser.floordiv(other).sort_index()) + self.assert_eq((pser // other).rename("x"), (kser // other).sort_index()) + self.assert_eq(pser.rfloordiv(other).rename("x"), kser.rfloordiv(other).sort_index()) + self.assert_eq((other // pser).rename("x"), (other // kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.floordiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser // other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rfloordiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other // kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.floordiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser // other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rfloordiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other // kser + 
class OpsOnDiffFramesDisabledTest(ReusedSQLTestCase, SQLTestUtils): @classmethod @@ -1694,3 +2283,91 @@ def test_pow_and_rpow(self): kser ** kser_other with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): kser.rpow(kser_other) + + def test_series_binary_operators(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + others = ( + ks.Series([np.nan, 1, 3, 4, np.nan, 6], name="x"), + ks.Index([np.nan, 1, 3, 4, np.nan, 6], name="x"), + [np.nan, 1, 3, 4, np.nan, 6], + (np.nan, 1, 3, 4, np.nan, 6), + ) + # `add` and `radd` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.add(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser + other + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.radd(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other + kser + # `sub` and `rsub` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.sub(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser - other + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.rsub(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other - kser + # `mul` and `rmul` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.mul(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser * other + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.rmul(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other * kser + # `pow` 
and `rpow` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.pow(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser ** other + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.rpow(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other ** kser + # `mod` and `rmod` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.mod(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser % other + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.rmod(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other % kser + # `div` and `rdiv` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.div(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser / other + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.rdiv(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other / kser + # `floordiv` and `rfloordiv` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.floordiv(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser // other + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.rfloordiv(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other // kser From 922ba6a0dfb5fd49cc21a1e200915673ecc26667 Mon Sep 17 00:00:00 2001 From: itholic 
Date: Mon, 15 Feb 2021 22:41:52 +0900 Subject: [PATCH 2/6] Fix rsub --- databricks/koalas/base.py | 2 +- .../koalas/tests/test_ops_on_diff_frames.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/databricks/koalas/base.py b/databricks/koalas/base.py index eb0c431838..59911ccb4b 100644 --- a/databricks/koalas/base.py +++ b/databricks/koalas/base.py @@ -495,7 +495,7 @@ def __rsub__(self, other) -> Union["Series", "Index"]: return -column_op(F.datediff)(self, F.lit(other)).astype("long") else: raise TypeError("date subtraction can only be applied to date series.") - return column_op(Column.__rsub__)(self, other) + return column_op(lambda left, right: right - left)(self, other) def __rmul__(self, other) -> Union["Series", "Index"]: if isinstance(other, str): diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index 65bd7636f7..34c4f0b1e6 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -1637,26 +1637,26 @@ def test_series_sub_and_rsub(self): if LooseVersion(pd.__version__) >= LooseVersion("1.2"): self.assert_eq(pser.sub(other), kser.sub(other).sort_index()) self.assert_eq(pser - other, (kser - other).sort_index()) - # self.assert_eq(pser.rsub(other), kser.rsub(other).sort_index()) - # self.assert_eq(other - pser, (other - kser).sort_index()) + self.assert_eq(pser.rsub(other), kser.rsub(other).sort_index()) + self.assert_eq(other - pser, (other - kser).sort_index()) else: self.assert_eq(pser.sub(other).rename("x"), kser.sub(other).sort_index()) self.assert_eq((pser - other).rename("x"), (kser - other).sort_index()) - # self.assert_eq(pser.rsub(other).rename("x"), kser.rsub(other).sort_index()) - # self.assert_eq((other - pser).rename("x"), (other - kser).sort_index()) + self.assert_eq(pser.rsub(other).rename("x"), kser.rsub(other).sort_index()) + self.assert_eq((other - pser).rename("x"), (other 
- kser).sort_index()) # other = tuple other = (np.nan, 1, 3, 4, np.nan, 6) if LooseVersion(pd.__version__) >= LooseVersion("1.2"): self.assert_eq(pser.sub(other), kser.sub(other).sort_index()) self.assert_eq(pser - other, (kser - other).sort_index()) - # self.assert_eq(pser.rsub(other), kser.rsub(other).sort_index()) - # self.assert_eq(other - pser, (other - kser).sort_index()) + self.assert_eq(pser.rsub(other), kser.rsub(other).sort_index()) + self.assert_eq(other - pser, (other - kser).sort_index()) else: self.assert_eq(pser.sub(other).rename("x"), kser.sub(other).sort_index()) self.assert_eq((pser - other).rename("x"), (kser - other).sort_index()) - # self.assert_eq(pser.rsub(other).rename("x"), kser.rsub(other).sort_index()) - # self.assert_eq((other - pser).rename("x"), (other - kser).sort_index()) + self.assert_eq(pser.rsub(other).rename("x"), kser.rsub(other).sort_index()) + self.assert_eq((other - pser).rename("x"), (other - kser).sort_index()) # other = list with the different length other = [np.nan, 1, 3, 4, np.nan] From 02c333420d2d6984b0f2cd7ceaf62eed0bdb6b7a Mon Sep 17 00:00:00 2001 From: itholic Date: Thu, 18 Feb 2021 12:01:11 +0900 Subject: [PATCH 3/6] Add Index --- databricks/koalas/base.py | 30 + databricks/koalas/indexes/base.py | 2 +- databricks/koalas/series.py | 56 -- .../koalas/tests/test_ops_on_diff_frames.py | 518 ++++++++++++++++++ 4 files changed, 549 insertions(+), 57 deletions(-) diff --git a/databricks/koalas/base.py b/databricks/koalas/base.py index 59911ccb4b..f24d85dcf5 100644 --- a/databricks/koalas/base.py +++ b/databricks/koalas/base.py @@ -321,6 +321,8 @@ def spark_column(self) -> Column: __neg__ = column_op(Column.__neg__) def __add__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore if not isinstance(self.spark.data_type, StringType) and ( (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) or 
isinstance(other, str) @@ -339,6 +341,8 @@ def __add__(self, other) -> Union["Series", "Index"]: return column_op(Column.__add__)(self, other) def __sub__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore if ( isinstance(self.spark.data_type, StringType) or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) @@ -383,6 +387,8 @@ def __sub__(self, other) -> Union["Series", "Index"]: return column_op(Column.__sub__)(self, other) def __mul__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore if isinstance(other, str): raise TypeError("multiplication can not be applied to a string literal.") @@ -422,6 +428,8 @@ def __truediv__(self, other) -> Union["Series", "Index"]: | -10 | null | -np.inf | +-----------------------|---------|---------+ """ + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore if ( isinstance(self.spark.data_type, StringType) @@ -440,6 +448,8 @@ def truediv(left, right): return numpy_column_op(truediv)(self, other) def __mod__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore if ( isinstance(self.spark.data_type, StringType) or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) @@ -453,6 +463,8 @@ def mod(left, right): return column_op(mod)(self, other) def __radd__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore # Handle 'literal' + df['col'] if not isinstance(self.spark.data_type, StringType) and isinstance(other, str): raise TypeError("string addition can only be applied to string series or literals.") @@ -466,6 +478,8 @@ def __radd__(self, other) -> Union["Series", "Index"]: return 
column_op(Column.__radd__)(self, other) def __rsub__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore if isinstance(self.spark.data_type, StringType) or isinstance(other, str): raise TypeError("substraction can not be applied to string series or literals.") @@ -498,6 +512,8 @@ def __rsub__(self, other) -> Union["Series", "Index"]: return column_op(lambda left, right: right - left)(self, other) def __rmul__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore if isinstance(other, str): raise TypeError("multiplication can not be applied to a string literal.") @@ -512,6 +528,8 @@ def __rmul__(self, other) -> Union["Series", "Index"]: return column_op(Column.__rmul__)(self, other) def __rtruediv__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore if isinstance(self.spark.data_type, StringType) or isinstance(other, str): raise TypeError("division can not be applied on string series or literals.") @@ -539,6 +557,8 @@ def __floordiv__(self, other) -> Union["Series", "Index"]: | -10 | null | -np.inf | +-----------------------|---------|---------+ """ + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore if ( isinstance(self.spark.data_type, StringType) or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) @@ -560,6 +580,8 @@ def floordiv(left, right): return numpy_column_op(floordiv)(self, other) def __rfloordiv__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore if isinstance(self.spark.data_type, StringType) or isinstance(other, str): raise TypeError("division can not be applied on string series or literals.") @@ -571,6 +593,8 @@ def rfloordiv(left, 
right): return numpy_column_op(rfloordiv)(self, other) def __rmod__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore if isinstance(self.spark.data_type, StringType) or isinstance(other, str): raise TypeError("modulo can not be applied on string series or literals.") @@ -580,12 +604,18 @@ def rmod(left, right): return column_op(rmod)(self, other) def __pow__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore + def pow_func(left, right): return F.when(left == 1, left).otherwise(Column.__pow__(left, right)) return column_op(pow_func)(self, other) def __rpow__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + other = ks.Index(other, name=self.name) # type: ignore + def rpow_func(left, right): return F.when(F.lit(right == 1), right).otherwise(Column.__rpow__(left, right)) diff --git a/databricks/koalas/indexes/base.py b/databricks/koalas/indexes/base.py index 03911b7487..0f02a1396e 100644 --- a/databricks/koalas/indexes/base.py +++ b/databricks/koalas/indexes/base.py @@ -102,7 +102,7 @@ class Index(IndexOpsMixin): Index(['a', 'b', 'c'], dtype='object') """ - def __new__(cls, data: Union[DataFrame, list], dtype=None, name=None, names=None): + def __new__(cls, data: Union[DataFrame, list, tuple], dtype=None, name=None, names=None): from databricks.koalas.indexes.datetimes import DatetimeIndex from databricks.koalas.indexes.multi import MultiIndex from databricks.koalas.indexes.numeric import Float64Index, Int64Index diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py index 21ba0cd3c1..93b818d2f1 100644 --- a/databricks/koalas/series.py +++ b/databricks/koalas/series.py @@ -452,10 +452,6 @@ def spark_type(self): def add(self, other) -> "Series": return self + other - def __add__(self, other): - other = ks.Index(other, name=self.name) if 
isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__add__(self, other) - add.__doc__ = _flex_doc_SERIES.format( desc="Addition", op_name="+", @@ -467,10 +463,6 @@ def __add__(self, other): def radd(self, other) -> "Series": return other + self - def __radd__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__radd__(self, other) - radd.__doc__ = _flex_doc_SERIES.format( desc="Reverse Addition", op_name="+", @@ -506,10 +498,6 @@ def rdiv(self, other) -> "Series": def truediv(self, other) -> "Series": return self / other - def __truediv__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__truediv__(self, other) - truediv.__doc__ = _flex_doc_SERIES.format( desc="Floating division", op_name="/", @@ -521,10 +509,6 @@ def __truediv__(self, other): def rtruediv(self, other) -> "Series": return other / self - def __rtruediv__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__rtruediv__(self, other) - rtruediv.__doc__ = _flex_doc_SERIES.format( desc="Reverse Floating division", op_name="/", @@ -536,10 +520,6 @@ def __rtruediv__(self, other): def mul(self, other) -> "Series": return self * other - def __mul__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__mul__(self, other) - mul.__doc__ = _flex_doc_SERIES.format( desc="Multiplication", op_name="*", @@ -553,10 +533,6 @@ def __mul__(self, other): def rmul(self, other) -> "Series": return other * self - def __rmul__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__rmul__(self, other) - rmul.__doc__ = _flex_doc_SERIES.format( desc="Reverse Multiplication", op_name="*", @@ -568,10 +544,6 @@ def __rmul__(self, other): def 
sub(self, other) -> "Series": return self - other - def __sub__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__sub__(self, other) - sub.__doc__ = _flex_doc_SERIES.format( desc="Subtraction", op_name="-", @@ -585,10 +557,6 @@ def __sub__(self, other): def rsub(self, other) -> "Series": return other - self - def __rsub__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__rsub__(self, other) - rsub.__doc__ = _flex_doc_SERIES.format( desc="Reverse Subtraction", op_name="-", @@ -600,10 +568,6 @@ def __rsub__(self, other): def mod(self, other) -> "Series": return self % other - def __mod__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__mod__(self, other) - mod.__doc__ = _flex_doc_SERIES.format( desc="Modulo", op_name="%", @@ -615,10 +579,6 @@ def __mod__(self, other): def rmod(self, other) -> "Series": return other % self - def __rmod__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__rmod__(self, other) - rmod.__doc__ = _flex_doc_SERIES.format( desc="Reverse Modulo", op_name="%", @@ -630,10 +590,6 @@ def __rmod__(self, other): def pow(self, other) -> "Series": return self ** other - def __pow__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__pow__(self, other) - pow.__doc__ = _flex_doc_SERIES.format( desc="Exponential power of series", op_name="**", @@ -645,10 +601,6 @@ def __pow__(self, other): def rpow(self, other) -> "Series": return other ** self - def __rpow__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__rpow__(self, other) - rpow.__doc__ = _flex_doc_SERIES.format( desc="Reverse Exponential 
power", op_name="**", @@ -660,10 +612,6 @@ def __rpow__(self, other): def floordiv(self, other) -> "Series": return self // other - def __floordiv__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__floordiv__(self, other) - floordiv.__doc__ = _flex_doc_SERIES.format( desc="Integer division", op_name="//", @@ -675,10 +623,6 @@ def __floordiv__(self, other): def rfloordiv(self, other) -> "Series": return other // self - def __rfloordiv__(self, other): - other = ks.Index(other, name=self.name) if isinstance(other, (list, tuple)) else other - return IndexOpsMixin.__rfloordiv__(self, other) - rfloordiv.__doc__ = _flex_doc_SERIES.format( desc="Reverse Integer division", op_name="//", diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index 34c4f0b1e6..410dd5269c 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -2117,6 +2117,471 @@ def test_series_floordiv_and_rfloordiv(self): ): other // kser + def test_index_add_and_radd(self): + pidx = pd.Index([1, 2, 3, 4, 5, 6], name="x") + kidx = ks.from_pandas(pidx) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx + pandas_other, (kidx + koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx + pandas_other, kidx + koalas_other) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx + other, kidx + other) + self.assert_eq(other + pidx, other + kidx) + else: + expected_result = ks.Index( + [np.nan, 3.0, 6.0, 8.0, np.nan, 12.0], dtype="float64", name="x" + ) + 
self.assert_eq(expected_result, kidx + other) + self.assert_eq(expected_result, other + kidx) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx + other, kidx + other) + self.assert_eq(other + pidx, other + kidx) + else: + expected_result = ks.Index( + [np.nan, 3.0, 6.0, 8.0, np.nan, 12.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx + other) + self.assert_eq(expected_result, other + kidx) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx + other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other + kidx + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx + other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other + kidx + + def test_index_sub_and_rsub(self): + pidx = pd.Index([1, 2, 3, 4, 5, 6], name="x") + kidx = ks.from_pandas(pidx) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx - pandas_other, (kidx - koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx - pandas_other, kidx - koalas_other) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx - other, kidx - other) + self.assert_eq(other - pidx, other - kidx) + else: + expected_result = ks.Index( + [np.nan, 1.0, 0.0, 0.0, np.nan, 0.0], 
dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx - other) + expected_result = ks.Index( + [np.nan, -1.0, 0.0, 0.0, np.nan, 0.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, other - kidx) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx - other, kidx - other) + self.assert_eq(other - pidx, other - kidx) + else: + expected_result = ks.Index( + [np.nan, 1.0, 0.0, 0.0, np.nan, 0.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx - other) + expected_result = ks.Index( + [np.nan, -1.0, 0.0, 0.0, np.nan, 0.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, other - kidx) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx - other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other - kidx + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx - other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other - kidx + + def test_index_mul_and_rmul(self): + pidx = pd.Index([1, 2, 3, 4, 5, 6], name="x") + kidx = ks.from_pandas(pidx) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx * pandas_other, (kidx * koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx * pandas_other, kidx * koalas_other) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if 
LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx * other, kidx * other) + self.assert_eq(other * pidx, other * kidx) + else: + expected_result = ks.Index( + [np.nan, 2.0, 9.0, 16.0, np.nan, 36.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx * other) + self.assert_eq(expected_result, other * kidx) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx * other, kidx * other) + self.assert_eq(other * pidx, other * kidx) + else: + expected_result = ks.Index( + [np.nan, 2.0, 9.0, 16.0, np.nan, 36.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx * other) + self.assert_eq(expected_result, other * kidx) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx * other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other * kidx + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx * other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other * kidx + + def test_index_pow_and_rpow(self): + pidx = pd.Index([1, 2, 3, 4, 5, 6], name="x") + kidx = ks.from_pandas(pidx) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx ** pandas_other, (kidx ** koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx ** pandas_other, kidx ** koalas_other) + + # other = list + other = 
[np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx ** other, kidx ** other) + self.assert_eq(other ** pidx, other ** kidx) + else: + expected_result = ks.Index( + [1.0, 2.0, 27.0, 256.0, np.nan, 46656.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx ** other) + expected_result = ks.Index( + [np.nan, 1.0, 27.0, 256.0, np.nan, 46656.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, other ** kidx) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx ** other, kidx ** other) + self.assert_eq(other ** pidx, other ** kidx) + else: + expected_result = ks.Index( + [1.0, 2.0, 27.0, 256.0, np.nan, 46656.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx ** other) + expected_result = ks.Index( + [np.nan, 1.0, 27.0, 256.0, np.nan, 46656.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, other ** kidx) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx ** other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other ** kidx + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx ** other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other ** kidx + + def test_index_mod_and_rmod(self): + pidx = pd.Index([1, 2, 3, 4, 5, 6], name="x") + kidx = ks.from_pandas(pidx) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx % pandas_other, 
(kidx % koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx % pandas_other, kidx % koalas_other) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx % other, kidx % other) + self.assert_eq(other % pidx, other % kidx) + else: + expected_result = ks.Index( + [np.nan, 0.0, 0.0, 0.0, np.nan, 0.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx % other) + expected_result = ks.Index( + [np.nan, 1.0, 0.0, 0.0, np.nan, 0.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, other % kidx) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx % other, kidx % other) + self.assert_eq(other % pidx, other % kidx) + else: + expected_result = ks.Index( + [np.nan, 0.0, 0.0, 0.0, np.nan, 0.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx % other) + expected_result = ks.Index( + [np.nan, 1.0, 0.0, 0.0, np.nan, 0.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, other % kidx) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx % other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other % kidx + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx % other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other % kidx + + def test_index_div_and_rdiv(self): + pidx = pd.Index([1, 
2, 3, 4, 5, 6], name="x") + kidx = ks.from_pandas(pidx) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx / pandas_other, (kidx / koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx / pandas_other, kidx / koalas_other) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx / other, kidx / other) + self.assert_eq(other / pidx, other / kidx) + else: + expected_result = ks.Index( + [np.nan, 2.0, 1.0, 1.0, np.nan, 1.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx / other) + expected_result = ks.Index( + [np.nan, 0.5, 1.0, 1.0, np.nan, 1.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, other / kidx) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx / other, kidx / other) + self.assert_eq(other / pidx, other / kidx) + else: + expected_result = ks.Index( + [np.nan, 2.0, 1.0, 1.0, np.nan, 1.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx / other) + expected_result = ks.Index( + [np.nan, 0.5, 1.0, 1.0, np.nan, 1.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, other / kidx) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx / other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other / kidx + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast 
together with shapes" + ): + kidx / other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other / kidx + + def test_index_floordiv_and_rfloordiv(self): + pidx = pd.Index([1, 2, 3, 4, 5, 6], name="x") + kidx = ks.from_pandas(pidx) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx // pandas_other, (kidx // koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx // pandas_other, kidx // koalas_other) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx // other, kidx // other) + self.assert_eq(other // pidx, other // kidx) + else: + expected_result = ks.Index( + [np.nan, 2.0, 1.0, 1.0, np.nan, 1.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx // other) + expected_result = ks.Index( + [np.nan, 0.0, 1.0, 1.0, np.nan, 1.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, other // kidx) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx // other, kidx // other) + self.assert_eq(other // pidx, other // kidx) + else: + expected_result = ks.Index( + [np.nan, 2.0, 1.0, 1.0, np.nan, 1.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx // other) + expected_result = ks.Index( + [np.nan, 0.0, 1.0, 1.0, np.nan, 1.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, other // kidx) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx // other + with self.assertRaisesRegex( + 
ValueError, "operands could not be broadcast together with shapes" + ): + other // kidx + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx // other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other // kidx + class OpsOnDiffFramesDisabledTest(ReusedSQLTestCase, SQLTestUtils): @classmethod @@ -2371,3 +2836,56 @@ def test_series_binary_operators(self): kser.rfloordiv(other) with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): other // kser + + def test_index_binary_operators(self): + pidx = pd.Index([1, 2, 3, 4, 5, 6], name="x") + kidx = ks.from_pandas(pidx) + + others = ( + ks.Series([np.nan, 1, 3, 4, np.nan, 6], name="x"), + ks.Index([np.nan, 1, 3, 4, np.nan, 6], name="x"), + [np.nan, 1, 3, 4, np.nan, 6], + (np.nan, 1, 3, 4, np.nan, 6), + ) + # `add` and `radd` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kidx + other + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other + kidx + # `sub` and `rsub` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kidx - other + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other - kidx + # `mul` and `rmul` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kidx * other + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other * kidx + # `pow` and `rpow` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kidx ** other + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other ** kidx + # `mod` and `rmod` + for other in
others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kidx % other + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other % kidx + # `div` and `rdiv` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kidx / other + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other / kidx + # `floordiv` and `rfloordiv` + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kidx // other + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + other // kidx From 8df4ff9b71a11625f6607e1ce09ac8a291fd10ec Mon Sep 17 00:00:00 2001 From: itholic Date: Thu, 18 Feb 2021 16:21:54 +0900 Subject: [PATCH 4/6] Fix test for mod and rmod --- databricks/koalas/tests/test_ops_on_diff_frames.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index 410dd5269c..4c90fbb76e 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -2396,7 +2396,7 @@ def test_index_mod_and_rmod(self): # other = list other = [np.nan, 1, 3, 4, np.nan, 6] - if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): self.assert_eq(pidx % other, kidx % other) self.assert_eq(other % pidx, other % kidx) else: @@ -2411,7 +2411,7 @@ def test_index_mod_and_rmod(self): # other = tuple other = (np.nan, 1, 3, 4, np.nan, 6) - if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): self.assert_eq(pidx % other, kidx % other) self.assert_eq(other % pidx, other % kidx) else: From 5d2d1c5c33c748d4357c2e4b3bc717877740f183 Mon Sep 17 00:00:00 2001 From: itholic Date: Thu, 18 Feb 2021 
18:56:02 +0900 Subject: [PATCH 5/6] Use pandas --- databricks/koalas/base.py | 43 +++++++++++++++++++++++++++----------- databricks/koalas/utils.py | 28 +++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 12 deletions(-) diff --git a/databricks/koalas/base.py b/databricks/koalas/base.py index f24d85dcf5..e63cf5ac29 100644 --- a/databricks/koalas/base.py +++ b/databricks/koalas/base.py @@ -58,6 +58,7 @@ scol_for, validate_axis, ERROR_MESSAGE_CANNOT_COMBINE, + check_same_length, ) from databricks.koalas.frame import DataFrame @@ -322,7 +323,8 @@ def spark_column(self) -> Column: def __add__(self, other) -> Union["Series", "Index"]: if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(pindex_ops + other) # type: ignore if not isinstance(self.spark.data_type, StringType) and ( (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) or isinstance(other, str) @@ -342,7 +344,8 @@ def __add__(self, other) -> Union["Series", "Index"]: def __sub__(self, other) -> Union["Series", "Index"]: if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(pindex_ops - other) # type: ignore if ( isinstance(self.spark.data_type, StringType) or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) @@ -388,7 +391,8 @@ def __sub__(self, other) -> Union["Series", "Index"]: def __mul__(self, other) -> Union["Series", "Index"]: if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(pindex_ops * other) # type: ignore if isinstance(other, str): raise TypeError("multiplication can not be applied to a string literal.") @@ -429,7 +433,8 @@ def __truediv__(self, other) -> Union["Series", 
"Index"]: +-----------------------|---------|---------+ """ if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(pindex_ops / other) # type: ignore if ( isinstance(self.spark.data_type, StringType) @@ -449,7 +454,8 @@ def truediv(left, right): def __mod__(self, other) -> Union["Series", "Index"]: if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(pindex_ops % other) # type: ignore if ( isinstance(self.spark.data_type, StringType) or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) @@ -463,6 +469,9 @@ def mod(left, right): return column_op(mod)(self, other) def __radd__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(other + pindex_ops) # type: ignore if isinstance(other, (list, tuple)): other = ks.Index(other, name=self.name) # type: ignore # Handle 'literal' + df['col'] @@ -479,7 +488,8 @@ def __radd__(self, other) -> Union["Series", "Index"]: def __rsub__(self, other) -> Union["Series", "Index"]: if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(other - pindex_ops) # type: ignore if isinstance(self.spark.data_type, StringType) or isinstance(other, str): raise TypeError("substraction can not be applied to string series or literals.") @@ -513,7 +523,8 @@ def __rsub__(self, other) -> Union["Series", "Index"]: def __rmul__(self, other) -> Union["Series", "Index"]: if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(other * pindex_ops) # type: ignore if 
isinstance(other, str): raise TypeError("multiplication can not be applied to a string literal.") @@ -529,7 +540,8 @@ def __rmul__(self, other) -> Union["Series", "Index"]: def __rtruediv__(self, other) -> Union["Series", "Index"]: if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(other / pindex_ops) # type: ignore if isinstance(self.spark.data_type, StringType) or isinstance(other, str): raise TypeError("division can not be applied on string series or literals.") @@ -558,7 +570,8 @@ def __floordiv__(self, other) -> Union["Series", "Index"]: +-----------------------|---------|---------+ """ if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(pindex_ops // other) # type: ignore if ( isinstance(self.spark.data_type, StringType) or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) @@ -580,6 +593,9 @@ def floordiv(left, right): return numpy_column_op(floordiv)(self, other) def __rfloordiv__(self, other) -> Union["Series", "Index"]: + if isinstance(other, (list, tuple)): + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(other // pindex_ops) # type: ignore if isinstance(other, (list, tuple)): other = ks.Index(other, name=self.name) # type: ignore if isinstance(self.spark.data_type, StringType) or isinstance(other, str): @@ -594,7 +610,8 @@ def rfloordiv(left, right): def __rmod__(self, other) -> Union["Series", "Index"]: if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(other % pindex_ops) # type: ignore if isinstance(self.spark.data_type, StringType) or isinstance(other, str): raise TypeError("modulo can not be applied on string series or literals.") @@ 
-605,7 +622,8 @@ def rmod(left, right): def __pow__(self, other) -> Union["Series", "Index"]: if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(pindex_ops ** other) # type: ignore def pow_func(left, right): return F.when(left == 1, left).otherwise(Column.__pow__(left, right)) @@ -614,7 +632,8 @@ def pow_func(left, right): def __rpow__(self, other) -> Union["Series", "Index"]: if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) # type: ignore + pindex_ops, other = check_same_length(self, other) + return ks.from_pandas(other ** pindex_ops) # type: ignore def rpow_func(left, right): return F.when(F.lit(right == 1), right).otherwise(Column.__rpow__(left, right)) diff --git a/databricks/koalas/utils.py b/databricks/koalas/utils.py index 293b3c7a60..4f4b981c10 100644 --- a/databricks/koalas/utils.py +++ b/databricks/koalas/utils.py @@ -813,3 +813,31 @@ def compare_disallow_null(left, right, comp): def compare_allow_null(left, right, comp): return left.isNull() | right.isNull() | comp(left, right) + + +def check_same_length(left: "IndexOpsMixin", right: Union[list, tuple]): + """ + Check if given `left` and `right` have the same length. + If True, return the converted pandas object and `right`. + This function is used for binary operations of Series and Index. + """ + with ks.option_context("compute.ordered_head", True): + len_right = len(right) + if isinstance(left, ks.Series): + pindex_ops = left.head(len_right + 1)._to_internal_pandas() + elif isinstance(left, ks.Index): + pindex_ops = left._kdf.head(len_right + 1).index._to_internal_pandas() + # pandas < 1.2.0 doesn't fully support binary operations with list or tuple for Index. + # So, we convert list or tuple to the Index for this case. 
+ if LooseVersion(pd.__version__) < LooseVersion("1.2.0"): + right = pd.Index(right, name=pindex_ops.name) + else: + raise TypeError("check_same_length allows only Series or Index") + len_pindex_ops = len(pindex_ops) + if len_pindex_ops != len_right: + raise ValueError( + "operands could not be broadcast together with shapes ({},) ({},)".format( + len_pindex_ops, len_right + ) + ) + return pindex_ops, right From 0fd3666ae9537661e4402168c06d5baa6efffc8b Mon Sep 17 00:00:00 2001 From: itholic Date: Thu, 18 Feb 2021 21:03:38 +0900 Subject: [PATCH 6/6] Fix test --- databricks/koalas/tests/test_ops_on_diff_frames.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index 4c90fbb76e..06493de08a 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -2756,8 +2756,6 @@ def test_series_binary_operators(self): others = ( ks.Series([np.nan, 1, 3, 4, np.nan, 6], name="x"), ks.Index([np.nan, 1, 3, 4, np.nan, 6], name="x"), - [np.nan, 1, 3, 4, np.nan, 6], - (np.nan, 1, 3, 4, np.nan, 6), ) # `add` and `radd` for other in others: @@ -2844,8 +2842,6 @@ def test_index_binary_operators(self): others = ( ks.Series([np.nan, 1, 3, 4, np.nan, 6], name="x"), ks.Index([np.nan, 1, 3, 4, np.nan, 6], name="x"), - [np.nan, 1, 3, 4, np.nan, 6], - (np.nan, 1, 3, 4, np.nan, 6), ) # `add` and `radd` for other in others: