diff --git a/databricks/koalas/base.py b/databricks/koalas/base.py
index eb0c431838..e63cf5ac29 100644
--- a/databricks/koalas/base.py
+++ b/databricks/koalas/base.py
@@ -58,6 +58,7 @@
     scol_for,
     validate_axis,
     ERROR_MESSAGE_CANNOT_COMBINE,
+    check_same_length,
 )
 from databricks.koalas.frame import DataFrame
 
@@ -321,6 +322,9 @@ def spark_column(self) -> Column:
     __neg__ = column_op(Column.__neg__)
 
     def __add__(self, other) -> Union["Series", "Index"]:
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(pindex_ops + other)  # type: ignore
         if not isinstance(self.spark.data_type, StringType) and (
             (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType))
             or isinstance(other, str)
@@ -339,6 +343,9 @@ def __add__(self, other) -> Union["Series", "Index"]:
         return column_op(Column.__add__)(self, other)
 
     def __sub__(self, other) -> Union["Series", "Index"]:
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(pindex_ops - other)  # type: ignore
         if (
             isinstance(self.spark.data_type, StringType)
             or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType))
@@ -383,6 +390,9 @@ def __sub__(self, other) -> Union["Series", "Index"]:
         return column_op(Column.__sub__)(self, other)
 
     def __mul__(self, other) -> Union["Series", "Index"]:
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(pindex_ops * other)  # type: ignore
         if isinstance(other, str):
             raise TypeError("multiplication can not be applied to a string literal.")
 
@@ -422,6 +432,9 @@ def __truediv__(self, other) -> Union["Series", "Index"]:
         | -10                   | null    | -np.inf |
         +-----------------------|---------|---------+
         """
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(pindex_ops / other)  # type: ignore
 
         if (
             isinstance(self.spark.data_type, StringType)
@@ -440,6 +453,9 @@ def truediv(left, right):
         return numpy_column_op(truediv)(self, other)
 
     def __mod__(self, other) -> Union["Series", "Index"]:
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(pindex_ops % other)  # type: ignore
         if (
             isinstance(self.spark.data_type, StringType)
             or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType))
@@ -453,6 +469,11 @@ def mod(left, right):
         return column_op(mod)(self, other)
 
     def __radd__(self, other) -> Union["Series", "Index"]:
+        # A list/tuple has no index to align on; check_same_length returns the operands
+        # used for a pandas-side computation (NOTE(review): presumably collects `self`).
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(other + pindex_ops)  # type: ignore
         # Handle 'literal' + df['col']
         if not isinstance(self.spark.data_type, StringType) and isinstance(other, str):
             raise TypeError("string addition can only be applied to string series or literals.")
@@ -466,6 +487,9 @@ def __radd__(self, other) -> Union["Series", "Index"]:
         return column_op(Column.__radd__)(self, other)
 
     def __rsub__(self, other) -> Union["Series", "Index"]:
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(other - pindex_ops)  # type: ignore
         if isinstance(self.spark.data_type, StringType) or isinstance(other, str):
             raise TypeError("substraction can not be applied to string series or literals.")
 
@@ -495,9 +519,12 @@ def __rsub__(self, other) -> Union["Series", "Index"]:
                 return -column_op(F.datediff)(self, F.lit(other)).astype("long")
             else:
                 raise TypeError("date subtraction can only be applied to date series.")
-        return column_op(Column.__rsub__)(self, other)
+        return column_op(lambda left, right: right - left)(self, other)
 
     def __rmul__(self, other) -> Union["Series", "Index"]:
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(other * pindex_ops)  # type: ignore
         if isinstance(other, str):
             raise TypeError("multiplication can not be applied to a string literal.")
 
@@ -512,6 +539,9 @@ def __rmul__(self, other) -> Union["Series", "Index"]:
         return column_op(Column.__rmul__)(self, other)
 
     def __rtruediv__(self, other) -> Union["Series", "Index"]:
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(other / pindex_ops)  # type: ignore
         if isinstance(self.spark.data_type, StringType) or isinstance(other, str):
             raise TypeError("division can not be applied on string series or literals.")
 
@@ -539,6 +569,9 @@ def __floordiv__(self, other) -> Union["Series", "Index"]:
         | -10                   | null    | -np.inf |
         +-----------------------|---------|---------+
         """
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(pindex_ops // other)  # type: ignore
         if (
             isinstance(self.spark.data_type, StringType)
             or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType))
@@ -560,6 +593,11 @@ def floordiv(left, right):
         return numpy_column_op(floordiv)(self, other)
 
     def __rfloordiv__(self, other) -> Union["Series", "Index"]:
+        # A list/tuple has no index to align on; check_same_length returns the operands
+        # used for a pandas-side computation (NOTE(review): presumably collects `self`).
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(other // pindex_ops)  # type: ignore
         if isinstance(self.spark.data_type, StringType) or isinstance(other, str):
             raise TypeError("division can not be applied on string series or literals.")
 
@@ -571,6 +609,9 @@ def rfloordiv(left, right):
         return numpy_column_op(rfloordiv)(self, other)
 
     def __rmod__(self, other) -> Union["Series", "Index"]:
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(other % pindex_ops)  # type: ignore
         if isinstance(self.spark.data_type, StringType) or isinstance(other, str):
             raise TypeError("modulo can not be applied on string series or literals.")
 
@@ -580,12 +621,20 @@ def rmod(left, right):
         return column_op(rmod)(self, other)
 
     def __pow__(self, other) -> Union["Series", "Index"]:
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(pindex_ops ** other)  # type: ignore
+
         def pow_func(left, right):
             return F.when(left == 1, left).otherwise(Column.__pow__(left, right))
 
         return column_op(pow_func)(self, other)
 
     def __rpow__(self, other) -> Union["Series", "Index"]:
+        if isinstance(other, (list, tuple)):
+            pindex_ops, other = check_same_length(self, other)
+            return ks.from_pandas(other ** pindex_ops)  # type: ignore
+
         def rpow_func(left, right):
             return F.when(F.lit(right == 1), right).otherwise(Column.__rpow__(left, right))
 
diff --git a/databricks/koalas/indexes/base.py b/databricks/koalas/indexes/base.py
index 03911b7487..0f02a1396e 100644
--- a/databricks/koalas/indexes/base.py
+++ b/databricks/koalas/indexes/base.py
@@ -102,7 +102,7 @@ class Index(IndexOpsMixin):
     Index(['a', 'b', 'c'], dtype='object')
     """
 
-    def __new__(cls, data: Union[DataFrame, list], dtype=None, name=None, names=None):
+    def __new__(cls, data: Union[DataFrame, list, tuple], dtype=None, name=None, names=None):
         from databricks.koalas.indexes.datetimes import DatetimeIndex
         from databricks.koalas.indexes.multi import MultiIndex
         from databricks.koalas.indexes.numeric import Float64Index, Int64Index
diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py
index 36eacc3d5a..06493de08a 100644
--- a/databricks/koalas/tests/test_ops_on_diff_frames.py
+++ b/databricks/koalas/tests/test_ops_on_diff_frames.py
@@ -1528,6 +1528,1060 @@ def test_pow_and_rpow(self):
         self.assert_eq(pser ** pser_other, (kser ** kser_other).sort_index())
         self.assert_eq(pser.rpow(pser_other),
kser.rpow(kser_other).sort_index()) + def test_series_add_and_radd(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.add(pandas_other), kser.add(koalas_other).sort_index()) + self.assert_eq(pser + pandas_other, (kser + koalas_other).sort_index()) + self.assert_eq(pser.radd(pandas_other), kser.radd(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.add(pandas_other), kser.add(koalas_other).sort_index()) + self.assert_eq(pser + pandas_other, (kser + koalas_other).sort_index()) + self.assert_eq(pser.radd(pandas_other), kser.radd(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.add(other), kser.add(other).sort_index()) + self.assert_eq(pser + other, (kser + other).sort_index()) + self.assert_eq(pser.radd(other), kser.radd(other).sort_index()) + self.assert_eq(other + pser, (other + kser).sort_index()) + else: + self.assert_eq(pser.add(other).rename("x"), kser.add(other).sort_index()) + self.assert_eq((pser + other).rename("x"), (kser + other).sort_index()) + self.assert_eq(pser.radd(other).rename("x"), kser.radd(other).sort_index()) + self.assert_eq((other + pser).rename("x"), (other + kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.add(other), kser.add(other).sort_index()) + self.assert_eq(pser + other, (kser + other).sort_index()) + self.assert_eq(pser.radd(other), kser.radd(other).sort_index()) + self.assert_eq(other + pser, (other + kser).sort_index()) + else: + 
self.assert_eq(pser.add(other).rename("x"), kser.add(other).sort_index()) + self.assert_eq((pser + other).rename("x"), (kser + other).sort_index()) + self.assert_eq(pser.radd(other).rename("x"), kser.radd(other).sort_index()) + self.assert_eq((other + pser).rename("x"), (other + kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.add(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser + other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.radd(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other + kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.add(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser + other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.radd(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other + kser + + def test_series_sub_and_rsub(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.sub(pandas_other), kser.sub(koalas_other).sort_index()) + self.assert_eq(pser - pandas_other, (kser - koalas_other).sort_index()) + self.assert_eq(pser.rsub(pandas_other), kser.rsub(koalas_other).sort_index()) + + # other = Index + pandas_other = 
pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.sub(pandas_other), kser.sub(koalas_other).sort_index()) + self.assert_eq(pser - pandas_other, (kser - koalas_other).sort_index()) + self.assert_eq(pser.rsub(pandas_other), kser.rsub(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.sub(other), kser.sub(other).sort_index()) + self.assert_eq(pser - other, (kser - other).sort_index()) + self.assert_eq(pser.rsub(other), kser.rsub(other).sort_index()) + self.assert_eq(other - pser, (other - kser).sort_index()) + else: + self.assert_eq(pser.sub(other).rename("x"), kser.sub(other).sort_index()) + self.assert_eq((pser - other).rename("x"), (kser - other).sort_index()) + self.assert_eq(pser.rsub(other).rename("x"), kser.rsub(other).sort_index()) + self.assert_eq((other - pser).rename("x"), (other - kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.sub(other), kser.sub(other).sort_index()) + self.assert_eq(pser - other, (kser - other).sort_index()) + self.assert_eq(pser.rsub(other), kser.rsub(other).sort_index()) + self.assert_eq(other - pser, (other - kser).sort_index()) + else: + self.assert_eq(pser.sub(other).rename("x"), kser.sub(other).sort_index()) + self.assert_eq((pser - other).rename("x"), (kser - other).sort_index()) + self.assert_eq(pser.rsub(other).rename("x"), kser.rsub(other).sort_index()) + self.assert_eq((other - pser).rename("x"), (other - kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.sub(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser - 
other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rsub(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other - kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.sub(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser - other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rsub(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other - kser + + def test_series_mul_and_rmul(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.mul(pandas_other), kser.mul(koalas_other).sort_index()) + self.assert_eq(pser * pandas_other, (kser * koalas_other).sort_index()) + self.assert_eq(pser.rmul(pandas_other), kser.rmul(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.mul(pandas_other), kser.mul(koalas_other).sort_index()) + self.assert_eq(pser * pandas_other, (kser * koalas_other).sort_index()) + self.assert_eq(pser.rmul(pandas_other), kser.rmul(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.mul(other), kser.mul(other).sort_index()) + self.assert_eq(pser * other, (kser * other).sort_index()) + self.assert_eq(pser.rmul(other), 
kser.rmul(other).sort_index()) + self.assert_eq(other * pser, (other * kser).sort_index()) + else: + self.assert_eq(pser.mul(other).rename("x"), kser.mul(other).sort_index()) + self.assert_eq((pser * other).rename("x"), (kser * other).sort_index()) + self.assert_eq(pser.rmul(other).rename("x"), kser.rmul(other).sort_index()) + self.assert_eq((other * pser).rename("x"), (other * kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.mul(other), kser.mul(other).sort_index()) + self.assert_eq(pser * other, (kser * other).sort_index()) + self.assert_eq(pser.rmul(other), kser.rmul(other).sort_index()) + self.assert_eq(other * pser, (other * kser).sort_index()) + else: + self.assert_eq(pser.mul(other).rename("x"), kser.mul(other).sort_index()) + self.assert_eq((pser * other).rename("x"), (kser * other).sort_index()) + self.assert_eq(pser.rmul(other).rename("x"), kser.rmul(other).sort_index()) + self.assert_eq((other * pser).rename("x"), (other * kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.mul(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser * other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rmul(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other * kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.mul(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser * other + with 
self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rmul(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other * kser + + def test_series_pow_and_rpow(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.pow(pandas_other), kser.pow(koalas_other).sort_index()) + self.assert_eq(pser ** pandas_other, (kser ** koalas_other).sort_index()) + self.assert_eq(pser.rpow(pandas_other), kser.rpow(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.pow(pandas_other), kser.pow(koalas_other).sort_index()) + self.assert_eq(pser ** pandas_other, (kser ** koalas_other).sort_index()) + self.assert_eq(pser.rpow(pandas_other), kser.rpow(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.pow(other), kser.pow(other).sort_index()) + self.assert_eq(pser ** other, (kser ** other).sort_index()) + self.assert_eq(pser.rpow(other), kser.rpow(other).sort_index()) + self.assert_eq(other ** pser, (other ** kser).sort_index()) + else: + self.assert_eq(pser.pow(other).rename("x"), kser.pow(other).sort_index()) + self.assert_eq((pser ** other).rename("x"), (kser ** other).sort_index()) + self.assert_eq(pser.rpow(other).rename("x"), kser.rpow(other).sort_index()) + self.assert_eq((other ** pser).rename("x"), (other ** kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.pow(other), kser.pow(other).sort_index()) + 
self.assert_eq(pser ** other, (kser ** other).sort_index()) + self.assert_eq(pser.rpow(other), kser.rpow(other).sort_index()) + self.assert_eq(other ** pser, (other ** kser).sort_index()) + else: + self.assert_eq(pser.pow(other).rename("x"), kser.pow(other).sort_index()) + self.assert_eq((pser ** other).rename("x"), (kser ** other).sort_index()) + self.assert_eq(pser.rpow(other).rename("x"), kser.rpow(other).sort_index()) + self.assert_eq((other ** pser).rename("x"), (other ** kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.pow(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser ** other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rpow(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other ** kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.pow(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser ** other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rpow(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other ** kser + + def test_series_mod_and_rmod(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.mod(pandas_other), 
kser.mod(koalas_other).sort_index()) + self.assert_eq(pser % pandas_other, (kser % koalas_other).sort_index()) + self.assert_eq(pser.rmod(pandas_other), kser.rmod(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.mod(pandas_other), kser.mod(koalas_other).sort_index()) + self.assert_eq(pser % pandas_other, (kser % koalas_other).sort_index()) + self.assert_eq(pser.rmod(pandas_other), kser.rmod(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.mod(other), kser.mod(other).sort_index()) + self.assert_eq(pser % other, (kser % other).sort_index()) + self.assert_eq(pser.rmod(other), kser.rmod(other).sort_index()) + self.assert_eq(other % pser, (other % kser).sort_index()) + else: + self.assert_eq(pser.mod(other).rename("x"), kser.mod(other).sort_index()) + self.assert_eq((pser % other).rename("x"), (kser % other).sort_index()) + self.assert_eq(pser.rmod(other).rename("x"), kser.rmod(other).sort_index()) + self.assert_eq((other % pser).rename("x"), (other % kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.mod(other), kser.mod(other).sort_index()) + self.assert_eq(pser % other, (kser % other).sort_index()) + self.assert_eq(pser.rmod(other), kser.rmod(other).sort_index()) + self.assert_eq(other % pser, (other % kser).sort_index()) + else: + self.assert_eq(pser.mod(other).rename("x"), kser.mod(other).sort_index()) + self.assert_eq((pser % other).rename("x"), (kser % other).sort_index()) + self.assert_eq(pser.rmod(other).rename("x"), kser.rmod(other).sort_index()) + self.assert_eq((other % pser).rename("x"), (other % kser).sort_index()) + + # other = list with the different length 
+ other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.mod(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser % other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rmod(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other % kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.mod(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser % other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rmod(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other % kser + + def test_series_div_and_rdiv(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.div(pandas_other), kser.div(koalas_other).sort_index()) + self.assert_eq(pser / pandas_other, (kser / koalas_other).sort_index()) + self.assert_eq(pser.rdiv(pandas_other), kser.rdiv(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.div(pandas_other), kser.div(koalas_other).sort_index()) + self.assert_eq(pser / pandas_other, (kser / koalas_other).sort_index()) + self.assert_eq(pser.rdiv(pandas_other), kser.rdiv(koalas_other).sort_index()) + + # other = list + 
other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.div(other), kser.div(other).sort_index()) + self.assert_eq(pser / other, (kser / other).sort_index()) + self.assert_eq(pser.rdiv(other), kser.rdiv(other).sort_index()) + self.assert_eq(other / pser, (other / kser).sort_index()) + else: + self.assert_eq(pser.div(other).rename("x"), kser.div(other).sort_index()) + self.assert_eq((pser / other).rename("x"), (kser / other).sort_index()) + self.assert_eq(pser.rdiv(other).rename("x"), kser.rdiv(other).sort_index()) + self.assert_eq((other / pser).rename("x"), (other / kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.div(other), kser.div(other).sort_index()) + self.assert_eq(pser / other, (kser / other).sort_index()) + self.assert_eq(pser.rdiv(other), kser.rdiv(other).sort_index()) + self.assert_eq(other / pser, (other / kser).sort_index()) + else: + self.assert_eq(pser.div(other).rename("x"), kser.div(other).sort_index()) + self.assert_eq((pser / other).rename("x"), (kser / other).sort_index()) + self.assert_eq(pser.rdiv(other).rename("x"), kser.rdiv(other).sort_index()) + self.assert_eq((other / pser).rename("x"), (other / kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.div(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser / other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rdiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other / kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with 
self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.div(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser / other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rdiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other / kser + + def test_series_floordiv_and_rfloordiv(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.floordiv(pandas_other), kser.floordiv(koalas_other).sort_index()) + self.assert_eq(pser // pandas_other, (kser // koalas_other).sort_index()) + self.assert_eq(pser.rfloordiv(pandas_other), kser.rfloordiv(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.floordiv(pandas_other), kser.floordiv(koalas_other).sort_index()) + self.assert_eq(pser // pandas_other, (kser // koalas_other).sort_index()) + self.assert_eq(pser.rfloordiv(pandas_other), kser.rfloordiv(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.floordiv(other), kser.floordiv(other).sort_index()) + self.assert_eq(pser // other, (kser // other).sort_index()) + self.assert_eq(pser.rfloordiv(other), kser.rfloordiv(other).sort_index()) + self.assert_eq(other // pser, (other // kser).sort_index()) + else: + self.assert_eq(pser.floordiv(other).rename("x"), kser.floordiv(other).sort_index()) + self.assert_eq((pser // other).rename("x"), (kser // other).sort_index()) + 
self.assert_eq(pser.rfloordiv(other).rename("x"), kser.rfloordiv(other).sort_index()) + self.assert_eq((other // pser).rename("x"), (other // kser).sort_index()) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.floordiv(other), kser.floordiv(other).sort_index()) + self.assert_eq(pser // other, (kser // other).sort_index()) + self.assert_eq(pser.rfloordiv(other), kser.rfloordiv(other).sort_index()) + self.assert_eq(other // pser, (other // kser).sort_index()) + else: + self.assert_eq(pser.floordiv(other).rename("x"), kser.floordiv(other).sort_index()) + self.assert_eq((pser // other).rename("x"), (kser // other).sort_index()) + self.assert_eq(pser.rfloordiv(other).rename("x"), kser.rfloordiv(other).sort_index()) + self.assert_eq((other // pser).rename("x"), (other // kser).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.floordiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser // other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rfloordiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other // kser + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.floordiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser // other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.rfloordiv(other) + with self.assertRaisesRegex( + ValueError, "operands could not be 
def test_index_add_and_radd(self):
    """Check `+` with a Koalas Index on either side of the operator.

    The other operand is, in turn, a Series, an Index, a list and a tuple
    of the same length, followed by a list and a tuple that are one
    element shorter and must be rejected with a ValueError.
    """
    values = [np.nan, 1, 3, 4, np.nan, 6]
    shape_error = "operands could not be broadcast together with shapes"

    pidx = pd.Index([1, 2, 3, 4, 5, 6], name="x")
    kidx = ks.from_pandas(pidx)

    # Series operand: the Koalas result is sorted by index before comparing.
    pandas_series = pd.Series(values, name="x", index=[10, 20, 30, 40, 50, 60])
    koalas_series = ks.from_pandas(pandas_series)
    self.assert_eq(pidx + pandas_series, (kidx + koalas_series).sort_index())

    # Index operand.
    pandas_index = pd.Index(values, name="x")
    koalas_index = ks.from_pandas(pandas_index)
    self.assert_eq(pidx + pandas_index, kidx + koalas_index)

    # Same-length list, then tuple. From pandas 1.0 on the pandas result is
    # the reference; for older versions a hard-coded expectation is used.
    for container in (list, tuple):
        other = container(values)
        if LooseVersion(pd.__version__) >= LooseVersion("1.0"):
            self.assert_eq(pidx + other, kidx + other)
            self.assert_eq(other + pidx, other + kidx)
        else:
            expected_result = ks.Index(
                [np.nan, 3.0, 6.0, 8.0, np.nan, 12.0], dtype="float64", name="x"
            )
            self.assert_eq(expected_result, kidx + other)
            self.assert_eq(expected_result, other + kidx)

    # Shorter list, then tuple: both operand orders must raise.
    for container in (list, tuple):
        other = container(values[:-1])
        with self.assertRaisesRegex(ValueError, shape_error):
            kidx + other
        with self.assertRaisesRegex(ValueError, shape_error):
            other + kidx
def test_index_mul_and_rmul(self):
    """Check `*` with a Koalas Index on either side of the operator.

    Operands exercised: a Series, an Index, a same-length list and tuple,
    and finally a list and tuple with one element fewer, which must raise
    a ValueError.
    """
    pidx = pd.Index([1, 2, 3, 4, 5, 6], name="x")
    kidx = ks.from_pandas(pidx)
    operand_values = [np.nan, 1, 3, 4, np.nan, 6]

    # other = Series (Koalas result sorted by index before the comparison)
    pandas_other = pd.Series(operand_values, name="x", index=[10, 20, 30, 40, 50, 60])
    koalas_other = ks.from_pandas(pandas_other)
    self.assert_eq(pidx * pandas_other, (kidx * koalas_other).sort_index())

    # other = Index
    pandas_other = pd.Index(operand_values, name="x")
    koalas_other = ks.from_pandas(pandas_other)
    self.assert_eq(pidx * pandas_other, kidx * koalas_other)

    # other = list, then tuple, of matching length
    for make_other in (list, tuple):
        other = make_other(operand_values)
        if LooseVersion(pd.__version__) < LooseVersion("1.0"):
            # Older pandas is not used as the reference here; compare against
            # a literal expectation instead.
            expected_result = ks.Index(
                [np.nan, 2.0, 9.0, 16.0, np.nan, 36.0], dtype="float64", name="x"
            )
            self.assert_eq(expected_result, kidx * other)
            self.assert_eq(expected_result, other * kidx)
        else:
            self.assert_eq(pidx * other, kidx * other)
            self.assert_eq(other * pidx, other * kidx)

    # other = list, then tuple, with a different length
    message = "operands could not be broadcast together with shapes"
    for make_other in (list, tuple):
        other = make_other(operand_values[:-1])
        with self.assertRaisesRegex(ValueError, message):
            kidx * other
        with self.assertRaisesRegex(ValueError, message):
            other * kidx
def test_index_mod_and_rmod(self):
    """Check `%` with a Koalas Index on either side of the operator.

    The other operand is a Series, an Index, a same-length list and tuple,
    and then a list and tuple one element shorter, which must raise a
    ValueError. For list/tuple operands pandas itself is the reference
    from version 1.2 on; older versions are checked against literal
    expectations.
    """
    values = [np.nan, 1, 3, 4, np.nan, 6]
    shape_error = "operands could not be broadcast together with shapes"

    pidx = pd.Index([1, 2, 3, 4, 5, 6], name="x")
    kidx = ks.from_pandas(pidx)

    # other = Series
    pandas_other = pd.Series(values, name="x", index=[10, 20, 30, 40, 50, 60])
    koalas_other = ks.from_pandas(pandas_other)
    self.assert_eq(pidx % pandas_other, (kidx % koalas_other).sort_index())

    # other = Index
    # (the conversion used to be executed twice in a row; once is enough)
    pandas_other = pd.Index(values, name="x")
    koalas_other = ks.from_pandas(pandas_other)
    self.assert_eq(pidx % pandas_other, kidx % koalas_other)

    # other = list, then tuple, of matching length
    for container in (list, tuple):
        other = container(values)
        if LooseVersion(pd.__version__) >= LooseVersion("1.2"):
            self.assert_eq(pidx % other, kidx % other)
            self.assert_eq(other % pidx, other % kidx)
        else:
            expected_result = ks.Index(
                [np.nan, 0.0, 0.0, 0.0, np.nan, 0.0], dtype="float64", name="x"
            )
            self.assert_eq(expected_result, kidx % other)
            # Reflected operation: 1 % 2 == 1 is the only differing element.
            expected_result = ks.Index(
                [np.nan, 1.0, 0.0, 0.0, np.nan, 0.0], dtype="float64", name="x"
            )
            self.assert_eq(expected_result, other % kidx)

    # other = list, then tuple, with a different length
    for container in (list, tuple):
        other = container(values[:-1])
        with self.assertRaisesRegex(ValueError, shape_error):
            kidx % other
        with self.assertRaisesRegex(ValueError, shape_error):
            other % kidx
LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx / other, kidx / other) + self.assert_eq(other / pidx, other / kidx) + else: + expected_result = ks.Index( + [np.nan, 2.0, 1.0, 1.0, np.nan, 1.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, kidx / other) + expected_result = ks.Index( + [np.nan, 0.5, 1.0, 1.0, np.nan, 1.0], dtype="float64", name="x" + ) + self.assert_eq(expected_result, other / kidx) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx / other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other / kidx + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kidx / other + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + other / kidx + + def test_index_floordiv_and_rfloordiv(self): + pidx = pd.Index([1, 2, 3, 4, 5, 6], name="x") + kidx = ks.from_pandas(pidx) + + # other = Series + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], name="x", index=[10, 20, 30, 40, 50, 60] + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx // pandas_other, (kidx // koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pidx // pandas_other, kidx // koalas_other) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(pidx // other, kidx // other) + self.assert_eq(other // pidx, other // kidx) + else: + expected_result = ks.Index( + [np.nan, 2.0, 1.0, 1.0, np.nan, 1.0], dtype="float64", name="x" + ) + 
def test_series_binary_operators(self):
    """Binary operations between a Series and objects from another frame must fail.

    For each arithmetic operation, both the named method and the operator
    form are tried against a separately created Series and Index, first
    with `kser` on the left, then with the reflected method / `kser` on the
    right. Every call is expected to raise
    ValueError("Cannot combine the series or dataframe ...").
    """
    pser = pd.Series([1, 2, 3, 4, 5, 6], name="x")
    kser = ks.from_pandas(pser)

    others = (
        ks.Series([np.nan, 1, 3, 4, np.nan, 6], name="x"),
        ks.Index([np.nan, 1, 3, 4, np.nan, 6], name="x"),
    )
    combine_error = "Cannot combine the series or dataframe"

    # (method name, operator form); the reflected method is "r" + name.
    operations = (
        ("add", lambda left, right: left + right),
        ("sub", lambda left, right: left - right),
        ("mul", lambda left, right: left * right),
        ("pow", lambda left, right: left ** right),
        ("mod", lambda left, right: left % right),
        ("div", lambda left, right: left / right),
        ("floordiv", lambda left, right: left // right),
    )

    for method_name, apply_operator in operations:
        # forward: kser.<op>(other) and kser <op> other
        for other in others:
            with self.assertRaisesRegex(ValueError, combine_error):
                getattr(kser, method_name)(other)
            with self.assertRaisesRegex(ValueError, combine_error):
                apply_operator(kser, other)
        # reflected: kser.r<op>(other) and other <op> kser
        for other in others:
            with self.assertRaisesRegex(ValueError, combine_error):
                getattr(kser, "r" + method_name)(other)
            with self.assertRaisesRegex(ValueError, combine_error):
                apply_operator(other, kser)
def check_same_length(left: "IndexOpsMixin", right: Union[list, tuple]):
    """
    Verify that `left` holds exactly as many elements as `right`.

    This helper backs the binary operations of Series and Index with a
    list or tuple operand.

    Parameters
    ----------
    left : Series or Index
        The Koalas object taking part in the operation.
    right : list or tuple
        The plain Python sequence it is combined with.

    Returns
    -------
    tuple
        ``(pandas_object, right)``: the leading rows of `left` as a pandas
        object, and `right`, which is converted to a ``pd.Index`` carrying
        the same name when `left` is an Index and pandas is older than 1.2.0.

    Raises
    ------
    TypeError
        If `left` is neither a Series nor an Index.
    ValueError
        If the lengths differ.
    """
    expected_len = len(right)

    left_is_series = isinstance(left, ks.Series)
    if not left_is_series and not isinstance(left, ks.Index):
        raise TypeError("check_same_length allows only Series or Index")

    # Only `expected_len + 1` rows are fetched: one extra row is enough to
    # notice that `left` is longer than `right`. As a consequence the length
    # reported in the error below is capped at `expected_len + 1`.
    with ks.option_context("compute.ordered_head", True):
        if left_is_series:
            pandas_left = left.head(expected_len + 1)._to_internal_pandas()
        else:
            pandas_left = left._kdf.head(expected_len + 1).index._to_internal_pandas()

    # pandas < 1.2.0 doesn't fully support binary operations with list or tuple
    # for Index, so the sequence is turned into an Index in that case.
    if not left_is_series and LooseVersion(pd.__version__) < LooseVersion("1.2.0"):
        right = pd.Index(right, name=pandas_left.name)

    actual_len = len(pandas_left)
    if actual_len != expected_len:
        raise ValueError(
            "operands could not be broadcast together with shapes ({},) ({},)".format(
                actual_len, expected_len
            )
        )
    return pandas_left, right