159 lines
6.3 KiB
Python
159 lines
6.3 KiB
Python
from decimal import Decimal
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from pandas._libs.missing import is_matching_na
|
|
|
|
from pandas import Index
|
|
import pandas._testing as tm
|
|
|
|
|
|
class TestGetIndexer:
|
|
@pytest.mark.parametrize(
|
|
"method,expected",
|
|
[
|
|
("pad", np.array([-1, 0, 1, 1], dtype=np.intp)),
|
|
("backfill", np.array([0, 0, 1, -1], dtype=np.intp)),
|
|
],
|
|
)
|
|
def test_get_indexer_strings(self, method, expected):
|
|
expected = np.array(expected, dtype=np.intp)
|
|
index = Index(["b", "c"], dtype=object)
|
|
actual = index.get_indexer(["a", "b", "c", "d"], method=method)
|
|
|
|
tm.assert_numpy_array_equal(actual, expected)
|
|
|
|
def test_get_indexer_strings_raises(self):
|
|
index = Index(["b", "c"], dtype=object)
|
|
|
|
msg = "|".join(
|
|
[
|
|
"operation 'sub' not supported for dtype 'str'",
|
|
r"unsupported operand type\(s\) for -: 'str' and 'str'",
|
|
]
|
|
)
|
|
with pytest.raises(TypeError, match=msg):
|
|
index.get_indexer(["a", "b", "c", "d"], method="nearest")
|
|
|
|
with pytest.raises(TypeError, match=msg):
|
|
index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
|
|
|
|
with pytest.raises(TypeError, match=msg):
|
|
index.get_indexer(
|
|
["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
|
|
)
|
|
|
|
def test_get_indexer_with_NA_values(
|
|
self, unique_nulls_fixture, unique_nulls_fixture2
|
|
):
|
|
# GH#22332
|
|
# check pairwise, that no pair of na values
|
|
# is mangled
|
|
if unique_nulls_fixture is unique_nulls_fixture2:
|
|
return # skip it, values are not unique
|
|
arr = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=object)
|
|
index = Index(arr, dtype=object)
|
|
result = index.get_indexer(
|
|
Index(
|
|
[unique_nulls_fixture, unique_nulls_fixture2, "Unknown"], dtype=object
|
|
)
|
|
)
|
|
expected = np.array([0, 1, -1], dtype=np.intp)
|
|
tm.assert_numpy_array_equal(result, expected)
|
|
|
|
def test_get_indexer_infer_string_missing_values(self):
|
|
# ensure the passed list is not cast to string but to object so that
|
|
# the None value is matched in the index
|
|
# https://github.com/pandas-dev/pandas/issues/55834
|
|
idx = Index(["a", "b", None], dtype="object")
|
|
result = idx.get_indexer([None, "x"])
|
|
expected = np.array([2, -1], dtype=np.intp)
|
|
tm.assert_numpy_array_equal(result, expected)
|
|
|
|
|
|
class TestGetIndexerNonUnique:
|
|
def test_get_indexer_non_unique_nas(self, nulls_fixture):
|
|
# even though this isn't non-unique, this should still work
|
|
index = Index(["a", "b", nulls_fixture], dtype=object)
|
|
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
|
|
|
|
expected_indexer = np.array([2], dtype=np.intp)
|
|
expected_missing = np.array([], dtype=np.intp)
|
|
tm.assert_numpy_array_equal(indexer, expected_indexer)
|
|
tm.assert_numpy_array_equal(missing, expected_missing)
|
|
|
|
# actually non-unique
|
|
index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object)
|
|
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
|
|
|
|
expected_indexer = np.array([1, 3], dtype=np.intp)
|
|
tm.assert_numpy_array_equal(indexer, expected_indexer)
|
|
tm.assert_numpy_array_equal(missing, expected_missing)
|
|
|
|
# matching-but-not-identical nans
|
|
if is_matching_na(nulls_fixture, float("NaN")):
|
|
index = Index(["a", float("NaN"), "b", float("NaN")], dtype=object)
|
|
match_but_not_identical = True
|
|
elif is_matching_na(nulls_fixture, Decimal("NaN")):
|
|
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")], dtype=object)
|
|
match_but_not_identical = True
|
|
else:
|
|
match_but_not_identical = False
|
|
|
|
if match_but_not_identical:
|
|
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
|
|
|
|
expected_indexer = np.array([1, 3], dtype=np.intp)
|
|
tm.assert_numpy_array_equal(indexer, expected_indexer)
|
|
tm.assert_numpy_array_equal(missing, expected_missing)
|
|
|
|
@pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning")
|
|
def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
|
|
expected_missing = np.array([], dtype=np.intp)
|
|
# matching-but-not-identical nats
|
|
if is_matching_na(np_nat_fixture, np_nat_fixture2):
|
|
# ensure nats are different objects
|
|
index = Index(
|
|
np.array(
|
|
["2021-10-02", np_nat_fixture.copy(), np_nat_fixture2.copy()],
|
|
dtype=object,
|
|
),
|
|
dtype=object,
|
|
)
|
|
# pass as index to prevent target from being casted to DatetimeIndex
|
|
indexer, missing = index.get_indexer_non_unique(
|
|
Index([np_nat_fixture], dtype=object)
|
|
)
|
|
expected_indexer = np.array([1, 2], dtype=np.intp)
|
|
tm.assert_numpy_array_equal(indexer, expected_indexer)
|
|
tm.assert_numpy_array_equal(missing, expected_missing)
|
|
# dt64nat vs td64nat
|
|
else:
|
|
try:
|
|
np_nat_fixture == np_nat_fixture2
|
|
except (TypeError, OverflowError):
|
|
# Numpy will raise on uncomparable types, like
|
|
# np.datetime64('NaT', 'Y') and np.datetime64('NaT', 'ps')
|
|
# https://github.com/numpy/numpy/issues/22762
|
|
return
|
|
index = Index(
|
|
np.array(
|
|
[
|
|
"2021-10-02",
|
|
np_nat_fixture,
|
|
np_nat_fixture2,
|
|
np_nat_fixture,
|
|
np_nat_fixture2,
|
|
],
|
|
dtype=object,
|
|
),
|
|
dtype=object,
|
|
)
|
|
# pass as index to prevent target from being casted to DatetimeIndex
|
|
indexer, missing = index.get_indexer_non_unique(
|
|
Index([np_nat_fixture], dtype=object)
|
|
)
|
|
expected_indexer = np.array([1, 3], dtype=np.intp)
|
|
tm.assert_numpy_array_equal(indexer, expected_indexer)
|
|
tm.assert_numpy_array_equal(missing, expected_missing)
|