Dispatch pd.isna for scalar extension value · Issue #27825 · pandas-dev/pandas (original) (raw)
Navigation Menu
- Explore
- Pricing
Provide feedback
Saved searches
Use saved searches to filter your results more quickly
Appearance settings
Description
Right now, I don't believe there's a way for an ExtensionDtype to declare a custom scalar NA value and have pd.isna(scalar)
do the right thing.
# Private sentinel: the only value NaSType's constructor accepts, so NaS
# instances cannot be created from ordinary strings by accident.
_nas = object()


class NaSType(str):
    """
    NA for String type.

    A ``str`` subclass used as a missing-value marker. Instances never
    compare equal to anything (including themselves), always compare
    unequal, and render as ``"NaS"``.
    """

    # TODO: enforce singleton
    def __new__(cls, value):
        """
        Create the NA marker.

        Parameters
        ----------
        value : object
            Must be the module-private ``_nas`` sentinel.

        Raises
        ------
        ValueError
            If ``value`` is anything other than the ``_nas`` sentinel.
        """
        if value is not _nas:
            raise ValueError("Cannot create NaS from '{}'".format(value))
        return super().__new__(cls, value)

    def __eq__(self, other):
        # TODO: array comparisons, etc.
        return False

    def __ne__(self, other):
        # Without this override ``str.__ne__`` is inherited and compares the
        # underlying text, so ``NaS != NaS`` would be False even though
        # ``NaS == NaS`` is also False. Keep ``!=`` the negation of ``==``.
        return True

    def __hash__(self):
        # Defining ``__eq__`` sets ``__hash__`` to None unless it is restored
        # explicitly, which would make NaS unusable in sets and as a dict key.
        # Equality is always False, so an identity-based hash is consistent.
        return object.__hash__(self)

    def __str__(self):
        return "NaS"

    def __repr__(self):
        return str(self)


NaS = NaSType(_nas)
@register_extension_dtype
class StringDtype(ExtensionDtype):
    """
    Extension dtype for string data whose scalar NA value is ``NaS``.
    """

    @property
    def name(self) -> str:
        """Name of this dtype: ``"string"``."""
        return "string"

    @property
    def type(self) -> Type:
        """Scalar type reported for this dtype: ``str``."""
        return str

    @property
    def na_value(self):
        """Scalar missing-value marker for this dtype: ``NaS``."""
        return NaS

    @classmethod
    def construct_array_type(cls) -> "Type[StringArray]":
        """Return the array class paired with this dtype (``StringArray``)."""
        return StringArray

    @classmethod
    def construct_from_string(cls, string: str):
        """
        Build the dtype from its string alias.

        ``"string"`` and ``"str"`` both yield a new instance; any other
        value is delegated to the parent class implementation.
        """
        aliases = {"string", "str"}
        if string not in aliases:
            # Not one of our aliases: let the base class decide.
            return super().construct_from_string(string)
        return cls()
In [18]: NaS
Out[18]: NaS

In [19]: pd.isna(NaS)
Out[19]: False
That should be `True`. In https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/missing.py#L131-L132 we go straight to `libmissing.checknull(obj)` for scalar values, so a custom scalar NA defined by an extension dtype is never consulted.