Ensure isinstance checks use ABC classes where available (original) (raw)

Here's a quick little script I wrote to check for things like `isinstance(obj, MultiIndex)`:

import ast
import pathlib

# Taken from pandas.core.dtypes.generic

# Class names that are reported when they appear as the type argument of an
# ``isinstance`` call (looked up by ``check_name_or_attr``).
BAD_NAMES = {
    'Index',
    'Int64Index',
    'UInt64Index',
    'RangeIndex',
    'Float64Index',
    'MultiIndex',
    'DatetimeIndex',
    'TimedeltaIndex',
    'PeriodIndex',
    'CategoricalIndex',
    'IntervalIndex',
    'IndexClass',
    'Series',
    'DataFrame',
    'SparseDataFrame',
    'SparseSeries',
    'SparseArray',
    'Categorical',
    'DatetimeArray',
    'TimedeltaArray',
    'PeriodArray',
    'Period',
    'DateOffset',
    'Interval',
    'ExtensionArray',
    'PandasArray',
}

def check_name_or_attr(filename, node):
    """Print a report line if ``node`` refers to a class listed in ``BAD_NAMES``.

    Parameters
    ----------
    filename
        Value shown as the file part of the report line.
    node
        AST node taken from the type argument of an ``isinstance`` call.
        Only ``ast.Name`` and ``ast.Attribute`` nodes are inspected; any
        other node type is ignored.
    """
    if isinstance(node, ast.Name):
        # Bare reference, e.g. ``DataFrame``.
        identifier = node.id
    elif isinstance(node, ast.Attribute):
        # Qualified reference, e.g. ``pd.DataFrame``; only the trailing
        # attribute name is compared.
        identifier = node.attr
    else:
        return

    if identifier in BAD_NAMES:
        print(f"{filename}:{node.lineno} isinstance check of {identifier}")

def iter_isinstance_type_nodes(tree):
    """Yield the AST node(s) used as the type argument of each ``isinstance`` call.

    Every ``ast.Call`` in ``tree`` whose callee is the bare name
    ``isinstance`` is considered, wherever it appears (``if`` tests,
    ``assert``, boolean expressions, return values, ...). Matching on the
    ``Call`` node itself, rather than probing ``node.value``, is what lets
    calls that are not the direct ``value`` of a parent node be found.

    Parameters
    ----------
    tree
        Root AST node to search, e.g. the result of ``ast.parse``.

    Yields
    ------
    ast.AST
        The second positional argument of the call, or each element of it
        when that argument is a tuple literal.
    """
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue

        callee = node.func
        if not (isinstance(callee, ast.Name) and callee.id == 'isinstance'):
            continue

        if len(node.args) < 2:
            # e.g. ``isinstance(*args)``: there is no separate type argument
            # to inspect, so skip instead of raising IndexError.
            continue

        types = node.args[1]
        if isinstance(types, ast.Tuple):
            yield from types.elts
        else:
            yield types


def main():
    """Scan every ``*.py`` file below the current directory and report hits.

    Each type node found by ``iter_isinstance_type_nodes`` is handed to
    ``check_name_or_attr`` together with the resolved path of the file.
    """
    for path in pathlib.Path('.').glob('**/*.py'):
        filename = path.resolve()

        # Parse from bytes so the source encoding declared in the file
        # (BOM / coding cookie, UTF-8 by default) is honoured instead of
        # the locale's default text encoding.
        tree = ast.parse(path.read_bytes(), filename=str(path))

        for type_node in iter_isinstance_type_nodes(tree):
            check_name_or_attr(filename, type_node)


if __name__ == '__main__':
    main()

Running that from project root yields all of the following errors:

/Users/williamayd/clones/pandas/pandas/core/frame.py:4681 isinstance check of MultiIndex
/Users/williamayd/clones/pandas/pandas/core/indexing.py:2499 isinstance check of MultiIndex
/Users/williamayd/clones/pandas/pandas/core/indexing.py:564 isinstance check of MultiIndex
/Users/williamayd/clones/pandas/pandas/core/reshape/tile.py:540 isinstance check of Series
/Users/williamayd/clones/pandas/pandas/core/reshape/concat.py:373 isinstance check of DataFrame
/Users/williamayd/clones/pandas/pandas/core/reshape/concat.py:377 isinstance check of Series
/Users/williamayd/clones/pandas/pandas/core/computation/align.py:72 isinstance check of Series
/Users/williamayd/clones/pandas/pandas/core/arrays/period.py:931 isinstance check of Period
/Users/williamayd/clones/pandas/pandas/core/arrays/period.py:932 isinstance check of Period
/Users/williamayd/clones/pandas/pandas/core/indexes/base.py:3626 isinstance check of MultiIndex
/Users/williamayd/clones/pandas/pandas/core/indexes/base.py:3627 isinstance check of MultiIndex
/Users/williamayd/clones/pandas/pandas/io/formats/excel.py:453 isinstance check of Index
/Users/williamayd/clones/pandas/pandas/io/formats/excel.py:501 isinstance check of Index
/Users/williamayd/clones/pandas/pandas/io/formats/excel.py:552 isinstance check of Index
/Users/williamayd/clones/pandas/pandas/io/formats/excel.py:592 isinstance check of Index

So I think these need to be swapped out for their ABC equivalents (e.g. `ABCMultiIndex` from `pandas.core.dtypes.generic` instead of `MultiIndex`).