BUG: ENH: Allow callable for on_bad_lines in read_csv when engine="python" #45146 · Issue #46569 · pandas-dev/pandas (original) (raw)
import pandas as pd
# Accumulates every bad line handed to failed_line(), in call order.
lst = []


def failed_line(bad_line: list[str]) -> None:
    """Record a malformed CSV line reported through ``on_bad_lines``.

    Prints a marker so it is visible that the callback fired, then stores
    the line in the module-level ``lst`` for later inspection.

    Parameters
    ----------
    bad_line : list[str]
        The fields of the offending line, as split by the parser.

    Returns
    -------
    None
        Per the pandas ``read_csv`` documentation, a callable that returns
        ``None`` causes the bad line to be skipped. The accumulator itself
        must not be returned: it is a list of lists, and a returned value
        is taken as the replacement row.
    """
    print("failed line")
    lst.append(bad_line)
    return None
# Script entry point: load the sample CSV with explicit per-column dtypes and
# route malformed lines to failed_line().
if __name__ == '__main__':
    # Column name -> dtype passed to read_csv.
    dtypes = {
        "CV_CCT": object,
        "C_NOMBRE": object,
        "CV_TIPO": float,
        "C_TIPO": object,
        "CV_ADMINISTRATIVA": object,
        "C_ADMINISTRATIVA": object,
        "CV_ESTATUS": int,
        "C_ESTATUS": object,
        "F_FUNDACION": object,
        "INMUEBLE_CV_INMUEBLE": object,
        "INMUEBLE_CV_VIALIDAD_PRINCIPAL": int,
        "INMUEBLE_C_VIALIDAD_PRINCIPAL": object,
        "INMUEBLE_CV_VIALIDAD_DERECHA": int,
        "INMUEBLE_C_VIALIDAD_DERECHA": object,
        "INMUEBLE_CV_VIALIDAD_IZQUIERDA": int,
        "INMUEBLE_C_VIALIDAD_IZQUIERDA": object,
        "INMUEBLE_CV_VIALIDAD_POSTERIOR": int,
        "INMUEBLE_C_VIALIDAD_POSTERIOR": object,
        "INMUEBLE_N_EXTNUM": int,
        "INMUEBLE_C_EXTALF": object,
        "INMUEBLE_N_INTNUM": int,
        "INMUEBLE_C_INTALF": object,
        "INMUEBLE_CV_ENT": object,
        "INMUEBLE_C_NOM_ENT": object,
        "INMUEBLE_CV_MUN": object,
        "INMUEBLE_C_NOM_MUN": object,
        "INMUEBLE_CV_LOC": object,
        "INMUEBLE_C_NOM_LOC": object,
        "INMUEBLE_CV_ASEN": object,
        "INMUEBLE_C_NOM_ASEN": object,
        "INMUEBLE_CV_CODIGO_POSTAL": object,
        "INMUEBLE_C_DESC_UBICACION": object,
        "INMUEBLE_LATITUD": float,
        "INMUEBLE_LONGITUD": float,
        "SOSTENIMIENTO_CV_CONTROL": int,
        "SOSTENIMIENTO_C_CONTROL": object,
        "SOSTENIMIENTO_CV_SUBCONTROL": int,
        "SOSTENIMIENTO_C_SUBCONTROL": object,
        "SOSTENIMIENTO_CV_DEPENDENCIAN1": int,
        "SOSTENIMIENTO_C_DEPENDENCIAN1": object,
        "SOSTENIMIENTO_CV_DEPENDENCIAN2": int,
        "SOSTENIMIENTO_C_DEPENDENCIAN2": object,
        "SOSTENIMIENTO_CV_DEPENDENCIAN3": int,
        "SOSTENIMIENTO_C_DEPENDENCIAN3": object,
        "SOSTENIMIENTO_CV_DEPENDENCIAN4": int,
        "SOSTENIMIENTO_C_DEPENDENCIAN4": object,
        "SOSTENIMIENTO_CV_DEPENDENCIAN5": int,
        "SOSTENIMIENTO_C_DEPENDENCIAN5": object,
        "SOSTENIMIENTO_CV_SERVICIO": object,
        "SOSTENIMIENTO_C_SERVICIO": object,
        "DEPOPERATIVA_CV_DEPENDENCIAN1": int,
        "DEPOPERATIVA_C_DEPENDENCIAN1": object,
        "DEPOPERATIVA_CV_DEPENDENCIAN2": int,
        "DEPOPERATIVA_C_DEPENDENCIAN2": object,
        "DEPOPERATIVA_CV_DEPENDENCIAN3": int,
        "DEPOPERATIVA_C_DEPENDENCIAN3": object,
        "DEPOPERATIVA_CV_DEPENDENCIAN4": int,
        "DEPOPERATIVA_C_DEPENDENCIAN4": object,
        "DEPOPERATIVA_CV_DEPENDENCIAN5": int,
        "DEPOPERATIVA_C_DEPENDENCIAN5": object,
        "CONTACTO_CV_CARGO": int,
        "CONTACTO_C_CARGO": object,
        "CONTACTO_CV_TIPODIRECTOR": int,
        "CONTACTO_C_TIPODIRECTOR": object,
        "CONTACTO_C_ASOCIACION": object,
        "CONTACTO_C_CURP": object,
        "CONTACTO_C_RFC": object,
        "CONTACTO_C_NOMBRE": object,
        "CONTACTO_C_APELLIDO1": object,
        "CONTACTO_C_APELLIDO2": object,
        "CONTACTO_C_TELEFONO": object,
        "CONTACTO_C_CELULAR": object,
        "CONTACTO_C_EMAIL": object,
        "CONTACTO_C_EXTENSION": object,
        "CONTACTO_C_PWEB": object,
        "SERREG_CV_CCT": object,
        "JEFSEC_CV_CCT": object,
        "SUPERVISION_CV_CCT": object,
        "C_TUNO_1": object,
        "C_TUNO_2": object,
        "C_TUNO_3": object,
        "TIPONIVELSUB_CV_SERVICION1": int,
        "TIPONIVELSUB_C_SERVICION1": object,
        "TIPONIVELSUB_CV_SERVICION2": int,
        "TIPONIVELSUB_C_SERVICION2": object,
        "TIPONIVELSUB_CV_SERVICION3": int,
        "TIPONIVELSUB_C_SERVICION3": object,
        "C_SERVICIO_CAM": object,
        "CARACTERISTCA_CV_CARACTERIZAN1": int,
        "CARACTERISTCA_C_CARACTERIZAN1": object,
        "CARACTERISTCA_CV_CARACTERIZAN2": int,
        "CARACTERISTCA_C_CARACTERIZAN2": object,
        "Unnamed: 92": object,
    }

    # engine='python' is used because a callable on_bad_lines is only
    # supported by the Python parser engine (see the issue title).
    # NOTE(review): 'ansi' appears to be an alias of the Windows-only 'mbcs'
    # codec, so this call presumably fails with LookupError on other
    # platforms; confirm the file's real encoding before changing it.
    df = pd.read_csv(
        "cct15_bad.csv",
        encoding='ansi',
        dtype=dtypes,
        on_bad_lines=failed_line,
        engine='python',
    )
Issue description: `failed_line()` never gets fired.
Expected behavior: `failed_line()` gets fired.
[cct15_bad.zip](https://github.com/pandas-dev/pandas/files/8377144/cct15_bad.zip)