Add warning if both converter and dtype specified · pandas-dev/pandas@e0d1606 (original) (raw)

`@@ -13,7 +13,7 @@ from cpython cimport (PyObject, PyBytes_FromString,

`

13

13

` PyUnicode_Check, PyUnicode_AsUTF8String,

`

14

14

` PyErr_Occurred, PyErr_Fetch)

`

15

15

`from cpython.ref cimport PyObject, Py_XDECREF

`

16

``

`-

from io.common import CParserError, DtypeWarning, EmptyDataError

`

``

16

`+

from io.common import CParserError, DtypeWarning, EmptyDataError, ParserWarning

`

17

17

``

18

18

``

19

19

`cdef extern from "Python.h":

`

`@@ -982,7 +982,7 @@ cdef class TextReader:

`

982

982

` Py_ssize_t i, nused

`

983

983

` kh_str_t *na_hashset = NULL

`

984

984

`int start, end

`

985

``

`-

object name, na_flist

`

``

985

`+

object name, na_flist, col_dtype = None

`

986

986

` bint na_filter = 0

`

987

987

` Py_ssize_t num_cols

`

988

988

``

`@@ -1038,14 +1038,33 @@ cdef class TextReader:

`

1038

1038

`else:

`

1039

1039

` na_filter = 0

`

1040

1040

``

``

1041

`+

col_dtype = None

`

``

1042

`+

if self.dtype is not None:

`

``

1043

`+

if isinstance(self.dtype, dict):

`

``

1044

`+

if name in self.dtype:

`

``

1045

`+

col_dtype = self.dtype[name]

`

``

1046

`+

elif i in self.dtype:

`

``

1047

`+

col_dtype = self.dtype[i]

`

``

1048

`+

else:

`

``

1049

`+

if self.dtype.names:

`

``

1050

`+

# structured array

`

``

1051

`+

col_dtype = np.dtype(self.dtype.descr[i][1])

`

``

1052

`+

else:

`

``

1053

`+

col_dtype = self.dtype

`

``

1054

+

1041

1055

`if conv:

`

``

1056

`+

if col_dtype is not None:

`

``

1057

`+

warnings.warn(("Both a converter and dtype were specified "

`

``

1058

`+

"for column {0} - only the converter will "

`

``

1059

`+

"be used").format(name), ParserWarning,

`

``

1060

`+

stacklevel=5)

`

1042

1061

` results[i] = _apply_converter(conv, self.parser, i, start, end,

`

1043

1062

`self.c_encoding)

`

1044

1063

`continue

`

1045

1064

``

1046

1065

`# Should return as the desired dtype (inferred or specified)

`

1047

1066

` col_res, na_count = self._convert_tokens(

`

1048

``

`-

i, start, end, name, na_filter, na_hashset, na_flist)

`

``

1067

`+

i, start, end, name, na_filter, na_hashset, na_flist, col_dtype)

`

1049

1068

``

1050

1069

`if na_filter:

`

1051

1070

`self._free_na_set(na_hashset)

`

`@@ -1070,32 +1089,17 @@ cdef class TextReader:

`

1070

1089

` cdef inline _convert_tokens(self, Py_ssize_t i, int start, int end,

`

1071

1090

`object name, bint na_filter,

`

1072

1091

` kh_str_t *na_hashset,

`

1073

``

`-

object na_flist):

`

1074

``

`-

cdef:

`

1075

``

`-

object col_dtype = None

`

1076

``

-

1077

``

`-

if self.dtype is not None:

`

1078

``

`-

if isinstance(self.dtype, dict):

`

1079

``

`-

if name in self.dtype:

`

1080

``

`-

col_dtype = self.dtype[name]

`

1081

``

`-

elif i in self.dtype:

`

1082

``

`-

col_dtype = self.dtype[i]

`

1083

``

`-

else:

`

1084

``

`-

if self.dtype.names:

`

1085

``

`-

# structured array

`

1086

``

`-

col_dtype = np.dtype(self.dtype.descr[i][1])

`

1087

``

`-

else:

`

1088

``

`-

col_dtype = self.dtype

`

``

1092

`+

object na_flist, object col_dtype):

`

1089

1093

``

1090

``

`-

if col_dtype is not None:

`

1091

``

`-

col_res, na_count = self._convert_with_dtype(

`

1092

``

`-

col_dtype, i, start, end, na_filter,

`

1093

``

`-

1, na_hashset, na_flist)

`

``

1094

`+

if col_dtype is not None:

`

``

1095

`+

col_res, na_count = self._convert_with_dtype(

`

``

1096

`+

col_dtype, i, start, end, na_filter,

`

``

1097

`+

1, na_hashset, na_flist)

`

1094

1098

``

1095

``

`-

# Fallback on the parse (e.g. we requested int dtype,

`

1096

``

`-

# but its actually a float).

`

1097

``

`-

if col_res is not None:

`

1098

``

`-

return col_res, na_count

`

``

1099

`+

# Fallback on the parse (e.g. we requested int dtype,

`

``

1100

`+

# but its actually a float).

`

``

1101

`+

if col_res is not None:

`

``

1102

`+

return col_res, na_count

`

1099

1103

``

1100

1104

`if i in self.noconvert:

`

1101

1105

`return self._string_convert(i, start, end, na_filter, na_hashset)

`