Add warning if both converter and dtype specified · pandas-dev/pandas@e0d1606 (original) (raw)

`@@ -13,7 +13,7 @@ from cpython cimport (PyObject, PyBytes_FromString,

13

` PyUnicode_Check, PyUnicode_AsUTF8String,

14

` PyErr_Occurred, PyErr_Fetch)

15

`from cpython.ref cimport PyObject, Py_XDECREF

16

from io.common import CParserError, DtypeWarning, EmptyDataError

16

from io.common import CParserError, DtypeWarning, EmptyDataError, ParserWarning

17

18

19

`cdef extern from "Python.h":

`@@ -982,7 +982,7 @@ cdef class TextReader:

982

` Py_ssize_t i, nused

983

` kh_str_t *na_hashset = NULL

984

`int start, end

985

object name, na_flist

985

object name, na_flist, col_dtype = None

986

` bint na_filter = 0

987

` Py_ssize_t num_cols

988

`@@ -1038,14 +1038,33 @@ cdef class TextReader:

1038

`else:

1039

` na_filter = 0

1040

1041

col_dtype = None

1042

if self.dtype is not None:

1043

if isinstance(self.dtype, dict):

1044

if name in self.dtype:

1045

col_dtype = self.dtype[name]

1046

elif i in self.dtype:

1047

col_dtype = self.dtype[i]

1048

else:

1049

if self.dtype.names:

1050

# structured array

1051

col_dtype = np.dtype(self.dtype.descr[i][1])

1052

else:

1053

col_dtype = self.dtype

1054

+

1041

1055

`if conv:

1056

if col_dtype is not None:

1057

warnings.warn(("Both a converter and dtype were specified "

1058

"for column {0} - only the converter will "

1059

"be used").format(name), ParserWarning,

1060

stacklevel=5)

1042

1061

` results[i] = _apply_converter(conv, self.parser, i, start, end,

1043

1062

`self.c_encoding)

1044

1063

`continue

1045

1064

1046

1065

`# Should return as the desired dtype (inferred or specified)

1047

1066

` col_res, na_count = self._convert_tokens(

1048

i, start, end, name, na_filter, na_hashset, na_flist)

1067

i, start, end, name, na_filter, na_hashset, na_flist, col_dtype)

1049

1068

1050

1069

`if na_filter:

1051

1070

`self._free_na_set(na_hashset)

`@@ -1070,32 +1089,17 @@ cdef class TextReader:

1070

1089

` cdef inline _convert_tokens(self, Py_ssize_t i, int start, int end,

1071

1090

`object name, bint na_filter,

1072

1091

` kh_str_t *na_hashset,

1073

object na_flist):

1074

cdef:

1075

object col_dtype = None

1076

-

1077

if self.dtype is not None:

1078

if isinstance(self.dtype, dict):

1079

if name in self.dtype:

1080

col_dtype = self.dtype[name]

1081

elif i in self.dtype:

1082

col_dtype = self.dtype[i]

1083

else:

1084

if self.dtype.names:

1085

# structured array

1086

col_dtype = np.dtype(self.dtype.descr[i][1])

1087

else:

1088

col_dtype = self.dtype

1092

object na_flist, object col_dtype):

1089

1093

1090

if col_dtype is not None:

1091

col_res, na_count = self._convert_with_dtype(

1092

col_dtype, i, start, end, na_filter,

1093

1, na_hashset, na_flist)

1094

if col_dtype is not None:

1095

col_res, na_count = self._convert_with_dtype(

1096

col_dtype, i, start, end, na_filter,

1097

1, na_hashset, na_flist)

1094

1098

1095

# Fallback on the parse (e.g. we requested int dtype,

1096

# but its actually a float).

1097

if col_res is not None:

1098

return col_res, na_count

1099

# Fallback on the parse (e.g. we requested int dtype,

1100

# but its actually a float).

1101

if col_res is not None:

1102

return col_res, na_count

1099

1103

1100

1104

`if i in self.noconvert:

1101

1105

`return self._string_convert(i, start, end, na_filter, na_hashset)