Add warning if both converter and dtype specified · pandas-dev/pandas@e0d1606 (original) (raw)
`@@ -13,7 +13,7 @@ from cpython cimport (PyObject, PyBytes_FromString,
`
13
13
` PyUnicode_Check, PyUnicode_AsUTF8String,
`
14
14
` PyErr_Occurred, PyErr_Fetch)
`
15
15
`from cpython.ref cimport PyObject, Py_XDECREF
`
16
``
`-
from io.common import CParserError, DtypeWarning, EmptyDataError
`
``
16
`+
from io.common import CParserError, DtypeWarning, EmptyDataError, ParserWarning
`
17
17
``
18
18
``
19
19
`cdef extern from "Python.h":
`
`@@ -982,7 +982,7 @@ cdef class TextReader:
`
982
982
` Py_ssize_t i, nused
`
983
983
` kh_str_t *na_hashset = NULL
`
984
984
`int start, end
`
985
``
`-
object name, na_flist
`
``
985
`+
object name, na_flist, col_dtype = None
`
986
986
` bint na_filter = 0
`
987
987
` Py_ssize_t num_cols
`
988
988
``
`@@ -1038,14 +1038,33 @@ cdef class TextReader:
`
1038
1038
`else:
`
1039
1039
` na_filter = 0
`
1040
1040
``
``
1041
`+
col_dtype = None
`
``
1042
`+
if self.dtype is not None:
`
``
1043
`+
if isinstance(self.dtype, dict):
`
``
1044
`+
if name in self.dtype:
`
``
1045
`+
col_dtype = self.dtype[name]
`
``
1046
`+
elif i in self.dtype:
`
``
1047
`+
col_dtype = self.dtype[i]
`
``
1048
`+
else:
`
``
1049
`+
if self.dtype.names:
`
``
1050
`+
# structured array
`
``
1051
`+
col_dtype = np.dtype(self.dtype.descr[i][1])
`
``
1052
`+
else:
`
``
1053
`+
col_dtype = self.dtype
`
``
1054
+
1041
1055
`if conv:
`
``
1056
`+
if col_dtype is not None:
`
``
1057
`+
warnings.warn(("Both a converter and dtype were specified "
`
``
1058
`+
"for column {0} - only the converter will "
`
``
1059
`+
"be used").format(name), ParserWarning,
`
``
1060
`+
stacklevel=5)
`
1042
1061
` results[i] = _apply_converter(conv, self.parser, i, start, end,
`
1043
1062
`self.c_encoding)
`
1044
1063
`continue
`
1045
1064
``
1046
1065
`# Should return as the desired dtype (inferred or specified)
`
1047
1066
` col_res, na_count = self._convert_tokens(
`
1048
``
`-
i, start, end, name, na_filter, na_hashset, na_flist)
`
``
1067
`+
i, start, end, name, na_filter, na_hashset, na_flist, col_dtype)
`
1049
1068
``
1050
1069
`if na_filter:
`
1051
1070
`self._free_na_set(na_hashset)
`
`@@ -1070,32 +1089,17 @@ cdef class TextReader:
`
1070
1089
` cdef inline _convert_tokens(self, Py_ssize_t i, int start, int end,
`
1071
1090
`object name, bint na_filter,
`
1072
1091
` kh_str_t *na_hashset,
`
1073
``
`-
object na_flist):
`
1074
``
`-
cdef:
`
1075
``
`-
object col_dtype = None
`
1076
``
-
1077
``
`-
if self.dtype is not None:
`
1078
``
`-
if isinstance(self.dtype, dict):
`
1079
``
`-
if name in self.dtype:
`
1080
``
`-
col_dtype = self.dtype[name]
`
1081
``
`-
elif i in self.dtype:
`
1082
``
`-
col_dtype = self.dtype[i]
`
1083
``
`-
else:
`
1084
``
`-
if self.dtype.names:
`
1085
``
`-
# structured array
`
1086
``
`-
col_dtype = np.dtype(self.dtype.descr[i][1])
`
1087
``
`-
else:
`
1088
``
`-
col_dtype = self.dtype
`
``
1092
`+
object na_flist, object col_dtype):
`
1089
1093
``
1090
``
`-
if col_dtype is not None:
`
1091
``
`-
col_res, na_count = self._convert_with_dtype(
`
1092
``
`-
col_dtype, i, start, end, na_filter,
`
1093
``
`-
1, na_hashset, na_flist)
`
``
1094
`+
if col_dtype is not None:
`
``
1095
`+
col_res, na_count = self._convert_with_dtype(
`
``
1096
`+
col_dtype, i, start, end, na_filter,
`
``
1097
`+
1, na_hashset, na_flist)
`
1094
1098
``
1095
``
`-
# Fallback on the parse (e.g. we requested int dtype,
`
1096
``
`-
# but its actually a float).
`
1097
``
`-
if col_res is not None:
`
1098
``
`-
return col_res, na_count
`
``
1099
`+
# Fallback on the parse (e.g. we requested int dtype,
`
``
1100
`+
# but its actually a float).
`
``
1101
`+
if col_res is not None:
`
``
1102
`+
return col_res, na_count
`
1099
1103
``
1100
1104
`if i in self.noconvert:
`
1101
1105
`return self._string_convert(i, start, end, na_filter, na_hashset)
`