BUG: dtype compat with get_dummies · Issue #8725 · pandas-dev/pandas (original) (raw)
From Stack Overflow:
I think `get_dummies` should not coerce the dummy columns to floats; they should be ints (or a categorical).
In [14]: df = pd.DataFrame({'hour': [0, 1, 3, 8, 13, 14], 'val': np.random.randn(6)})
In [15]: df['hour_cat'] = pd.Categorical(df['hour'], categories=range(24))
In [16]: df
Out[16]:
hour val hour_cat
0 0 0.326395 0
1 1 1.179421 1
2 3 -1.043560 3
3 8 0.430482 8
4 13 0.558709 13
5 14 -0.385931 14
In [17]: df.dtypes
Out[17]:
hour int64
val float64
hour_cat category
dtype: object
In [6]: pd.get_dummies(df['hour_cat'])
Out[6]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
In [7]: pd.get_dummies(df['hour_cat']).dtypes
Out[7]:
0 float64
1 float64
2 float64
3 float64
4 float64
5 float64
6 float64
7 float64
8 float64
9 float64
10 float64
11 float64
12 float64
13 float64
14 float64
15 float64
16 float64
17 float64
18 float64
19 float64
20 float64
21 float64
22 float64
23 float64
dtype: object