BUG: dtype compat with get_dummies · Issue #8725 · pandas-dev/pandas (original) (raw)

@jreback

From a Stack Overflow question:

I think `get_dummies` should not coerce the result to floats here; the dummy columns should have an int (or a categorical) dtype.

In [14]: df = pd.DataFrame({'hour': [0, 1, 3, 8, 13, 14], 'val': np.random.randn(6)})

In [15]: df['hour_cat'] = pd.Categorical(df['hour'], categories=range(24))

In [16]: df
Out[16]: 
   hour       val hour_cat
0     0  0.326395        0
1     1  1.179421        1
2     3 -1.043560        3
3     8  0.430482        8
4    13  0.558709       13
5    14 -0.385931       14

In [17]: df.dtypes
Out[17]: 
hour           int64
val          float64
hour_cat    category
dtype: object

In [6]: pd.get_dummies(df['hour_cat'])
Out[6]: 
   0   1   2   3   4   5   6   7   8   9   10  11  12  13  14  15  16  17  18  19  20  21  22  23
0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
1   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
2   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
3   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
4   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
5   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0

In [7]: pd.get_dummies(df['hour_cat']).dtypes
Out[7]: 
0     float64
1     float64
2     float64
3     float64
4     float64
5     float64
6     float64
7     float64
8     float64
9     float64
10    float64
11    float64
12    float64
13    float64
14    float64
15    float64
16    float64
17    float64
18    float64
19    float64
20    float64
21    float64
22    float64
23    float64
dtype: object