ENH: added ignore_index option to DataFrame.append, and speed optimiz… · pandas-dev/pandas@1ba5625 (original) (raw)
`@@ -19,15 +19,15 @@
`
19
19
`from numpy import nan
`
20
20
`import numpy as np
`
21
21
``
22
``
`-
from pandas.core.common import (isnull, notnull, PandasError, _ensure_index,
`
``
22
`+
from pandas.core.common import (isnull, notnull, PandasError,
`
23
23
`_try_sort, _pfixed, _default_index,
`
24
24
`_infer_dtype, _stringify)
`
25
25
`from pandas.core.daterange import DateRange
`
26
26
`from pandas.core.generic import AxisProperty, NDFrame
`
27
``
`-
from pandas.core.index import Index, MultiIndex, NULL_INDEX
`
``
27
`+
from pandas.core.index import Index, MultiIndex, NULL_INDEX, _ensure_index
`
28
28
`from pandas.core.indexing import _NDFrameIndexer, _maybe_droplevels
`
29
29
`from pandas.core.internals import BlockManager, make_block, form_blocks
`
30
``
`-
from pandas.core.series import Series, _is_bool_indexer
`
``
30
`+
from pandas.core.series import Series, _is_bool_indexer, _maybe_upcast
`
31
31
`from pandas.util.decorators import deprecate
`
32
32
`import pandas.core.common as common
`
33
33
`import pandas.core.datetools as datetools
`
`@@ -2008,11 +2008,18 @@ def f(x):
`
2008
2008
`#----------------------------------------------------------------------
`
2009
2009
`# Merging / joining methods
`
2010
2010
``
2011
``
`-
def append(self, other):
`
``
2011
`+
def append(self, other, ignore_index=False):
`
2012
2012
`"""
`
2013
2013
` Append the rows of other to the end of this frame.
`
2014
2014
` Columns not in this frame are added as new columns.
`
2015
2015
``
``
2016
`+
Parameters
`
``
2017
`+
----------
`
``
2018
`+
other : DataFrame
`
``
2019
`+
ignore_index : boolean, default False
`
``
2020
`+
If True, do not use the index labels. Useful for gluing together
`
``
2021
`+
record arrays
`
``
2022
+
2016
2023
` Returns
`
2017
2024
` -------
`
2018
2025
` appended : DataFrame
`
`@@ -2022,28 +2029,53 @@ def append(self, other):
`
2022
2029
`if not self:
`
2023
2030
`return other.copy()
`
2024
2031
``
2025
``
`-
new_index = np.concatenate((self.index, other.index))
`
2026
``
`-
new_data = {}
`
``
2032
`+
if ignore_index:
`
``
2033
`+
new_index = None
`
``
2034
`+
else:
`
``
2035
`+
new_index = np.concatenate((self.index, other.index))
`
``
2036
+
``
2037
`+
if self.columns.equals(other.columns):
`
``
2038
`+
return self._append_same_columns(other, new_index)
`
``
2039
`+
else:
`
``
2040
`+
return self._append_different_columns(other, new_index)
`
2027
2041
``
2028
``
`-
new_columns = self.columns
`
``
2042
`+
def _append_different_columns(self, other, new_index):
`
``
2043
`+
new_columns = self.columns + other.columns
`
``
2044
`+
new_data = self._append_column_by_column(other)
`
``
2045
`+
return self._constructor(data=new_data, index=new_index,
`
``
2046
`+
columns=new_columns)
`
``
2047
+
``
2048
`+
def _append_same_columns(self, other, new_index):
`
``
2049
`+
if self._is_mixed_type:
`
``
2050
`+
new_data = self._append_column_by_column(other)
`
``
2051
`+
else:
`
``
2052
`+
new_data= np.concatenate((self.values, other.values), axis=0)
`
``
2053
`+
return self._constructor(new_data, index=new_index,
`
``
2054
`+
columns=self.columns)
`
2029
2055
``
2030
``
`-
if not new_columns.equals(other.columns):
`
2031
``
`-
new_columns = self.columns + other.columns
`
``
2056
`+
def _append_column_by_column(self, other):
`
``
2057
`+
def _concat_missing(values, n):
`
``
2058
`+
values = _maybe_upcast(values)
`
``
2059
`+
missing_values = np.empty(n, dtype=values.dtype)
`
``
2060
`+
missing_values.fill(np.nan)
`
``
2061
`+
return values, missing_values
`
2032
2062
``
2033
``
`-
for column, series in self.iteritems():
`
2034
``
`-
values = series.values
`
2035
``
`-
if column in other:
`
2036
``
`-
other_values = other[column].values
`
2037
``
`-
new_data[column] = np.concatenate((values, other_values))
`
``
2063
`+
new_data = {}
`
``
2064
`+
for col in self:
`
``
2065
`+
values = self._data.get(col)
`
``
2066
`+
if col in other:
`
``
2067
`+
other_values = other._data.get(col)
`
2038
2068
`else:
`
2039
``
`-
new_data[column] = series
`
``
2069
`+
values, other_values = _concat_missing(values, len(other))
`
``
2070
`+
new_data[col] = np.concatenate((values, other_values))
`
2040
2071
``
2041
``
`-
for column, series in other.iteritems():
`
2042
``
`-
if column not in self:
`
2043
``
`-
new_data[column] = series
`
``
2072
`+
for col in other:
`
``
2073
`+
values = other._data.get(col)
`
``
2074
`+
if col not in self:
`
``
2075
`+
values, missing_values = _concat_missing(values, len(self))
`
``
2076
`+
new_data[col] = np.concatenate((missing_values, values))
`
2044
2077
``
2045
``
`-
return self._constructor(data=new_data, index=new_index,
`
2046
``
`-
columns=new_columns)
`
``
2078
`+
return new_data
`
2047
2079
``
2048
2080
`def join(self, other, on=None, how=None, lsuffix='', rsuffix=''):
`
2049
2081
`"""
`
`@@ -3137,7 +3169,6 @@ def _homogenize(data, index, columns, dtype=None):
`
3137
3169
``
3138
3170
`return homogenized
`
3139
3171
``
3140
``
-
3141
3172
`def _put_str(s, space):
`
3142
3173
`return ('%s' % s)[:space].ljust(space)
`
3143
3174
``