ENH: added ignore_index option to DataFrame.append, and speed optimizations · pandas-dev/pandas@1ba5625 (original) (raw)

`@@ -19,15 +19,15 @@

`

19

19

`from numpy import nan

`

20

20

`import numpy as np

`

21

21

``

22

``

`-

from pandas.core.common import (isnull, notnull, PandasError, _ensure_index,

`

``

22

`+

from pandas.core.common import (isnull, notnull, PandasError,

`

23

23

`_try_sort, _pfixed, _default_index,

`

24

24

`_infer_dtype, _stringify)

`

25

25

`from pandas.core.daterange import DateRange

`

26

26

`from pandas.core.generic import AxisProperty, NDFrame

`

27

``

`-

from pandas.core.index import Index, MultiIndex, NULL_INDEX

`

``

27

`+

from pandas.core.index import Index, MultiIndex, NULL_INDEX, _ensure_index

`

28

28

`from pandas.core.indexing import _NDFrameIndexer, _maybe_droplevels

`

29

29

`from pandas.core.internals import BlockManager, make_block, form_blocks

`

30

``

`-

from pandas.core.series import Series, _is_bool_indexer

`

``

30

`+

from pandas.core.series import Series, _is_bool_indexer, _maybe_upcast

`

31

31

`from pandas.util.decorators import deprecate

`

32

32

`import pandas.core.common as common

`

33

33

`import pandas.core.datetools as datetools

`

`@@ -2008,11 +2008,18 @@ def f(x):

`

2008

2008

`#----------------------------------------------------------------------

`

2009

2009

`# Merging / joining methods

`

2010

2010

``

2011

``

`-

def append(self, other):

`

``

2011

`+

def append(self, other, ignore_index=False):

`

2012

2012

`"""

`

2013

2013

` Append columns of other to end of this frame's columns and index.

`

2014

2014

` Columns not in this frame are added as new columns.

`

2015

2015

``

``

2016

`+

Parameters

`

``

2017

`+

----------
`

``

2018

`+

other : DataFrame

`

``

2019

`+

ignore_index : boolean, default False

`

``

2020

`+

If True do not use the index labels. Useful for gluing together

`

``

2021

`+

record arrays

`

``

2022

+

2016

2023

` Returns

`

2017

2024

` -------

`

2018

2025

` appended : DataFrame

`

`@@ -2022,28 +2029,53 @@ def append(self, other):

`

2022

2029

`if not self:

`

2023

2030

`return other.copy()

`

2024

2031

``

2025

``

`-

new_index = np.concatenate((self.index, other.index))

`

2026

``

`-

new_data = {}

`

``

2032

`+

if ignore_index:

`

``

2033

`+

new_index = None

`

``

2034

`+

else:

`

``

2035

`+

new_index = np.concatenate((self.index, other.index))

`

``

2036

+

``

2037

`+

if self.columns.equals(other.columns):

`

``

2038

`+

return self._append_same_columns(other, new_index)

`

``

2039

`+

else:

`

``

2040

`+

return self._append_different_columns(other, new_index)

`

2027

2041

``

2028

``

`-

new_columns = self.columns

`

``

2042

`+

def _append_different_columns(self, other, new_index):

`

``

2043

`+

new_columns = self.columns + other.columns

`

``

2044

`+

new_data = self._append_column_by_column(other)

`

``

2045

`+

return self._constructor(data=new_data, index=new_index,

`

``

2046

`+

columns=new_columns)

`

``

2047

+

``

2048

`+

def _append_same_columns(self, other, new_index):

`

``

2049

`+

if self._is_mixed_type:

`

``

2050

`+

new_data = self._append_column_by_column(other)

`

``

2051

`+

else:

`

``

2052

`+

new_data= np.concatenate((self.values, other.values), axis=0)

`

``

2053

`+

return self._constructor(new_data, index=new_index,

`

``

2054

`+

columns=self.columns)

`

2029

2055

``

2030

``

`-

if not new_columns.equals(other.columns):

`

2031

``

`-

new_columns = self.columns + other.columns

`

``

2056

`+

def _append_column_by_column(self, other):

`

``

2057

`+

def _concat_missing(values, n):

`

``

2058

`+

values = _maybe_upcast(values)

`

``

2059

`+

missing_values = np.empty(n, dtype=values.dtype)

`

``

2060

`+

missing_values.fill(np.nan)

`

``

2061

`+

return values, missing_values

`

2032

2062

``

2033

``

`-

for column, series in self.iteritems():

`

2034

``

`-

values = series.values

`

2035

``

`-

if column in other:

`

2036

``

`-

other_values = other[column].values

`

2037

``

`-

new_data[column] = np.concatenate((values, other_values))

`

``

2063

`+

new_data = {}

`

``

2064

`+

for col in self:

`

``

2065

`+

values = self._data.get(col)

`

``

2066

`+

if col in other:

`

``

2067

`+

other_values = other._data.get(col)

`

2038

2068

`else:

`

2039

``

`-

new_data[column] = series

`

``

2069

`+

values, other_values = _concat_missing(values, len(other))

`

``

2070

`+

new_data[col] = np.concatenate((values, other_values))

`

2040

2071

``

2041

``

`-

for column, series in other.iteritems():

`

2042

``

`-

if column not in self:

`

2043

``

`-

new_data[column] = series

`

``

2072

`+

for col in other:

`

``

2073

`+

values = other._data.get(col)

`

``

2074

`+

if col not in self:

`

``

2075

`+

values, missing_values = _concat_missing(values, len(self))

`

``

2076

`+

new_data[col] = np.concatenate((missing_values, values))

`

2044

2077

``

2045

``

`-

return self._constructor(data=new_data, index=new_index,

`

2046

``

`-

columns=new_columns)

`

``

2078

`+

return new_data

`

2047

2079

``

2048

2080

`def join(self, other, on=None, how=None, lsuffix='', rsuffix=''):

`

2049

2081

`"""

`

`@@ -3137,7 +3169,6 @@ def _homogenize(data, index, columns, dtype=None):

`

3137

3169

``

3138

3170

`return homogenized

`

3139

3171

``

3140

``

-

3141

3172

`def _put_str(s, space):

`

3142

3173

`return ('%s' % s)[:space].ljust(space)

`

3143

3174

``