BUG: to_csv should allow writing of dupe cols if within same block GH… · pandas-dev/pandas@1f138a4

```diff
@@ -803,11 +803,20 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
         ncols = sum(len(b.items) for b in self.blocks)
         self.data =[None] * ncols
 
-        # fail early if we have duplicate columns
-        if len(set(self.cols)) != len(self.cols):
-            raise Exception("duplicate columns are not permitted in to_csv")
+        if self.obj.columns.is_unique:
+            self.colname_map = dict((k,i) for i,k in enumerate(obj.columns))
+        else:
+            ks = [set(x.items) for x in self.blocks]
+            u = len(reduce(lambda a,x: a.union(x),ks,set()))
+            t = sum(map(len,ks))
+            if u != t:
+                if len(set(self.cols)) != len(self.cols):
+                    raise NotImplementedError("duplicate columns with differing dtypes are unsupported")
+            else:
+                # if columns are not unique and we access this,
+                # we're doing it wrong
+                pass
 
-        self.colname_map = dict((k,i) for i,k in enumerate(obj.columns))
 
         if chunksize is None:
             chunksize = (100000/ (len(self.cols) or 1)) or 1
```
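The replacement for the blanket `Exception` only needs block membership: duplicate names confined to a single block (one dtype) are let through, while a name repeated across blocks (differing dtypes) is still rejected. The `u != t` comparison detects exactly that case, since the union of the per-block item sets is smaller than their combined size only when some name appears in more than one block. A small sketch of the two cases, using invented frames that are not part of the commit:

```python
# Hypothetical data illustrating the two cases the new check separates.
import pandas as pd
from io import StringIO

# Both 'a' columns are float64 and so live in one block: this change lets
# to_csv write them instead of raising.
same_block = pd.DataFrame({'x': [1.0, 2.0], 'y': [3.0, 4.0], 'z': [5.0, 6.0]})
same_block.columns = ['a', 'a', 'b']
same_block.to_csv(StringIO())

# Here 'a' is duplicated across a float64 block and an object block, which is
# the case the new guard rejects (as of this commit) with NotImplementedError.
split_blocks = pd.DataFrame({'x': [1.0, 2.0], 'y': ['p', 'q'], 'z': [5.0, 6.0]})
split_blocks.columns = ['a', 'a', 'b']
try:
    split_blocks.to_csv(StringIO())
except NotImplementedError as exc:
    print(exc)  # duplicate columns with differing dtypes are unsupported
```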

```diff
@@ -1002,17 +1011,20 @@ def _save(self):
 
     def _save_chunk(self, start_i, end_i):
 
-        colname_map = self.colname_map
         data_index = self.data_index
 
         # create the data for a chunk
         slicer = slice(start_i,end_i)
-        for i in range(len(self.blocks)):
-            b = self.blocks[i]
-            d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
-            for j, k in enumerate(b.items):
-                # self.data is a preallocated list
-                self.data[colname_map[k]] = d[j]
+        if self.obj.columns.is_unique:
+            for i in range(len(self.blocks)):
+                b = self.blocks[i]
+                d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
+                for j, k in enumerate(b.items):
+                    # self.data is a preallocated list
+                    self.data[self.colname_map[k]] = d[j]
+        else:
+            for i in range(len(self.cols)):
+                self.data[i] = self.obj.icol(i).values[slicer].tolist()
 
         ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
 
```
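In `_save_chunk` the unique-column fast path keeps the preallocated-list write through `colname_map`, while the non-unique path falls back to positional access with `icol`, because a label lookup on a duplicated name is ambiguous. A hedged illustration of that distinction, with an invented frame and `iloc` standing in for the era's `icol` accessor:

```python
# Why the non-unique branch indexes by position: a duplicated label returns
# every matching column, while an integer position names exactly one.
import pandas as pd

df = pd.DataFrame([[1.0, 2.0, 3.0]], columns=['a', 'a', 'b'])

print(df['a'].shape)        # (1, 2): label lookup picks up both 'a' columns
print(df.iloc[:, 0].shape)  # (1,): positional lookup yields a single column
# df.icol(i), used in the diff, is this era's positional column accessor;
# df.iloc[:, i] is the equivalent spelling in later pandas.
```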