BUG: to_csv should allow writing of dupe cols if within same block GH… · pandas-dev/pandas@1f138a4

```diff
@@ -803,11 +803,20 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
         ncols = sum(len(b.items) for b in self.blocks)
         self.data =[None] * ncols
 
-        # fail early if we have duplicate columns
-        if len(set(self.cols)) != len(self.cols):
-            raise Exception("duplicate columns are not permitted in to_csv")
+        if self.obj.columns.is_unique:
+            self.colname_map = dict((k,i) for i,k in enumerate(obj.columns))
+        else:
+            ks = [set(x.items) for x in self.blocks]
+            u = len(reduce(lambda a,x: a.union(x),ks,set()))
+            t = sum(map(len,ks))
+            if u != t:
+                if len(set(self.cols)) != len(self.cols):
+                    raise NotImplementedError("duplicate columns with differing dtypes are unsupported")
+            else:
+                # if columns are not unique and we access this,
+                # we're doing it wrong
+                pass
 
-        self.colname_map = dict((k,i) for i,k in enumerate(obj.columns))
 
         if chunksize is None:
             chunksize = (100000/ (len(self.cols) or 1)) or 1
```
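The replacement for the blanket `Exception` only needs block membership: duplicate names confined to a single block (one dtype) are let through, while a name repeated across blocks (differing dtypes) is still rejected. The `u != t` comparison detects exactly that case, since the union of the per-block item sets is smaller than their combined size only when some name appears in more than one block. A small sketch of the two cases, using invented frames that are not part of the commit:

```python
# Hypothetical data illustrating the two cases the new check separates.
import pandas as pd
from io import StringIO

# Both 'a' columns are float64 and so live in one block: this change lets
# to_csv write them instead of raising.
same_block = pd.DataFrame({'x': [1.0, 2.0], 'y': [3.0, 4.0], 'z': [5.0, 6.0]})
same_block.columns = ['a', 'a', 'b']
same_block.to_csv(StringIO())

# Here 'a' is duplicated across a float64 block and an object block, which is
# the case the new guard rejects (as of this commit) with NotImplementedError.
split_blocks = pd.DataFrame({'x': [1.0, 2.0], 'y': ['p', 'q'], 'z': [5.0, 6.0]})
split_blocks.columns = ['a', 'a', 'b']
try:
    split_blocks.to_csv(StringIO())
except NotImplementedError as exc:
    print(exc)  # duplicate columns with differing dtypes are unsupported
```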

```diff
@@ -1002,17 +1011,20 @@ def _save(self):
 
     def _save_chunk(self, start_i, end_i):
 
-        colname_map = self.colname_map
         data_index = self.data_index
 
         # create the data for a chunk
         slicer = slice(start_i,end_i)
-        for i in range(len(self.blocks)):
-            b = self.blocks[i]
-            d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
-            for j, k in enumerate(b.items):
-                # self.data is a preallocated list
-                self.data[colname_map[k]] = d[j]
+        if self.obj.columns.is_unique:
+            for i in range(len(self.blocks)):
+                b = self.blocks[i]
+                d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
+                for j, k in enumerate(b.items):
+                    # self.data is a preallocated list
+                    self.data[self.colname_map[k]] = d[j]
+        else:
+            for i in range(len(self.cols)):
+                self.data[i] = self.obj.icol(i).values[slicer].tolist()
 
         ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
 
```
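In `_save_chunk` the unique-column fast path keeps the preallocated-list write through `colname_map`, while the non-unique path falls back to positional access with `icol`, because a label lookup on a duplicated name is ambiguous. A hedged illustration of that distinction, with an invented frame and `iloc` standing in for the era's `icol` accessor:

```python
# Why the non-unique branch indexes by position: a duplicated label returns
# every matching column, while an integer position names exactly one.
import pandas as pd

df = pd.DataFrame([[1.0, 2.0, 3.0]], columns=['a', 'a', 'b'])

print(df['a'].shape)        # (1, 2): label lookup picks up both 'a' columns
print(df.iloc[:, 0].shape)  # (1,): positional lookup yields a single column
# df.icol(i), used in the diff, is this era's positional column accessor;
# df.iloc[:, i] is the equivalent spelling in later pandas.
```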