BUG: to_csv should allow writing of dupe cols if within same block GH… · pandas-dev/pandas@1f138a4 (original) (raw)
`@@ -803,11 +803,20 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
`
803
803
`ncols = sum(len(b.items) for b in self.blocks)
`
804
804
`self.data =[None] * ncols
`
805
805
``
806
``
`-
# fail early if we have duplicate columns
`
807
``
`-
if len(set(self.cols)) != len(self.cols):
`
808
``
`-
raise Exception("duplicate columns are not permitted in to_csv")
`
``
806
`+
if self.obj.columns.is_unique:
`
``
807
`+
self.colname_map = dict((k,i) for i,k in enumerate(obj.columns))
`
``
808
`+
else:
`
``
809
`+
ks = [set(x.items) for x in self.blocks]
`
``
810
`+
u = len(reduce(lambda a,x: a.union(x),ks,set()))
`
``
811
`+
t = sum(map(len,ks))
`
``
812
`+
if u != t:
`
``
813
`+
if len(set(self.cols)) != len(self.cols):
`
``
814
`+
raise NotImplementedError("duplicate columns with differing dtypes are unsupported")
`
``
815
`+
else:
`
``
816
`+
# if columns are not unique and we access this,
`
``
817
`+
# we're doing it wrong
`
``
818
`+
pass
`
809
819
``
810
``
`-
self.colname_map = dict((k,i) for i,k in enumerate(obj.columns))
`
811
820
``
812
821
`if chunksize is None:
`
813
822
`chunksize = (100000/ (len(self.cols) or 1)) or 1
`
`@@ -1002,17 +1011,20 @@ def _save(self):
`
1002
1011
``
1003
1012
`def _save_chunk(self, start_i, end_i):
`
1004
1013
``
1005
``
`-
colname_map = self.colname_map
`
1006
1014
`data_index = self.data_index
`
1007
1015
``
1008
1016
`# create the data for a chunk
`
1009
1017
`slicer = slice(start_i,end_i)
`
1010
``
`-
for i in range(len(self.blocks)):
`
1011
``
`-
b = self.blocks[i]
`
1012
``
`-
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
`
1013
``
`-
for j, k in enumerate(b.items):
`
1014
``
`-
# self.data is a preallocated list
`
1015
``
`-
self.data[colname_map[k]] = d[j]
`
``
1018
`+
if self.obj.columns.is_unique:
`
``
1019
`+
for i in range(len(self.blocks)):
`
``
1020
`+
b = self.blocks[i]
`
``
1021
`+
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
`
``
1022
`+
for j, k in enumerate(b.items):
`
``
1023
`+
# self.data is a preallocated list
`
``
1024
`+
self.data[self.colname_map[k]] = d[j]
`
``
1025
`+
else:
`
``
1026
`+
for i in range(len(self.cols)):
`
``
1027
`+
self.data[i] = self.obj.icol(i).values[slicer].tolist()
`
1016
1028
``
1017
1029
`ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
`
1018
1030
``