// Source: polars_core/chunked_array/array/mod.rs
//! Special fixed-size-list utility methods
2
3mod iterator;
4
5use std::borrow::Cow;
6
7use either::Either;
8
9use crate::prelude::*;
10
11impl ArrayChunked {
12 /// Get the inner data type of the fixed size list.
13 pub fn inner_dtype(&self) -> &DataType {
14 match self.dtype() {
15 DataType::Array(dt, _size) => dt.as_ref(),
16 _ => unreachable!(),
17 }
18 }
19
20 pub fn width(&self) -> usize {
21 match self.dtype() {
22 DataType::Array(_dt, size) => *size,
23 _ => unreachable!(),
24 }
25 }
26
27 /// # Safety
28 /// The caller must ensure that the logical type given fits the physical type of the array.
29 pub unsafe fn to_logical(&mut self, inner_dtype: DataType) {
30 debug_assert_eq!(&inner_dtype.to_physical(), self.inner_dtype());
31 let width = self.width();
32 let fld = Arc::make_mut(&mut self.field);
33 fld.coerce(DataType::Array(Box::new(inner_dtype), width))
34 }
35
36 /// Convert the datatype of the array into the physical datatype.
37 pub fn to_physical_repr(&self) -> Cow<ArrayChunked> {
38 let Cow::Owned(physical_repr) = self.get_inner().to_physical_repr() else {
39 return Cow::Borrowed(self);
40 };
41
42 let chunk_len_validity_iter =
43 if physical_repr.chunks().len() == 1 && self.chunks().len() > 1 {
44 // Physical repr got rechunked, rechunk our validity as well.
45 Either::Left(std::iter::once((self.len(), self.rechunk_validity())))
46 } else {
47 // No rechunking, expect the same number of chunks.
48 assert_eq!(self.chunks().len(), physical_repr.chunks().len());
49 Either::Right(
50 self.chunks()
51 .iter()
52 .map(|c| (c.len(), c.validity().cloned())),
53 )
54 };
55
56 let width = self.width();
57 let chunks: Vec<_> = chunk_len_validity_iter
58 .zip(physical_repr.into_chunks())
59 .map(|((len, validity), values)| {
60 FixedSizeListArray::new(
61 ArrowDataType::FixedSizeList(
62 Box::new(ArrowField::new(
63 PlSmallStr::from_static("item"),
64 values.dtype().clone(),
65 true,
66 )),
67 width,
68 ),
69 len,
70 values,
71 validity,
72 )
73 .to_boxed()
74 })
75 .collect();
76
77 let name = self.name().clone();
78 let dtype = DataType::Array(Box::new(self.inner_dtype().to_physical()), width);
79 Cow::Owned(unsafe { ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
80 }
81
82 /// Convert a non-logical [`ArrayChunked`] back into a logical [`ArrayChunked`] without casting.
83 ///
84 /// # Safety
85 ///
86 /// This can lead to invalid memory access in downstream code.
87 pub unsafe fn from_physical_unchecked(&self, to_inner_dtype: DataType) -> PolarsResult<Self> {
88 debug_assert!(!self.inner_dtype().is_logical());
89
90 let chunks = self
91 .downcast_iter()
92 .map(|chunk| chunk.values())
93 .cloned()
94 .collect();
95
96 let inner = unsafe {
97 Series::from_chunks_and_dtype_unchecked(PlSmallStr::EMPTY, chunks, self.inner_dtype())
98 };
99 let inner = unsafe { inner.from_physical_unchecked(&to_inner_dtype) }?;
100
101 let chunks: Vec<_> = self
102 .downcast_iter()
103 .zip(inner.into_chunks())
104 .map(|(chunk, values)| {
105 FixedSizeListArray::new(
106 ArrowDataType::FixedSizeList(
107 Box::new(ArrowField::new(
108 PlSmallStr::from_static("item"),
109 values.dtype().clone(),
110 true,
111 )),
112 self.width(),
113 ),
114 chunk.len(),
115 values,
116 chunk.validity().cloned(),
117 )
118 .to_boxed()
119 })
120 .collect();
121
122 let name = self.name().clone();
123 let dtype = DataType::Array(Box::new(to_inner_dtype), self.width());
124 Ok(unsafe { Self::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
125 }
126
127 /// Get the inner values as `Series`
128 pub fn get_inner(&self) -> Series {
129 let chunks: Vec<_> = self.downcast_iter().map(|c| c.values().clone()).collect();
130
131 // SAFETY: Data type of arrays matches because they are chunks from the same array.
132 unsafe {
133 Series::from_chunks_and_dtype_unchecked(self.name().clone(), chunks, self.inner_dtype())
134 }
135 }
136
137 /// Ignore the list indices and apply `func` to the inner type as [`Series`].
138 pub fn apply_to_inner(
139 &self,
140 func: &dyn Fn(Series) -> PolarsResult<Series>,
141 ) -> PolarsResult<ArrayChunked> {
142 // Rechunk or the generated Series will have wrong length.
143 let ca = self.rechunk();
144 let arr = ca.downcast_as_array();
145
146 // SAFETY:
147 // Inner dtype is passed correctly
148 let elements = unsafe {
149 Series::from_chunks_and_dtype_unchecked(
150 self.name().clone(),
151 vec![arr.values().clone()],
152 ca.inner_dtype(),
153 )
154 };
155
156 let expected_len = elements.len();
157 let out: Series = func(elements)?;
158 polars_ensure!(
159 out.len() == expected_len,
160 ComputeError: "the function should apply element-wise, it removed elements instead"
161 );
162 let out = out.rechunk();
163 let values = out.chunks()[0].clone();
164
165 let inner_dtype = FixedSizeListArray::default_datatype(values.dtype().clone(), ca.width());
166 let arr = FixedSizeListArray::new(inner_dtype, arr.len(), values, arr.validity().cloned());
167
168 Ok(unsafe {
169 ArrayChunked::from_chunks_and_dtype_unchecked(
170 self.name().clone(),
171 vec![arr.into_boxed()],
172 DataType::Array(Box::new(out.dtype().clone()), self.width()),
173 )
174 })
175 }
176
177 /// Recurse nested types until we are at the leaf array.
178 pub fn get_leaf_array(&self) -> Series {
179 let mut current = self.get_inner();
180 while let Some(child_array) = current.try_array() {
181 current = child_array.get_inner();
182 }
183 current
184 }
185}