Tensors — Apache Arrow v20.0.0 (original) (raw)
Dense Tensors#
class Tensor#
Subclassed by arrow::NumericTensor< TYPE >
Public Functions
Tensor(const std::shared_ptr<DataType> &type, const std::shared_ptr<Buffer> &data, const std::vector<int64_t> &shape)#
Constructor with no dimension names or strides, data assumed to be row-major.
Tensor(const std::shared_ptr<DataType> &type, const std::shared_ptr<Buffer> &data, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides)#
Constructor with non-negative strides.
Tensor(const std::shared_ptr<DataType> &type, const std::shared_ptr<Buffer> &data, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides, const std::vector<std::string> &dim_names)#
Constructor with non-negative strides and dimension names.
int64_t size() const#
Total number of value cells in the tensor.
inline bool is_mutable() const#
Return true if the underlying data buffer is mutable.
bool is_contiguous() const#
Either row major or column major.
bool is_row_major() const#
AKA “C order”.
bool is_column_major() const#
AKA “Fortran order”.
Result<int64_t> CountNonZero() const#
Compute the number of non-zero values in the tensor.
template<typename ValueType>
inline const ValueType::c_type &Value(const std::vector<int64_t> &index) const#
Returns the value at the given index without data-type and bounds checks.
Public Static Functions
static inline Result<std::shared_ptr<Tensor>> Make(const std::shared_ptr<DataType> &type, const std::shared_ptr<Buffer> &data, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides = {}, const std::vector<std::string> &dim_names = {})#
Create a Tensor with full parameters.
This factory function will return Status::Invalid when the parameters are inconsistent
Parameters:
- type – [in] The data type of the tensor values
- data – [in] The buffer of the tensor content
- shape – [in] The shape of the tensor
- strides – [in] The strides of the tensor (if this is empty, the data is assumed to be row-major)
- dim_names – [in] The names of the tensor dimensions
static inline int64_t CalculateValueOffset(const std::vector<int64_t> &strides, const std::vector<int64_t> &index)#
Return the offset of the given index on the given strides.
template<typename TYPE>
class NumericTensor : public arrow::Tensor#
Public Functions
inline NumericTensor(const std::shared_ptr<Buffer> &data, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides, const std::vector<std::string> &dim_names)#
Constructor with non-negative strides and dimension names.
inline NumericTensor(const std::shared_ptr<Buffer> &data, const std::vector<int64_t> &shape)#
Constructor with no dimension names or strides, data assumed to be row-major.
inline NumericTensor(const std::shared_ptr<Buffer> &data, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides)#
Constructor with non-negative strides.
Public Static Functions
static inline Result<std::shared_ptr<NumericTensor<TYPE>>> Make(const std::shared_ptr<Buffer> &data, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides = {}, const std::vector<std::string> &dim_names = {})#
Create a NumericTensor with full parameters.
This factory function will return Status::Invalid when the parameters are inconsistent
Parameters:
- data – [in] The buffer of the tensor content
- shape – [in] The shape of the tensor
- strides – [in] The strides of the tensor (if this is empty, the data is assumed to be row-major)
- dim_names – [in] The names of the tensor dimensions
Sparse Tensors#
enum arrow::SparseTensorFormat::type#
EXPERIMENTAL: The index format type of SparseTensor.
Values:
enumerator COO#
Coordinate list (COO) format.
enumerator CSR#
Compressed sparse row (CSR) format.
enumerator CSC#
Compressed sparse column (CSC) format.
enumerator CSF#
Compressed sparse fiber (CSF) format.
class SparseIndex#
EXPERIMENTAL: The base class for the index of a sparse tensor.
SparseIndex describes where the non-zero elements are within a SparseTensor.
There are several ways to represent this. The format_id is used to distinguish what kind of representation is used. Each possible value of format_id must have only one corresponding concrete subclass of SparseIndex.
Subclassed by arrow::internal::SparseIndexBase< SparseCOOIndex >, arrow::internal::SparseIndexBase< SparseCSCIndex >, arrow::internal::SparseIndexBase< SparseCSFIndex >, arrow::internal::SparseIndexBase< SparseCSRIndex >, arrow::internal::SparseIndexBase< SparseIndexType >
Public Functions
inline SparseTensorFormat::type format_id() const#
Return the identifier of the format type.
virtual int64_t non_zero_length() const = 0#
Return the number of non zero values in the sparse tensor related to this sparse index.
virtual std::string ToString() const = 0#
Return the string representation of the sparse index.
class SparseCOOIndex : public arrow::internal::SparseIndexBase<SparseCOOIndex>#
EXPERIMENTAL: The index data for a COO sparse tensor.
A COO sparse index manages the location of its non-zero values by their coordinates.
Public Functions
explicit SparseCOOIndex(const std::shared_ptr<Tensor> &coords, bool is_canonical)#
Construct SparseCOOIndex from column-major NumericTensor.
inline const std::shared_ptr<Tensor> &indices() const#
Return a tensor that has the coordinates of the non-zero values.
The returned tensor is an N x D tensor where N is the number of non-zero values and D is the number of dimensions in the logical data. The column at index i is a D-tuple of coordinates indicating that the logical value at those coordinates should be found at physical index i.
inline virtual int64_t non_zero_length() const override#
Return the number of non zero values in the sparse tensor related to this sparse index.
inline bool is_canonical() const#
Return whether a sparse tensor index is canonical, or not.
If a sparse tensor index is canonical, it is sorted in the lexicographical order, and the corresponding sparse tensor doesn’t have duplicated entries.
virtual std::string ToString() const override#
Return a string representation of the sparse index.
inline bool Equals(const SparseCOOIndex &other) const#
Return whether the COO indices are equal.
Public Static Functions
static Result<std::shared_ptr<SparseCOOIndex>> Make(const std::shared_ptr<Tensor> &coords, bool is_canonical)#
Make SparseCOOIndex from a coords tensor and canonicality.
static Result<std::shared_ptr<SparseCOOIndex>> Make(const std::shared_ptr<Tensor> &coords)#
Make SparseCOOIndex from a coords tensor with canonicality auto-detection.
static Result<std::shared_ptr<SparseCOOIndex>> Make(const std::shared_ptr<DataType> &indices_type, const std::vector<int64_t> &indices_shape, const std::vector<int64_t> &indices_strides, std::shared_ptr<Buffer> indices_data)#
Make SparseCOOIndex from raw properties with canonicality auto-detection.
static Result<std::shared_ptr<SparseCOOIndex>> Make(const std::shared_ptr<DataType> &indices_type, const std::vector<int64_t> &indices_shape, const std::vector<int64_t> &indices_strides, std::shared_ptr<Buffer> indices_data, bool is_canonical)#
Make SparseCOOIndex from raw properties.
static Result<std::shared_ptr<SparseCOOIndex>> Make(const std::shared_ptr<DataType> &indices_type, const std::vector<int64_t> &shape, int64_t non_zero_length, std::shared_ptr<Buffer> indices_data)#
Make SparseCOOIndex from sparse tensor’s shape properties and data with canonicality auto-detection.
The indices_data should be in row-major (C-like) order. If not, use the raw properties constructor.
static Result<std::shared_ptr<SparseCOOIndex>> Make(const std::shared_ptr<DataType> &indices_type, const std::vector<int64_t> &shape, int64_t non_zero_length, std::shared_ptr<Buffer> indices_data, bool is_canonical)#
Make SparseCOOIndex from sparse tensor’s shape properties and data.
The indices_data should be in row-major (C-like) order. If not, use the raw properties constructor.
class SparseCSRIndex : public arrow::internal::SparseCSXIndex<SparseCSRIndex, internal::SparseMatrixCompressedAxis::ROW>#
EXPERIMENTAL: The index data for a CSR sparse matrix.
A CSR sparse index manages the location of its non-zero values by two vectors.
The first vector, called indptr, represents the range of the rows; the i-th row spans from indptr[i] to indptr[i+1] in the corresponding value vector. So the length of an indptr vector is the number of rows + 1.
The other vector, called indices, represents the column indices of the corresponding non-zero values. So the length of an indices vector is the same as the number of non-zero values.
class SparseTensor#
EXPERIMENTAL: The base class of sparse tensor container.
Subclassed by arrow::SparseTensorImpl< SparseIndexType >
Public Functions
inline std::shared_ptr<DataType> type() const#
Return a value type of the sparse tensor.
inline std::shared_ptr<Buffer> data() const#
Return a buffer that contains the value vector of the sparse tensor.
inline const uint8_t *raw_data() const#
Return an immutable raw data pointer.
inline uint8_t *raw_mutable_data() const#
Return a mutable raw data pointer.
inline const std::vector<int64_t> &shape() const#
Return a shape vector of the sparse tensor.
inline const std::shared_ptr<SparseIndex> &sparse_index() const#
Return a sparse index of the sparse tensor.
inline int ndim() const#
Return the number of dimensions of the sparse tensor.
inline const std::vector<std::string> &dim_names() const#
Return a vector of dimension names.
const std::string &dim_name(int i) const#
Return the name of the i-th dimension.
int64_t size() const#
Total number of value cells in the sparse tensor.
inline bool is_mutable() const#
Return true if the underlying data buffer is mutable.
inline int64_t non_zero_length() const#
Total number of non-zero cells in the sparse tensor.
bool Equals(const SparseTensor &other, const EqualOptions& = EqualOptions::Defaults()) const#
Return whether sparse tensors are equal.
Result<std::shared_ptr<Tensor>> ToTensor(MemoryPool *pool) const#
Return dense representation of sparse tensor as tensor.
The returned Tensor has row-major order (C-like).
template<typename SparseIndexType>
class SparseTensorImpl : public arrow::SparseTensor#
EXPERIMENTAL: Concrete sparse tensor implementation classes with sparse index type.
Public Functions
inline SparseTensorImpl(const std::shared_ptr<SparseIndexType> &sparse_index, const std::shared_ptr<DataType> &type, const std::shared_ptr<Buffer> &data, const std::vector<int64_t> &shape, const std::vector<std::string> &dim_names)#
Construct a sparse tensor from physical data buffer and logical index.
inline SparseTensorImpl(const std::shared_ptr<DataType> &type, const std::vector<int64_t> &shape, const std::vector<std::string> &dim_names = {})#
Construct an empty sparse tensor.
Public Static Functions
static inline Result<std::shared_ptr<SparseTensorImpl<SparseIndexType>>> Make(const std::shared_ptr<SparseIndexType> &sparse_index, const std::shared_ptr<DataType> &type, const std::shared_ptr<Buffer> &data, const std::vector<int64_t> &shape, const std::vector<std::string> &dim_names)#
Create a SparseTensor with full parameters.
static inline Result<std::shared_ptr<SparseTensorImpl<SparseIndexType>>> Make(const Tensor &tensor, const std::shared_ptr<DataType> &index_value_type, MemoryPool *pool = default_memory_pool())#
Create a sparse tensor from a dense tensor.
The dense tensor is re-encoded as a sparse index and a physical data buffer for the non-zero value.
using arrow::SparseCOOTensor = SparseTensorImpl<SparseCOOIndex>#
EXPERIMENTAL: Type alias for COO sparse tensor.
using arrow::SparseCSCMatrix = SparseTensorImpl<SparseCSCIndex>#
EXPERIMENTAL: Type alias for CSC sparse matrix.
using arrow::SparseCSFTensor = SparseTensorImpl<SparseCSFIndex>#
EXPERIMENTAL: Type alias for CSF sparse tensor.
using arrow::SparseCSRMatrix = SparseTensorImpl<SparseCSRIndex>#
EXPERIMENTAL: Type alias for CSR sparse matrix.