ENH: unit of measurement / physical quantities · Issue #10349 · pandas-dev/pandas (original) (raw)

quantities related
xref #2494
xref #1071

custom meta-data
xref #2485

It would be very convenient if unit support could be integrated into pandas.
Idea: pandas checks whether a column has a unit attribute and, if present, uses it in calculations involving that column.

In my example I use the pint module and add a 'unit' attribute (and a 'title' attribute) to each column.

Example:

from pandas import DataFrame as DF
from pint import UnitRegistry

# Shared pint registry; called with a unit string to obtain a pint quantity.
units = UnitRegistry()

class ColumnDescription(object):
    '''Column description with additional attributes.

    The idea is to use this description to be able to add unit and title
    attributes to a column description in one step.

    A list of ColumnDescriptions is then used as argument to DataFrame()
    with unit support.

    Attributes:
        data (list): List of the column data.
        name (str): Name of the column, naming convention similar to python
            variables. Used to access the column with pandas syntax,
            e.g. df['column'] or df.column.
        title (str): Title of the column. More human readable than the
            'name'. E.g. title: 'This is a column title',
            name: 'column_title'.
        unit (str): Unit of the column (see module pint). Intended to be
            used in calculations involving different columns.
    '''

    def __init__(self, name, data, title=None, unit=None):
        '''Store the column data together with its descriptive attributes.

        Args:
            name (str): Name of the column.
            data (list): List of the column data.
            title (str): Title of the column. Defaults to None.
            unit (str): Unit of the column (see documentation of module
                pint). Defaults to None.
        '''
        self.data = data
        self.name = name
        self.title = title
        self.unit = unit

class DataFrame(DF):
    '''Data Frame with support for ColumnDescriptions (e.g. unit support).

    1. See documentation of pandas.DataFrame.
    2. When used with ColumnDescriptions supports additional column attributes
       like title and unit.
    '''

    def __init__(self, data, title=None):
        '''Build the frame from plain pandas input or from ColumnDescriptions.

        Args:
            data (list or dict):
                1. Dict, as in documentation of DataFrame
                2. List of the column data (of type ColumnDescription).
                Any other input (including an empty list or a list of
                something other than ColumnDescription) is handed to
                pandas.DataFrame unchanged.
            title (str): Title of the data frame. Defaults to None.
        '''
        # Only the first element is inspected, so a list is treated as
        # ColumnDescriptions when it is non-empty and starts with one.
        from_descriptions = (isinstance(data, list)
                             and len(data) > 0
                             and isinstance(data[0], ColumnDescription))

        if from_descriptions:
            super(DataFrame, self).__init__(
                {column.name: column.data for column in data})

            # NOTE(review): these attributes are set on the Series object
            # returned by self[...]. They are only visible on later lookups
            # if pandas hands back that same object - confirm for the pandas
            # version in use.
            for column in data:
                self[column.name].title = column.title
                self[column.name].unit = column.unit
        else:
            super(DataFrame, self).__init__(data)

        # Bypass pandas' attribute-to-column assignment so that a column that
        # happens to be named 'title' is never overwritten by the frame title.
        object.__setattr__(self, 'title', title)

# Demo: build the same frame from a plain dict and from ColumnDescriptions,
# then combine two columns as pint quantities.
if __name__ == '__main__':

    data = [ColumnDescription('length',
                              [1, 10],
                              title='Length in meter',
                              unit='meter'),
            ColumnDescription('time',
                              [10, 1],
                              title='Time in s',
                              unit='s')]

    d = {'length': [1, 10],
         'time': [10, 1]}
    df = DataFrame(d)
    print('standard df')
    print(df)

    df = DataFrame(data)
    print('\n' + 'new df')
    print(df)

    #### use of dimensions ####
    # pint works with numpy arrays.
    # Using df[name] directly does not currently work with pint; it would be
    # a real enhancement if it did. Until then the column values are pulled
    # out as a 2-D numpy array first.
    test = df[['length']].values * units(df['length'].unit) / \
        (df[['time']].values * units(df['time'].unit))
    print('\n' + 'unit test')
    print(test)
    print('\n' + 'magnitude')
    print(test.magnitude)
    print('\n' + 'dimensionality')
    print(test.dimensionality)