PERF: Parse certain dates in Cython instead of falling back to dateutil.parse by vnlitvinov · Pull Request #25922 · pandas-dev/pandas (original) (raw)

cdef inline object _parse_dateabbr_string(object date_string, object default,
object freq):
cdef:
object ret
int year, quarter = -1, month, mnum, date_len
# special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1
assert isinstance(date_string, (str, unicode))
# len(date_string) == 0
# should be NaT???
if date_string in nat_strings:
return NaT, NaT, ''
date_string = date_string.upper()
date_len = len(date_string)
if date_len == 4:
# parse year only like 2000
try:
ret = default.replace(year=int(date_string))
return ret, ret, 'year'
except ValueError:
pass
try:
if 4 <= date_len <= 7:
i = date_string.index('Q', 1, 6)
if i == 1:
quarter = int(date_string[0])
if date_len == 4 or (date_len == 5
and date_string[i + 1] == '-'):
# r'(\d)Q-?(\d\d)')
year = 2000 + int(date_string[-2:])
elif date_len == 6 or (date_len == 7
and date_string[i + 1] == '-'):
# r'(\d)Q-?(\d\d\d\d)')
year = int(date_string[-4:])
else:
raise ValueError
elif i == 2 or i == 3:
# r'(\d\d)-?Q(\d)'
if date_len == 4 or (date_len == 5
and date_string[i - 1] == '-'):
quarter = int(date_string[-1])
year = 2000 + int(date_string[:2])
else:
raise ValueError
elif i == 4 or i == 5:
if date_len == 6 or (date_len == 7
and date_string[i - 1] == '-'):
# r'(\d\d\d\d)-?Q(\d)'
quarter = int(date_string[-1])
year = int(date_string[:4])
else:
raise ValueError
if not (1 <= quarter <= 4):
msg = ('Incorrect quarterly string is given, quarter must be '
'between 1 and 4: {dstr}')
raise DateParseError(msg.format(dstr=date_string))
if freq is not None:
# hack attack, #1228
try:
mnum = MONTH_NUMBERS[_get_rule_month(freq)] + 1
except (KeyError, ValueError):
msg = ('Unable to retrieve month information from given '
'freq: {freq}'.format(freq=freq))
raise DateParseError(msg)
month = (mnum + (quarter - 1) * 3) % 12 + 1
if month > mnum:
year -= 1
else:
month = (quarter - 1) * 3 + 1
ret = default.replace(year=year, month=month)
return ret, ret, 'quarter'
except DateParseError:
raise
except ValueError:
pass
if date_len == 6 and (freq == 'M' or
getattr(freq, 'rule_code', None) == 'M'):
year = int(date_string[:4])
month = int(date_string[4:6])
try:
ret = default.replace(year=year, month=month)
return ret, ret, 'month'
except ValueError:
pass
for pat in ['%Y-%m', '%m-%Y', '%b %Y', '%b-%Y']:
try:
ret = datetime.strptime(date_string, pat)
return ret, ret, 'month'
except ValueError:
pass
raise ValueError('Unable to parse {0}'.format(date_string))