Reimplement fraction string parsing without slowish regular expressions. · scoder/quicktions@cc034e0 (original) (raw)

`@@ -91,21 +91,6 @@ except AttributeError: # pre Py3.2

`

91

91

` _PyHASH_INF = hash(float('+inf'))

`

92

92

``

93

93

``

94

``

`-

cdef object _parse_rational = re.compile(r"""

`

95

``

`-

\A\s* # optional whitespace at the start, then

`

96

``

`-

(?P<sign>[-+]?) # an optional sign, then

`

97

``

`-

(?=\d|\.\d) # lookahead for digit or .digit

`

98

``

`-

(?P<num>\d*) # numerator (possibly empty)

`

99

``

`-

(?: # followed by

`

100

``

`-

(?:/(?P<denom>\d+))? # an optional denominator

`

101

``

`-

| # or

`

102

``

`-

(?:\.(?P<decimal>\d*))? # an optional fractional part

`

103

``

`-

(?:E(?P<exp>[-+]?\d+))? # and optional exponent

`

104

``

`-

)

`

105

``

`-

\s*\Z # and optional whitespace to finish

`

106

``

`-

""", re.VERBOSE | re.IGNORECASE).match

`

107

``

-

108

``

-

109

94

`cdef class Fraction:

`

110

95

`"""A Rational number.

`

111

96

``

`@@ -161,34 +146,12 @@ cdef class Fraction:

`

161

146

`self._denominator = (<Fraction>numerator)._denominator

`

162

147

`return

`

163

148

``

164

``

`-

elif isinstance(numerator, basestring):

`

165

``

`-

# Handle construction from strings.

`

166

``

`-

m = _parse_rational(numerator)

`

167

``

`-

if m is None:

`

168

``

`-

raise ValueError('Invalid literal for Fraction: %r' %

`

169

``

`-

numerator)

`

170

``

`-

group = m.group

`

171

``

`-

numerator = int(group('num') or 0)

`

172

``

`-

denom = group('denom')

`

173

``

`-

if denom:

`

174

``

`-

denominator = int(denom)

`

175

``

`-

else:

`

176

``

`-

decimal = group('decimal')

`

177

``

`-

if decimal:

`

178

``

`-

scale = 10 ** len(decimal)

`

179

``

`-

numerator = numerator * scale + int(decimal)

`

180

``

`-

denominator = scale

`

181

``

`-

else:

`

182

``

`-

denominator = 1

`

183

``

`-

exp = group('exp')

`

184

``

`-

if exp:

`

185

``

`-

exp = int(exp)

`

186

``

`-

if exp >= 0:

`

187

``

`-

numerator *= 10**exp

`

188

``

`-

else:

`

189

``

`-

denominator *= 10**-exp

`

190

``

`-

if group('sign') == '-':

`

191

``

`-

numerator = -numerator

`

``

149

`+

elif isinstance(numerator, unicode):

`

``

150

`+

numerator, denominator = _parse_fraction(numerator)

`

``

151

`+

# fall through to normalisation below

`

``

152

+

``

153

`+

elif PY_MAJOR_VERSION < 3 and isinstance(numerator, bytes):

`

``

154

`+

numerator, denominator = _parse_fraction(numerator)

`

192

155

`# fall through to normalisation below

`

193

156

``

194

157

`elif isinstance(numerator, (Fraction, Rational)):

`

`@@ -234,7 +197,7 @@ cdef class Fraction:

`

234

197

`"Rational instances")

`

235

198

``

236

199

`if denominator == 0:

`

237

``

`-

raise ZeroDivisionError('Fraction(%s, 0)' % numerator)

`

``

200

`+

raise ZeroDivisionError(f'Fraction({numerator}, 0)')

`

238

201

`if _normalize:

`

239

202

` g = _gcd(numerator, denominator)

`

240

203

`# NOTE: 'is' tests on integers are generally a bad idea, but

`

`@@ -268,11 +231,10 @@ cdef class Fraction:

`

268

231

`if isinstance(f, numbers.Integral):

`

269

232

`return cls(f)

`

270

233

`elif not isinstance(f, float):

`

271

``

`-

raise TypeError("%s.from_float() only takes floats, not %r (%s)" %

`

272

``

`-

(cls.__name__, f, type(f).__name__))

`

``

234

`+

raise TypeError(f"{cls.__name__}.from_float() only takes floats, not {f!r} ({type(f).__name__})")

`

273

235

`if math.isinf(f):

`

274

``

`-

raise OverflowError("Cannot convert %r to %s." % (f, cls.__name__))

`

275

``

`-

raise ValueError("Cannot convert %r to %s." % (f, cls.__name__))

`

``

236

`+

raise OverflowError(f"Cannot convert {f!r} to {cls.__name__}.")

`

``

237

`+

raise ValueError(f"Cannot convert {f!r} to {cls.__name__}.")

`

276

238

``

277

239

`@classmethod

`

278

240

`def from_decimal(cls, dec):

`

`@@ -281,13 +243,11 @@ cdef class Fraction:

`

281

243

` dec = Decimal(int(dec))

`

282

244

`elif not isinstance(dec, Decimal):

`

283

245

`raise TypeError(

`

284

``

`-

"%s.from_decimal() only takes Decimals, not %r (%s)" %

`

285

``

`-

(cls.__name__, dec, type(dec).__name__))

`

``

246

`+

f"{cls.__name__}.from_decimal() only takes Decimals, not {dec!r} ({type(dec).__name__})")

`

286

247

`if dec.is_infinite():

`

287

``

`-

raise OverflowError(

`

288

``

`-

"Cannot convert %s to %s." % (dec, cls.__name__))

`

``

248

`+

raise OverflowError(f"Cannot convert {dec} to {cls.__name__}.")

`

289

249

`if dec.is_nan():

`

290

``

`-

raise ValueError("Cannot convert %s to %s." % (dec, cls.__name__))

`

``

250

`+

raise ValueError(f"Cannot convert {dec} to {cls.__name__}.")

`

291

251

` sign, digits, exp = dec.as_tuple()

`

292

252

` digits = int(''.join(map(str, digits)))

`

293

253

`if sign:

`

`@@ -683,6 +643,10 @@ cdef class Fraction:

`

683

643

`return type(self)(self._numerator, self._denominator)

`

684

644

``

685

645

``

``

646

`+

# Register with Python's numerical tower.

`

``

647

`+

Rational.register(Fraction)

`

``

648

+

``

649

+

686

650

`cdef _pow(an, ad, bn, bd):

`

687

651

`if bd == 1:

`

688

652

`if bn >= 0:

`

`@@ -842,4 +806,101 @@ cdef reverse(a, b, math_func monomorphic_operator, str pyoperator_name):

`

842

806

`return NotImplemented

`

843

807

``

844

808

``

845

``

`-

Rational.register(Fraction)

`

``

809

`+

ctypedef fused AnyString:

`

``

810

`+

bytes

`

``

811

`+

unicode

`

``

812

+

``

813

+

``

814

`+

cdef _raise_invalid_input(s):

`

``

815

`+

raise ValueError(f'Invalid literal for Fraction: {s!r}') from None

`

``

816

+

``

817

+

``

818

`+

cdef tuple _parse_fraction(AnyString s):

`

``

819

`+

cdef size_t i, dec_pos = 0, exp_pos = 0, dec_len = 0

`

``

820

`+

cdef Py_UCS4 c

`

``

821

`+

cdef AnyString numerator, denominator

`

``

822

+

``

823

`+

allow_sign = True

`

``

824

`+

require_separator = False

`

``

825

`+

number_seen = False

`

``

826

`+

has_underscores = False

`

``

827

`+

for i, c in enumerate(s):

`

``

828

`+

if c == u'/':

`

``

829

`+

if not number_seen or dec_pos > 0 or exp_pos > 0:

`

``

830

`+

_raise_invalid_input(s)

`

``

831

`+

try:

`

``

832

`+

return int(s[:i].replace('', '') if has_underscores else s[:i]), int(s[i+1:].replace('', ''))

`

``

833

`+

except ValueError:

`

``

834

`+

_raise_invalid_input(s)

`

``

835

`+

elif c == u'.':

`

``

836

`+

if dec_pos > 0 or exp_pos > 0:

`

``

837

`+

_raise_invalid_input(s)

`

``

838

`+

require_separator = False

`

``

839

`+

number_seen = False

`

``

840

`+

allow_sign = False

`

``

841

`+

dec_pos = i

`

``

842

`+

elif c in u'eE':

`

``

843

`+

if exp_pos > 0 or i == 0:

`

``

844

`+

_raise_invalid_input(s)

`

``

845

`+

require_separator = False

`

``

846

`+

number_seen = False

`

``

847

`+

allow_sign = True

`

``

848

`+

exp_pos = i

`

``

849

`+

elif c in u'0123456789':

`

``

850

`+

if require_separator:

`

``

851

`+

_raise_invalid_input(s)

`

``

852

`+

number_seen = True

`

``

853

`+

require_separator = False

`

``

854

`+

allow_sign = False

`

``

855

`+

if dec_pos > 0 and exp_pos == 0:

`

``

856

`+

dec_len += 1

`

``

857

`+

elif c in u'+-':

`

``

858

`+

if require_separator:

`

``

859

`+

_raise_invalid_input(s)

`

``

860

`+

require_separator = False

`

``

861

`+

if not allow_sign:

`

``

862

`+

_raise_invalid_input(s)

`

``

863

`+

allow_sign = False

`

``

864

`+

elif c == u'_':

`

``

865

`+

if not number_seen or require_separator:

`

``

866

`+

_raise_invalid_input(s)

`

``

867

`+

has_underscores = True

`

``

868

`+

else:

`

``

869

`+

if c.isspace():

`

``

870

`+

if number_seen or not allow_sign:

`

``

871

`+

require_separator = True

`

``

872

`+

else:

`

``

873

`+

_raise_invalid_input(s)

`

``

874

+

``

875

`+

if exp_pos > 0:

`

``

876

`+

shift = int(s[exp_pos+1:])

`

``

877

`+

numerator = s[:exp_pos]

`

``

878

`+

else:

`

``

879

`+

shift = 0

`

``

880

`+

numerator = s

`

``

881

+

``

882

`+

neg = False

`

``

883

`+

if dec_pos > 0:

`

``

884

`+

numerator, decimal = numerator[:dec_pos], numerator[dec_pos+1:]

`

``

885

`+

numerator = numerator.strip()

`

``

886

`+

decimal = decimal.strip()

`

``

887

`+

shift -= dec_len

`

``

888

`+

numerator += decimal

`

``

889

+

``

890

`+

if has_underscores:

`

``

891

`+

numerator = numerator.replace('_', '')

`

``

892

+

``

893

`+

try:

`

``

894

`+

num = int(numerator)

`

``

895

`+

except ValueError:

`

``

896

`+

_raise_invalid_input(s)

`

``

897

+

``

898

`+

denom = 1

`

``

899

`+

if neg:

`

``

900

`+

num = -num

`

``

901

`+

if shift > 0:

`

``

902

`+

num *= 10 ** shift

`

``

903

`+

elif shift < 0:

`

``

904

`+

denom = 10 ** -shift

`

``

905

+

``

906

`+

return num, denom

`