Implement floating point conversion with ryu by 0xlwu · Pull Request #365 · haskell/bytestring (original) (raw)

@@ -0,0 +1,272 @@

-- |

-- Module : Data.ByteString.Builder.RealFloat

-- Copyright : (c) Lawrence Wu 2021

-- License : BSD-style

-- Maintainer : lawrencejwu@gmail.com

-- Floating point formatting for @Bytestring.Builder@

-- This module primarily exposes `floatDec` and `doubleDec` which do the

-- equivalent of converting through @'Data.ByteString.Builder.string7' . 'show'@.

-- It also exposes `formatFloat` and `formatDouble` with a similar API as

-- `GHC.Float.formatRealFloat`.

-- NB: The float-to-string conversions exposed by this module match `show`'s

-- output (specifically with respect to default rounding and length). In

-- particular, there are boundary cases where the closest and \'shortest\'

-- string representations are not used. Mentions of \'shortest\' in the docs

-- below are with this caveat.

-- For example, for fidelity, we match `show` on the output below.

-- >>> show (1.0e23 :: Float)

-- "1.0e23"

-- >>> show (1.0e23 :: Double)

-- "9.999999999999999e22"

-- >>> floatDec 1.0e23

-- "1.0e23"

-- >>> doubleDec 1.0e23

-- "9.999999999999999e22"

-- Simplifying, we can build a shorter, lossless representation by just using

-- @"1.0e23"@ since the floating point values that are 1 ULP away are

-- >>> showHex (castDoubleToWord64 1.0e23) []

-- "44b52d02c7e14af6"

-- >>> castWord64ToDouble 0x44b52d02c7e14af5

-- 9.999999999999997e22

-- >>> castWord64ToDouble 0x44b52d02c7e14af6

-- 9.999999999999999e22

-- >>> castWord64ToDouble 0x44b52d02c7e14af7

-- 1.0000000000000001e23

-- In particular, we could use the exact boundary if it is the shortest

-- representation and the original floating number is even. To experiment with

-- the shorter rounding, refer to

-- `Data.ByteString.Builder.RealFloat.Internal.acceptBounds`. This will give us

-- >>> floatDec 1.0e23

-- "1.0e23"

-- >>> doubleDec 1.0e23

-- "1.0e23"

-- For more details, please refer to the

-- <https://dl.acm.org/doi/10.1145/3192366.3192369 Ryu paper>.

module Data.ByteString.Builder.RealFloat

( floatDec

, doubleDec

-- * Custom formatting

, formatFloat

, formatDouble

, FloatFormat

, standard

, standardDefaultPrecision

, scientific

, generic

) where

import Data.ByteString.Builder.Internal (Builder)

import qualified Data.ByteString.Builder.RealFloat.Internal as R

import qualified Data.ByteString.Builder.RealFloat.F2S as RF

import qualified Data.ByteString.Builder.RealFloat.D2S as RD

import qualified Data.ByteString.Builder.Prim as BP

import GHC.Float (roundTo)

import GHC.Word (Word64)

import GHC.Show (intToDigit)

-- | Returns a rendered Float. Matches `show` in displaying in standard or

-- scientific notation

-- @

-- floatDec = 'formatFloat' 'generic'

-- @

{-# INLINABLE floatDec #-}

floatDec :: Float -> Builder

floatDec = formatFloat generic

-- | Returns a rendered Double. Matches `show` in displaying in standard or

-- scientific notation

-- @

-- doubleDec = 'formatDouble' 'generic'

-- @

{-# INLINABLE doubleDec #-}

doubleDec :: Double -> Builder

doubleDec = formatDouble generic

-- | Format type for use with `formatFloat` and `formatDouble`.

data FloatFormat = MkFloatFormat FormatMode (Maybe Int)

-- | Standard notation with `n` decimal places

standard :: Int -> FloatFormat

standard n = MkFloatFormat FStandard (Just n)

-- | Standard notation with the \'default precision\' (decimal places matching `show`)

standardDefaultPrecision :: FloatFormat

standardDefaultPrecision = MkFloatFormat FStandard Nothing

-- | Scientific notation with \'default precision\' (decimal places matching `show`)

scientific :: FloatFormat

scientific = MkFloatFormat FScientific Nothing

-- | Standard or scientific notation depending on the exponent. Matches `show`

generic :: FloatFormat

generic = MkFloatFormat FGeneric Nothing

-- | ByteString float-to-string format

data FormatMode

= FScientific -- ^ scientific notation

| FStandard -- ^ standard notation with `Maybe Int` digits after the decimal

| FGeneric -- ^ dispatches to scientific or standard notation based on the exponent

deriving Show

-- TODO: support precision argument for FGeneric and FScientific

-- | Returns a rendered Float. Returns the \'shortest\' representation in

-- scientific notation and takes an optional precision argument in standard

-- notation. Also see `floatDec`.

-- With standard notation, the precision argument is used to truncate (or

-- extend with 0s) the \'shortest\' rendered Float. The \'default precision\' does

-- no such modifications and will return as many decimal places as the

-- representation demands.

-- e.g

-- >>> formatFloat (standard 1) 1.2345e-2

-- "0.0"

-- >>> formatFloat (standard 10) 1.2345e-2

-- "0.0123450000"

-- >>> formatFloat standardDefaultPrecision 1.2345e-2

-- "0.01234"

-- >>> formatFloat scientific 12.345

-- "1.2345e1"

-- >>> formatFloat generic 12.345

-- "12.345"

{-# INLINABLE formatFloat #-}

formatFloat :: FloatFormat -> Float -> Builder

formatFloat (MkFloatFormat fmt prec) = \f ->

let (RF.FloatingDecimal m e) = RF.f2Intermediate f

e' = R.int32ToInt e + R.decimalLength9 m in

case fmt of

FGeneric ->

case specialStr f of

Just b -> b

Nothing ->

if e' >= 0 && e' <= 7

then sign f `mappend` showStandard (R.word32ToWord64 m) e' prec

else BP.primBounded (R.toCharsScientific (f < 0) m e) ()

FScientific -> RF.f2s f

FStandard ->

case specialStr f of

Just b -> b

Nothing -> sign f `mappend` showStandard (R.word32ToWord64 m) e' prec

-- TODO: support precision argument for FGeneric and FScientific

-- | Returns a rendered Double. Returns the \'shortest\' representation in

-- scientific notation and takes an optional precision argument in standard

-- notation. Also see `doubleDec`.

-- With standard notation, the precision argument is used to truncate (or

-- extend with 0s) the \'shortest\' rendered Float. The \'default precision\'

-- does no such modifications and will return as many decimal places as the

-- representation demands.

-- e.g

-- >>> formatDouble (standard 1) 1.2345e-2

-- "0.0"

-- >>> formatDouble (standard 10) 1.2345e-2

-- "0.0123450000"

-- >>> formatDouble standardDefaultPrecision 1.2345e-2

-- "0.01234"

-- >>> formatDouble scientific 12.345

-- "1.2345e1"

-- >>> formatDouble generic 12.345

-- "12.345"

{-# INLINABLE formatDouble #-}

formatDouble :: FloatFormat -> Double -> Builder

formatDouble (MkFloatFormat fmt prec) = \f ->

let (RD.FloatingDecimal m e) = RD.d2Intermediate f

e' = R.int32ToInt e + R.decimalLength17 m in

case fmt of

FGeneric ->

case specialStr f of

Just b -> b

Nothing ->

if e' >= 0 && e' <= 7

then sign f `mappend` showStandard m e' prec

else BP.primBounded (R.toCharsScientific (f < 0) m e) ()

FScientific -> RD.d2s f

FStandard ->

case specialStr f of

Just b -> b

Nothing -> sign f `mappend` showStandard m e' prec

-- | Char7 encode a 'Char'.

{-# INLINE char7 #-}

char7 :: Char -> Builder

char7 = BP.primFixed BP.char7

-- | Char7 encode a 'String'.

{-# INLINE string7 #-}

string7 :: String -> Builder

string7 = BP.primMapListFixed BP.char7

-- | Encodes a `-` if input is negative

sign :: RealFloat a => a -> Builder

sign f = if f < 0 then char7 '-' else mempty

-- | Special rendering for Nan, Infinity, and 0. See

-- RealFloat.Internal.NonNumbersAndZero

specialStr :: RealFloat a => a -> Maybe Builder

specialStr f

| isNaN f = Just $ string7 "NaN"

| isInfinite f = Just $ sign f `mappend` string7 "Infinity"

| isNegativeZero f = Just $ string7 "-0.0"

| f == 0 = Just $ string7 "0.0"

| otherwise = Nothing

-- | Returns a list of decimal digits in a Word64

digits :: Word64 -> [Int]

digits w = go [] w

where go ds 0 = ds

go ds c = let (q, r) = R.dquotRem10 c

in go ((R.word64ToInt r) : ds) q

-- | Show a floating point value in standard notation. Based on GHC.Float.showFloat

showStandard :: Word64 -> Int -> Maybe Int -> Builder

showStandard m e prec =

case prec of

Nothing

| e <= 0 -> char7 '0'

`mappend` char7 '.'

`mappend` string7 (replicate (-e) '0')

`mappend` mconcat (digitsToBuilder ds)

| otherwise ->

let f 0 s rs = mk0 (reverse s) `mappend` char7 '.' `mappend` mk0 rs

f n s [] = f (n-1) (char7 '0':s) []

f n s (r:rs) = f (n-1) (r:s) rs

in f e [] (digitsToBuilder ds)

Just p

| e >= 0 ->

let (ei, is') = roundTo 10 (p' + e) ds

(ls, rs) = splitAt (e + ei) (digitsToBuilder is')

in mk0 ls `mappend` mkDot rs

| otherwise ->

let (ei, is') = roundTo 10 p' (replicate (-e) 0 ++ ds)

-- ds' should always be non-empty but use redundant pattern

-- matching to silence warning

ds' = if ei > 0 then is' else 0:is'

(ls, rs) = splitAt 1 $ digitsToBuilder ds'

in mk0 ls `mappend` mkDot rs

where p' = max p 0

where

mk0 ls = case ls of [] -> char7 '0'; _ -> mconcat ls

mkDot rs = if null rs then mempty else char7 '.' `mappend` mconcat rs

ds = digits m

digitsToBuilder = fmap (char7 . intToDigit)