(original) (raw)

/* Copyright 2005 Paul Zimmermann. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. This program implements ideas from the paper "Gal's Accurate Tables Method Revisited", by Damien Stehle' and Paul Zimmermann, to appear in the Proceedings of Arith'17. A preliminary version appeared as INRIA Research Report RR-5359, Oct. 2004,. Some results on a 3Ghz Pentium 4 with gcc 3.3.5, for random inputs in [1/2, 1[ (the my_exp2() routine only deals with inputs in that range): - the exp2() routine from the libm takes 890ms for 10^5 tests - the my_exp2() routine takes 461ms for 10^5 tests - exp2() gives 563 incorrect roundings out of 10^7 random tests - my_exp2() gives 4 incorrect roundings out of 10^7 random tests */ #include #include #include #include #include #include "gmp.h" #include "mpfr.h" #ifndef exp2 double exp2 (double); #endif #ifndef exp double exp (double); #endif #define PRINT /* #define TIMING */ /* all double-precision floating-point constants are printed with 17 digits in rounding to nearest, which is enough to read them back exactly, provided the input is done with correct rounding */ /* the following is a table of triples [x0,y0,z0] such that (a) x0 is near from 1/2 + (i+1/2) / 2^(K+1) (b) y0 is near from 2^x0 [at least 2K+4 extra bits after the 1st 53] (c) z0 is near from log(2)*2^x0 [at least K+2 extra bits] */ #define k 7 #define K (1 << k) static const double T[K][3] = { {5.0195312982987272e-1, 1.4161294303586671, 9.8158612196107153e-1}, /* 0 */ {5.0585938195020108e-1, 1.4199689488379839, 9.8424747336971774e-1}, /* 1 */ {5.0976564058148044e-1, 1.4238188837393786, 9.8691604489195883e-1}, /* 2 */ {5.1367188059305502e-1, 1.4276792384704040, 9.8959183888973035e-1}, /* 3 */ {5.1757812643578738e-1, 1.4315500654442086, 9.9227489169305838e-1}, /* 4 */ {5.2148437558012628e-1, 1.4354313905684102, 9.9496522125973519e-1}, /* 5 */ {5.2539062474289833e-1, 1.4393232390486645, 9.9766284506099001e-1}, /* 6 */ {5.2929687526280156e-1, 1.4432256407559683, 1.0003677838018199}, /* 7 */ {5.3320314655301615e-1, 1.4471386437833522, 1.0030800708177736}, /* 8 */ {5.3710937726521302e-1, 1.4510622152827595, 1.0057996833403131}, /* 9 */ {5.4101562088066379e-1, 1.4549964376243589, 1.0085266784640887}, /* 10 */ {5.4492188688577792e-1, 1.4589413493491723, 1.0112610829037010}, /* 11 */ {5.4882813681397058e-1, 1.4628969405565586, 1.0140028897965485}, /* 12 */ {5.5273436890685923e-1, 1.4668632383254137, 1.0167521179122916}, /* 13 */ {5.5664063195364044e-1, 1.4708403213268531, 1.0195088217815922}, /* 14 */ {5.6054686741609894e-1, 1.4748281591315624, 1.0222729803124568}, /* 15 */ {5.6445313240746819e-1, 1.4788268392895747, 1.0250446541899441}, /* 16 */ {5.6835937115189805e-1, 1.4828363340337671, 1.0278238241673510}, /* 17 */ {5.7226561378636920e-1, 1.4868567035987750, 1.0306105319961452}, /* 18 */ {5.7617189976140615e-1, 1.4908880182592450, 1.0334048263869999}, /* 19 */ {5.8007814026553550e-1, 1.4949302159061126, 1.0362066642891923}, /* 20 */ {5.8398437931648228e-1, 1.4989833715260501, 1.0390160976795226}, /* 21 */ {5.8789062042457874e-1, 1.5030475184780572, 1.0418331496806876}, /* 22 */ {5.9179687230403422e-1, 1.5071226956775634, 1.0446578472668075}, /* 23 */ {5.9570312875422404e-1, 1.5112089265964586, 1.0474902067073566}, /* 24 */ {5.9960938162586896e-1, 1.5153062326709583, 1.0503302428607872}, /* 25 */ {6.0351562680585491e-1, 1.5194146395766828, 1.0531779735240832}, /* 26 */ {6.0742186764930295e-1, 1.5235341809102545, 1.0560334219846486}, /* 27 */ {6.1132811697228095e-1, 1.5276649004061909, 1.0588966185569408}, /* 28 */ {6.1523435524726677e-1, 1.5318068076864924, 1.0617675699104225}, /* 29 */ {6.1914061602767589e-1, 1.5359599687432259, 1.0646463217873088}, /* 30 */ {6.2304688358716653e-1, 1.5401243974299228, 1.0675328837901357}, /* 31 */ {6.2695312790475566e-1, 1.5443000922002752, 1.0704272548470843}, /* 32 */ {6.3085937493353206e-1, 1.5484871113211813, 1.0733294753456910}, /* 33 */ {6.3476563826064425e-1, 1.5526855001348321, 1.0762395767147674}, /* 34 */ {6.3867187342967380e-1, 1.5568952415872050, 1.0791575471333661}, /* 35 */ {6.4257812458759778e-1, 1.5611164140644698, 1.0820834409346394}, /* 36 */ {6.4648437804758396e-1, 1.5653490338025291, 1.0850172693724576}, /* 37 */ {6.5039063074073800e-1, 1.5695931285127778, 1.0879590516548958}, /* 38 */ {6.5429688932002605e-1, 1.5738487365603047, 1.0909088143746073}, /* 39 */ {6.5820313505256411e-1, 1.5781158687044994, 1.0938665649894326}, /* 40 */ {6.6210937541756243e-1, 1.5823945643176651, 1.0968323307901726}, /* 41 */ {6.6601561777914653e-1, 1.5866848628186989, 1.0998061390999248}, /* 42 */ {6.6992187694754723e-1, 1.5909868120105957, 1.1027880230532001}, /* 43 */ {6.7382812896950017e-1, 1.5953004170950322, 1.1057779862555264}, /* 44 */ {6.7773437160024752e-1, 1.5996257071426287, 1.1087760488571219}, /* 45 */ {6.8164063410298359e-1, 1.6039627463123871, 1.1117822553296179}, /* 46 */ {6.8554688360492788e-1, 1.6083115299326873, 1.1147966024348943}, /* 47 */ {6.8945313698883326e-1, 1.6126721086393410, 1.1178191252710210}, /* 48 */ {6.9335937630703059e-1, 1.6170444943189555, 1.1208498320771665}, /* 49 */ {6.9726564012935399e-1, 1.6214287622462238, 1.1238887750297719}, /* 50 */ {7.0117186039038126e-1, 1.6258248680810656, 1.1269359233946357}, /* 51 */ {7.0507812417939619e-1, 1.6302329420633603, 1.1299913674471629}, /* 52 */ {7.0898437383941537e-1, 1.6346529515799779, 1.1330550845816545}, /* 53 */ {7.1289062445131701e-1, 1.6390849460385346, 1.1361271090448604}, /* 54 */ {7.1679687510540135e-1, 1.6435289569030291, 1.1392074626459625}, /* 55 */ {7.2070312945826898e-1, 1.6479850209304296, 1.1422961708629498}, /* 56 */ {7.2460937976839956e-1, 1.6524531619560072, 1.1453932502171731}, /* 57 */ {7.2851562413746018e-1, 1.6569334105185201, 1.1484987218764866}, /* 58 */ {7.3242187413076687e-1, 1.6614258127265977, 1.1516126178009569}, /* 59 */ {7.3632810373124735e-1, 1.6659303715243676, 1.1547349400312976}, /* 60 */ {7.4023437018487348e-1, 1.6704471860271464, 1.1578657572690110}, /* 61 */ {7.4414062189686525e-1, 1.6749762297929338, 1.1610050511858991}, /* 62 */ {7.4804687395536917e-1, 1.6795175534508668, 1.1641528568754056}, /* 63 */ {7.5195312416764937e-1, 1.6840711877377119, 1.1673091956426334}, /* 64 */ {7.5585939180250450e-1, 1.6886371885729312, 1.1704741062480000}, /* 65 */ {7.5976561710368595e-1, 1.6932155194634446, 1.1736475633964298}, /* 66 */ {7.6367187573732975e-1, 1.6978063026163095, 1.1768296517954002}, /* 67 */ {7.6757815140508656e-1, 1.7024095527731455, 1.1800203816630233}, /* 68 */ {7.7148436801329123e-1, 1.7070252138089748, 1.1832197140964287}, /* 69 */ {7.7539062444615336e-1, 1.7116534363133677, 1.1864277534763528}, /* 70 */ {7.7929686215324878e-1, 1.7162941849456514, 1.1896444753065076}, /* 71 */ {7.8320308186186749e-1, 1.7209474944144247, 1.1928699036450607}, /* 72 */ {7.8710937468999342e-1, 1.7256135076456369, 1.1961041375607309}, /* 73 */ {7.9101561924313635e-1, 1.7302921139625367, 1.1993471003382399}, /* 74 */ {7.9492188383620022e-1, 1.7349834293502875, 1.2025988723723768}, /* 75 */ {7.9882812192119268e-1, 1.7396874322669753, 1.2058594387314248}, /* 76 */ {8.0273437419830629e-1, 1.7444042061505554, 1.2091288572501671}, /* 77 */ {8.0664062037636852e-1, 1.7491337611184150, 1.2124071349414423}, /* 78 */ {8.1054687102422707e-1, 1.7538761446320001, 1.2156943047030178}, /* 79 */ {8.1445312242609791e-1, 1.7586313869729533, 1.2189903875245287}, /* 80 */ {8.1835937740874498e-1, 1.7633995264652227, 1.2222954099701118}, /* 81 */ {8.2226562294610139e-1, 1.7681805821354089, 1.2256093852280014}, /* 82 */ {8.2617187621897492e-1, 1.7729746100541453, 1.2289323521633992}, /* 83 */ {8.3007813569528055e-1, 1.7777816435563409, 1.2322643338823034}, /* 84 */ {8.3398437560332872e-1, 1.7826016861015537, 1.2356053327826966}, /* 85 */ {8.3789062395278824e-1, 1.7874348075325837, 1.2389553972759189}, /* 86 */ {8.4179686309458090e-1, 1.7922810214408014, 1.2423145367827904}, /* 87 */ {8.4570312517467849e-1, 1.7971404033051550, 1.2456828036213312}, /* 88 */ {8.4960936369710249e-1, 1.8020129309061639, 1.2490601823901710}, /* 89 */ {8.5351562410389570e-1, 1.8068986966398262, 1.2524467371333354}, /* 90 */ {8.5742186686543831e-1, 1.8117976869039070, 1.2558424584224737}, /* 91 */ {8.6132813203194980e-1, 1.8167099878668143, 1.2592474059849748}, /* 92 */ {8.6523437938859582e-1, 1.8216355849927097, 1.2626615697453634}, /* 93 */ {8.6914062475239118e-1, 1.8265745342350324, 1.2660849884876080}, /* 94 */ {8.7304687459465757e-1, 1.8315268799967934, 1.2695176929895307}, /* 95 */ {8.7695310083582012e-1, 1.8364926228862615, 1.2729596836727510}, /* 96 */ {8.8085936842555135e-1, 1.8414718819663010, 1.2764110430653579}, /* 97 */ {8.8476562320839292e-1, 1.8464646248569494, 1.2798717487232716}, /* 98 */ {8.8867187163711081e-1, 1.8514708963089135, 1.2833418316653182}, /* 99 */ {8.9257812485684362e-1, 1.8564907472994727, 1.2868213272262554}, /* 100 */ {8.9648438686032261e-1, 1.8615242198317696, 1.2903102645204430}, /* 101 */ {9.0039060271336668e-1, 1.8665712798247207, 1.2938086199246739}, /* 102 */ {9.0429687871125797e-1, 1.8716321016926274, 1.2973165143337295}, /* 103 */ {9.0820312889988164e-1, 1.8767066113584727, 1.3008338964013344}, /* 104 */ {9.1210937073275289e-1, 1.8817948685175163, 1.3043608075050894}, /* 105 */ {9.1601562859765051e-1, 1.8868969422806052, 1.3078972955489834}, /* 106 */ {9.1992187031534545e-1, 1.8920128280207400, 1.3114433573258246}, /* 107 */ {9.2382811521984576e-1, 1.8971425884954385, 1.3149990363358097}, /* 108 */ {9.2773436820715161e-1, 1.9022862678012578, 1.3185643631443429}, /* 109 */ {9.3164061480582405e-1, 1.9074438846004611, 1.3221393506871193}, /* 110 */ {9.3554687359522204e-1, 1.9126155012667558, 1.3257240421982983}, /* 111 */ {9.3945311523707953e-1, 1.9178011168453379, 1.3293184370160602}, /* 112 */ {9.4335937254783087e-1, 1.9230008129093568, 1.3329225916826035}, /* 113 */ {9.4726560511565827e-1, 1.9282145737361820, 1.3365364952998313}, /* 114 */ {9.5117186956351107e-1, 1.9334425131638273, 1.3401602267742418}, /* 115 */ {9.5507809712702174e-1, 1.9386845774607548, 1.3437937488619711}, /* 116 */ {9.5898437604225761e-1, 1.9439409235472997, 1.3474371703319070}, /* 117 */ {9.6289062278161530e-1, 1.9492114776669585, 1.3510904400599371}, /* 118 */ {9.6679687278456994e-1, 1.9544963261179280, 1.3547536178634132}, /* 119 */ {9.7070311424426314e-1, 1.9597954916412630, 1.3584267195052333}, /* 120 */ {9.7460936434692447e-1, 1.9651090364010853, 1.3621097880742832}, /* 121 */ {9.7851562538928960e-1, 1.9704370025841007, 1.3658028528121591}, /* 122 */ {9.8242186662199560e-1, 1.9757793872603417, 1.3695059116879622}, /* 123 */ {9.8632812535636094e-1, 1.9811362806119217, 1.3732190272111702}, /* 124 */ {9.9023437610188525e-1, 1.9865076870071396, 1.3769422024096571}, /* 125 */ {9.9414063275542497e-1, 1.9918936649223367, 1.3806754778161341}, /* 126 */ {9.9804688968285804e-1, 1.9972942461091661, 1.3844188754391700} /* 127 */ }; #ifdef LITTLE_ENDIAN #define HI(x) *(1+(int*)&x) #define LO(x) *(int*)&x #elif BIG_ENDIAN #define HI(x) *(int*)&x #define LO(x) *(1+(int*)&x) #endif #define LOG2 0.69314718055994531 /* log(2) */ #define INV720 0.0013888888888888889 /* 1/720 */ #define INV120 0.0083333333333333333 /* 1/120 */ #define INV24 0.041666666666666667 /* 1/24 */ #define INV6 0.16666666666666667 /* 1/6 */ /* page 49 of Defour's PhD thesis */ static inline void Dekker (double *r1, double *r2, double u, double v) { double c = 134217729.0; /* 0x41a00000, 0x02000000 */ double up, u1, u2, vp, v1, v2; up = u * c; vp = v * c; u1 = (u - up) + up; v1 = (v - vp) + vp; u2 = u - u1; v2 = v - v1; *r1 = u * v; *r2 = (((u1 * v1 - *r1) + (u1 * v2)) + (u2 * v1)) + (u2 * v2); } /* page 48 */ #define Fast2Sum(s,r,a,b) \ { double z, _a=a, _b=b; \ s = _a + _b; \ z = s - _a; \ r = _b - z; } /* write x = x0 + h 2^x = 2^x0 * 2^h = 2^x0 * (1 + log(2)*h + (log(2)*h)^2/2 + ... + (log(2)*h)^k/k! + ...) = 2^x0 + 2^x0*log(2)*h + 2^x0 * ((log(2)*h)^2/2 + ... ) Up to k=6 is enough to get about 72 accurate bits. */ static double my_exp2 (double x) { unsigned int i; const double *Ti; double y0, h, u[2], s, r, P; if (x < 0.5 || 1.0 <= x) { fprintf (stderr, "Error: x=%1.16e\n", x); exit (1); } i = (HI(x) >> (20 - k)) & (K - 1); Ti = T[i]; y0 = Ti[1]; h = x - Ti[0]; Dekker (u + 1, u, Ti[2], h); Fast2Sum (s, r, y0, u[1]); r = r + u[0]; /* evaluate the tail-polynomial (terms of degree 2 to 6) */ h = LOG2 * h; P = h * INV720; P = INV120 + P; P = h * P; P = INV24 + P; P = h * P; P = INV6 + P; P = h * P; P = 0.5 + P; P = h * P; P = h * P; r = r + y0 * P; return s + r; } #if defined (__i386__) && defined (__GNUC__) # define _FPU_EXTENDED 0x0300 # define _FPU_DOUBLE 0x0200 # define _FPU_DEFAULT 0x137f # define __setfpucw(cw) do { int toto = (cw); __asm__ ("fldcw %0" : : "m" (toto)); } while (0) # define _fpu_ieee ((_FPU_DEFAULT & (~_FPU_EXTENDED)) | _FPU_DOUBLE) # define set_double() __setfpucw(_fpu_ieee) #else # define set_double() #endif #ifndef FUNC /* #define FUNC __libmcr_exp */ #define FUNC my_exp2 /* #define FUNC exp_rn */ /* #define FUNC exp2 */ #endif int badness (mpfr_t x) { mpfr_t y, z; int res; mpfr_init2 (y, mpfr_get_prec (x) + 1); /* +1 for rounding to nearest */ mpfr_init2 (z, 2 * mpfr_get_prec (x)); mpfr_exp2 (z, x, GMP_RNDN); mpfr_set (y, z, GMP_RNDN); mpfr_sub (z, z, y, GMP_RNDN); res = mpfr_get_exp (y) - mpfr_get_prec (y) - mpfr_get_exp (z); mpfr_clear (y); mpfr_clear (z); return res; } static int cputime () { struct rusage rus; getrusage (0, &rus); return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; } int main (int argc, char *argv[]) { int n = atoi (argv[1]); /* number of tests */ int differ = 0; /* number of differences */ int i; mpfr_t x, y; double d, e, f; gmp_randstate_t state; int st; set_double (); /* set rounding precision to double */ #ifdef TIMING /* compare to exp2() from libm */ st = cputime (); for (i = 0; i < n; i++) { d = (1.0 + drand48 ()) / 2.0; e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); e = FUNC (d); } printf ("exp2 took %dms\n", cputime () - st); #else mpfr_init2 (x, 53); mpfr_init2 (y, 53); gmp_randinit (state, GMP_RAND_ALG_LC, 128); gmp_randseed_ui (state, 0); for (i = 0; i < n; i++) { mpfr_urandomb (x, state); /* uniformly distributed in [0, 1[ */ mpfr_add_ui (x, x, 1, GMP_RNDN); /* [1, 2[ */ mpfr_div_2exp (x, x, 1, GMP_RNDN); /* [1/2, 1[ */ mpfr_exp2 (y, x, GMP_RNDN); d = mpfr_get_d (x, GMP_RNDN); e = FUNC (d); f = mpfr_get_d (y, GMP_RNDN); if (f != e) { #ifdef PRINT fprintf (stderr, "incorrect rounding for x=%1.16e [%d]\n", d, badness (x)); fprintf (stderr, "mpfr_exp2 gives %1.16e\n", f); fprintf (stderr, "my_exp2 gives %1.16e\n", e); #endif differ ++; } } mpfr_clear (x); mpfr_clear (y); gmp_randclear (state); printf ("%u incorrect roundings out of %u\n", differ, n); #endif return 0; }