(original) (raw)
*** longobject.c Mon Sep 8 11:22:47 2008 --- longobject33.c Mon Sep 8 10:25:01 2008 *************** *** 16,21 **** --- 16,26 ---- #define KARATSUBA_CUTOFF 70 #define KARATSUBA_SQUARE_CUTOFF (2 * KARATSUBA_CUTOFF) + /* For long division, use the O(N**2) school algorithm unless the + * denominator contains more than DIV_LIMIT digits. + */ + const int DIV_LIMIT = KARATSUBA_CUTOFF; + /* For exponentiation, use the binary left-to-right algorithm * unless the exponent contains more than FIVEARY_CUTOFF digits. * In that case, do 5 bits at a time. The potential drawback is that *************** *** 1762,1767 **** --- 1767,1774 ---- static PyObject *long_long(PyObject *v); static int long_divrem(PyLongObject *, PyLongObject *, PyLongObject **, PyLongObject **); + static PyLongObject * divmod_pos + (PyLongObject *, PyLongObject *, PyLongObject **); /* Long division with remainder, top-level routine */ *************** *** 1800,1806 **** } } else { ! z = x_divrem(a, b, prem); if (z == NULL) return -1; } --- 1807,1822 ---- } } else { ! if ((size_b < 2*DIV_LIMIT) || ! ((size_b < 4*DIV_LIMIT) && ! (size_a < 0.897 * size_b + 44.97)) || ! (size_a > 0.00343 * pow(size_b,2.9))) ! { ! z = x_divrem(a, b, prem); ! } ! else { ! z = divmod_pos(a, b, prem); ! } if (z == NULL) return -1; } *************** *** 1916,1921 **** --- 1932,2267 ---- return a; } + + /* utilities for the long division algorithm */ + // shift left by shiftby limbs + static PyLongObject * + _long_int_lshift(PyLongObject *a, Py_ssize_t shiftby) + { + PyLongObject *z = NULL; + Py_ssize_t oldsize, newsize, wordshift; + + assert(shiftby >= 0); + wordshift = shiftby; + oldsize = Py_SIZE(a); + newsize = oldsize + wordshift; + z = _PyLong_New(newsize); + if (z == NULL) + goto lshift_error; + bzero(z->ob_digit, wordshift * sizeof(digit)); + memcpy(z->ob_digit + wordshift, a->ob_digit, oldsize * sizeof(digit)); + z = long_normalize(z); + lshift_error: + return z; + } + + + // shift right by shiftby limbs + static PyLongObject * + _long_int_rshift(PyLongObject *a, Py_ssize_t shiftby) + { + PyLongObject *z = NULL; + Py_ssize_t newsize, wordshift; + + assert(shiftby >= 0); + wordshift = shiftby; + newsize = Py_SIZE(a) - wordshift; + if (newsize <= 0) { + z = _PyLong_New(0); + return z; + } + z = _PyLong_New(newsize); + if (z == NULL) + goto rshift_error; + memcpy(z->ob_digit, a->ob_digit + wordshift, newsize * sizeof(digit)); + z = long_normalize(z); + rshift_error: + return z; + } + + // mask for len limbs starting from start + static PyLongObject * + _long_int_mask(PyLongObject *a, Py_ssize_t start, Py_ssize_t len) + { + PyLongObject *z = NULL; + Py_ssize_t wordshift, newsize; + assert(start >=0); + assert(len >= 0); + wordshift = start; + newsize = MIN(len, (long)Py_SIZE(a) - wordshift); + if (newsize < 0) + newsize = 0; + if (newsize == 0) { + z = _PyLong_New(0); + return z; + } + z = _PyLong_New(newsize); + if (z == NULL) + goto int_mask1_error; + memcpy(z->ob_digit, a->ob_digit + wordshift, newsize * sizeof(digit)); + z = long_normalize(z); + int_mask1_error: + return z; + } + + static int + _long_divrem1(PyLongObject *a, PyLongObject *b, + PyLongObject **pdiv, PyLongObject **prem) + { + Py_ssize_t size_a = Py_SIZE(a), size_b = Py_SIZE(b); + PyLongObject *z; + if (size_a < size_b || + (size_a == size_b && + a->ob_digit[size_a-1] < b->ob_digit[size_b-1])) { + /* |a| < |b|. */ + *pdiv = _PyLong_New(0); + if (*pdiv == NULL) + return -1; + Py_INCREF(a); + *prem = (PyLongObject *) a; + return 0; + } + z = x_divrem(a, b, prem); + if (z == NULL) + return -1; + *pdiv = z; + return 0; + } + + /* For long division a/b with with n = PySIZE(q), n > DIV_LIMIT use the + * binary splitting algorithm by Burnikel and Ziegler + * http://cr.yp.to/bib/1998/burnikel.ps + * n is required to be even. + */ + #define STUB_EXC(a) if ((a) == NULL) return NULL; + static PyObject * long_add(PyLongObject *, PyLongObject *); + static PyObject * long_sub(PyLongObject *, PyLongObject *); + static PyObject * long_mul(PyLongObject *, PyLongObject *); + static PyObject * long_div(PyObject *, PyObject *); + static int long_compare(PyLongObject *, PyLongObject *); + static PyObject * long_rshift(PyLongObject *, PyLongObject *); + static PyObject * long_lshift(PyObject *, PyObject *); + static PyObject * long_or(PyObject *, PyObject *); + static PyObject * long_and(PyObject *, PyObject *); + static PyObject * long_lshift(PyObject *, PyObject *); + static PyLongObject * div3n2n(PyLongObject *, digit *, PyLongObject *, + PyLongObject *, PyLongObject *, Py_ssize_t , PyLongObject **); + + static PyLongObject * + div2n1n(PyLongObject *a, PyLongObject *b, Py_ssize_t n, PyLongObject **prem) + { + PyLongObject *q = NULL, *r; + if (n <= DIV_LIMIT || Py_SIZE(a) < n){ + _long_divrem1(a, b, &q, &r); + *prem = r; + return q; + } + PyLongObject *q1, *q2, *t1, *a1, *b1, *b2; + Py_ssize_t half_n = n >> 1; + STUB_EXC(b1 = _long_int_rshift(b, half_n)); + STUB_EXC(b2 = _long_int_mask(b, 0, half_n)); + STUB_EXC(a1 = _long_int_rshift(a, n)); + STUB_EXC(q1 = div3n2n(a1, a->ob_digit+half_n, b, b1, b2, half_n, &t1)); + Py_DECREF(a1); + STUB_EXC(q2 = div3n2n(t1, a->ob_digit, b, b1, b2, half_n, &r)); + Py_DECREF(b1); + Py_DECREF(b2); + Py_DECREF(t1); + if(Py_SIZE(q1)) { + STUB_EXC(q = _long_int_lshift(q1, half_n)); + memcpy(q->ob_digit, q2->ob_digit, Py_SIZE(q2) * sizeof(digit)); + } + else { + q = q2; + Py_INCREF(q2); + } + Py_DECREF(q1); + Py_DECREF(q2); + *prem = r; + return q; + } + + static PyLongObject * k_mul(PyLongObject *, PyLongObject *); + static PyLongObject * x_add(PyLongObject *, PyLongObject *); + static PyLongObject * x_sub(PyLongObject *, PyLongObject *); + + /* Helper function for div2n1n; not intended to be called directly. */ + static PyLongObject * + div3n2n(PyLongObject *a12, digit *a3, PyLongObject *b, + PyLongObject *b1, PyLongObject *b2, Py_ssize_t n, PyLongObject **prem) + { + PyLongObject *q, *r, *t1, *t2, *t3; + STUB_EXC(t1 = _long_int_rshift(a12, n)); + if (long_compare(t1, b1) == 0){ + PyLongObject * one; + STUB_EXC(one = (PyLongObject*) PyLong_FromLong(1)); + STUB_EXC(t2 = _long_int_lshift(one, n)); + STUB_EXC(q = (PyLongObject *) long_sub(t2, one)); + Py_DECREF(t2); + STUB_EXC(t3 = _long_int_lshift(b1, n)); + Py_DECREF(t1); + STUB_EXC(t1 = (PyLongObject *) long_sub(a12, t3)); + Py_DECREF(t3); + STUB_EXC(r = (PyLongObject *) long_add(t1, b1)); + } + else { + STUB_EXC(q = div2n1n(a12, b1, n, &r)); + } + Py_DECREF(t1); + if(Py_SIZE(r)) { + STUB_EXC(t2 = _long_int_lshift(r, n)); + memcpy(t2->ob_digit, a3, n * sizeof(digit)); + } + else { + STUB_EXC(t2 = _PyLong_New(n)); + memcpy(t2->ob_digit, a3, n * sizeof(digit)); + } + + STUB_EXC(t3 = (PyLongObject *) k_mul(q, b2)); + Py_DECREF(r); + STUB_EXC(r = x_sub(t2, t3)); + Py_DECREF(t2); + Py_DECREF(t3); + while (Py_SIZE(r) < 0) { + if (q->ob_digit[0] > 0) + q->ob_digit[0]--; + else { + digit borrow = 1; + q->ob_digit[0] = (-1) & MASK; + int i; + for (i=1; (i < q->ob_size) && borrow; ++i) { + borrow = q->ob_digit[i] - borrow; + q->ob_digit[i] = borrow & MASK; + borrow >>= SHIFT; + borrow &= 1; + } + assert(borrow == 0); + } + t1 = r; STUB_EXC(r = (PyLongObject *) long_add(r, b)); + Py_DECREF(t1); + } + *prem = r; + return q; + } + + /* To perform long division a/b where a has more than 2*n, where n is + * the number of bits of b, split a in chunks of n nits, then call the + * div2n1n algorithm. Since n is required to be even in div2n1n, + * in case it is not pad a and b with zeros on the right till n is + * a multiple of 2**N, where N is the number of times n must be + * divided in the div2n1n algorithm. + */ + static PyLongObject * + divmod_pos(PyLongObject *a, PyLongObject *b, PyLongObject **prem) + { + PyLongObject *a0, *a1, *b1, *q, *r, *q_digit, *t0, *t1, *t2; + int asign = (Py_SIZE(a) < 0); + int bsign = (Py_SIZE(b) < 0); + if (asign) Py_SIZE(a) = - Py_SIZE(a); + if (bsign) Py_SIZE(b) = - Py_SIZE(b); + + int na = _PyLong_NumBits((PyObject *) a); + int n = _PyLong_NumBits((PyObject *) b); + // make n a multiple of PyLong_SHIFT + int nr = n%PyLong_SHIFT; + int pad; + STUB_EXC(t0 = (PyLongObject*) PyLong_FromLong(PyLong_SHIFT - nr)); + if (nr > 0) { + pad = 1; + n = n + PyLong_SHIFT - n%PyLong_SHIFT; + na = na + PyLong_SHIFT - n%PyLong_SHIFT; + STUB_EXC(a1 = (PyLongObject *) long_lshift((PyObject *) a, + (PyObject *) t0)); + STUB_EXC(b1 = (PyLongObject *) long_lshift((PyObject *) b, + (PyObject *) t0)); + } + else { + pad = 0; + a1 = a; + Py_INCREF(a); + b1 = b; + Py_INCREF(b); + } + + /* estimates the number of times n must be divided by to by div2n1n + * before falling back to x_divrem; increase till it can be divided + * that number of times + */ + int nab = na/n + 1; + int n_S = n/PyLong_SHIFT; + PyLongObject *lnn; + STUB_EXC(lnn = (PyLongObject*) PyLong_FromLong(n_S/DIV_LIMIT)); + int nn = _PyLong_NumBits((PyObject *) lnn); + int mask_n = (1 << nn) - 1; + int n1 = n_S; + while(n1 & mask_n) + n1++; + int shift_n = n1 - n_S; + Py_DECREF(lnn); + n_S = n1; + t1 = a1; STUB_EXC(a1 = _long_int_lshift(a1, shift_n)); Py_DECREF(t1); + t1 = b1; STUB_EXC(b1 = _long_int_lshift(b1, shift_n)); Py_DECREF(t1); + + // slit a in chunks sized n_S + PyLongObject ** a_digits = + (PyLongObject **) calloc(nab, sizeof(PyLongObject *)); + STUB_EXC(a_digits); + STUB_EXC(a0 = (PyLongObject *) _PyLong_Copy(a1)); + + int i, top; + for(i=0; i < nab; i++) { + STUB_EXC(a_digits[i] = _long_int_mask(a0, 0, n_S)); + t1 = a0; STUB_EXC(a0 = _long_int_rshift(a0, n_S)); + Py_DECREF(t1); + if (Py_SIZE(a0) == 0) { + break; + } + } + top = i; + Py_DECREF(a0); + if (long_compare(a_digits[i], b1) >= 0) { + STUB_EXC(r = (PyLongObject*) PyLong_FromLong(0)); + } + else { + STUB_EXC(r = (PyLongObject *) _PyLong_Copy(a_digits[i--])); + } + STUB_EXC(q = (PyLongObject*) PyLong_FromLong(0)); + while(i >= 0) { + STUB_EXC(t1 = _long_int_lshift(r, n_S)); + STUB_EXC(t2 = (PyLongObject *) long_add(t1, a_digits[i--])); + Py_DECREF(t1); + Py_DECREF(r); + STUB_EXC(q_digit = div2n1n(t2, b1, n_S, &r)); + Py_DECREF(t2); + STUB_EXC(t1 = _long_int_lshift(q, n_S)); + Py_DECREF(q); + STUB_EXC(q = (PyLongObject *) long_add(t1, q_digit)); + Py_DECREF(t1); + Py_DECREF(q_digit); + } + + for(i=top; i >= 0; i--) { + Py_XDECREF(a_digits[i]); + } + free(a_digits); + + if (pad) { + t1 = r; + STUB_EXC(r = (PyLongObject *) long_rshift(r, t0)); + Py_DECREF(t1); + } + t1 = r; STUB_EXC(r = _long_int_rshift(r, shift_n)); Py_DECREF(t1); + + if (asign) Py_SIZE(a) = - Py_SIZE(a); + if (bsign) Py_SIZE(b) = - Py_SIZE(b); + Py_DECREF(a1); + Py_DECREF(b1); + Py_DECREF(t0); + *prem = r; + return q; + } + + /* Methods */ static void