Actually binary splitting does work very well, if combined with iterated argument reduction to balance the number of terms against the size of the numerators and denominators (this is known as the bit-burst algorithm).
Here is a binary splitting implementation for mpmath based on repeated application of the formula atan(t) = atan(p/2^q) + atan((t*2^q-p) / (2^q+p*t)). This formula was suggested recently by Richard Brent (in fact mpmath's atan already uses a single invocation of this formula at low precision, in order to look up atan(p/2^q) from a cache). If I remember correctly, MPFR also uses the bit-burst algorithm to evaluate atan, but it uses a slightly different formula, which possibly is more efficient (instead of evaluating several different arctangent values, it does analytic continuation using the arctangent differential equation).
from mpmath.libmp import MPZ, bitcount
from mpmath import mp
def bsplit(p, q, a, b):
if b - a == 1:
if a == 0:
P = p
Q = q
else:
P = p * p
Q = q * 2
B = MPZ(1 + 2 * a)
if a % 2 == 1:
B = -B
T = P
return P, Q, B, T
else:
m = a + (b - a) // 2
P1, Q1, B1, T1 = bsplit(p, q, a, m)
P2, Q2, B2, T2 = bsplit(p, q, m, b)
T = ((T1 * B2) << Q2) + T2 * B1 * P1
P = P1 * P2
B = B1 * B2
Q = Q1 + Q2
return P, Q, B, T
def atan_bsplit(p, q, prec):
"""computes atan(p/2^q) as a fixed-point number"""
if p == 0:
return MPZ(0)
# FIXME
nterms = (-prec / (bitcount(p) - q) - 1) * 0.5
nterms = int(nterms) + 1
if nterms < 1:
return MPZ(0)
P, Q, B, T = bsplit(p, q, 0, nterms)
if prec >= Q:
return (T << (prec - Q)) // B
else:
return T // (B << (Q - prec))
def atan_fixed(x, prec):
t = MPZ(x)
s = MPZ(0)
q = 1
while t:
q = min(q, prec)
p = t >> (prec - q)
if p:
s += atan_bsplit(p, q, prec)
u = (t << q) - (p << prec)
v = (MPZ(1) << (q + prec)) + p * t
t = (u << prec) // v
q *= 2
return s
def atan1(x):
prec = mp.prec
man = x.to_fixed(prec)
return mp.mpf((atan_fixed(man, prec), -prec))
def asin1(x):
x = mpf(x)
return atan1(x/sqrt(1-x**2))
With this code, I get:
>>> from mpmath import *
>>> mp.dps = 1000000
>>> val=e/4/(10**7)
>>> from time import time
>>> start = time(); y1 = asin(x); print time() - start
58.8485069275
>>> start = time(); y2 = asin1(x); print time() - start
8.26498985291
>>> nprint(y2 - y1)
-2.31674e-1000000
Warning: atan1 assumes 0 <= x < 1/2, and the determination of the number of terms might not be optimal or correct (fixing these issues is left as an exercise to the reader).