Update:
Taking advantage of the modulus being 2^32 we can eliminate all Python loops and get a speedup of ~91.1.
Allowing for any modulus it is still possible to reduce the linear length loop to a log length loop. For 500,000 samples this gives a speedup of ~17.1. If we precompute the multistep factors and offsets (they are the same for any seed) this goes up to ~44.8.
Code:
import numpy as np
import time
def unif(n, seed):
m = 2**32
a = 1664525
c = 1013904223
result = np.empty(n)
result[0] = seed
for i in range(1,n):
result[i] = (a*result[i-1]+c) % m
return result / m
def precomp(n):
l = n.bit_length()
a, c = np.empty((2, 1+(1<<l)), np.uint64)
m = 2**32
a[:2] = 1, 1664525
c[:2] = 0, 1013904223
p = 1
for j in range(l):
a[1+p:1+(p<<1)] = a[p] * a[1:1+p] % m
c[1+p:1+(p<<1)] = (a[p] * c[1:1+p] + c[p]) % m
p <<= 1
return a, c
def unif_opt(n, seed, a=None, c=None):
if a is None:
a, c = precomp(n)
return (seed * a[:n] + c[:n]) % m / m
def unif_32(n, seed):
out = np.empty((n,), np.uint32)
out[0] = 1
np.broadcast_to(np.uint32(1664525), (n-1,)).cumprod(out=out[1:])
c = out[:-1].cumsum(dtype=np.uint32)
c *= 1013904223
out *= seed
out[1:] += c
return out / m
m = 2**32
seed = int((time.time() * 1e7) % m)
n = 500000
a, c = precomp(n)
print('results equal:', np.allclose(unif(n, seed), unif_opt(n, seed)) and
np.allclose(unif_opt(n, seed), unif_opt(n, seed, a, c)) and
np.allclose(unif_32(n, seed), unif_opt(n, seed, a, c)))
from timeit import timeit
t = timeit('unif(n, seed)', globals=globals(), number=10)
t_opt = timeit('unif_opt(n, seed)', globals=globals(), number=10)
t_prc = timeit('unif_opt(n, seed, a, c)', globals=globals(), number=10)
t_32 = timeit('unif_32(n, seed)', globals=globals(), number=10)
print(f'speedup without precomp: {t/t_opt:.1f}')
print(f'speedup with precomp: {t/t_prc:.1f}')
print(f'speedup special case: {t/t_32:.1f}')
Sample run:
results equal: True
speedup without precomp: 17.1
speedup with precomp: 44.8
speedup special case: 91.1
m=2**32there is actually a very (90x) fast, fully vectorized numpy solution. I've updated my answer.