Created
April 13, 2016 04:03
-
-
Save anonymous/04e6ca50a0c7c56e399968ed912f03dd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Vitter JS (1987) 'An efficient algorithm for sequential | |
| random sampling.' ACM T. Math. Softw. 13(1): 58--67. | |
| Copied from: https://gist.github.com/ldoddema/bb4ba2d4ad1b948a05e0 | |
| """ | |
| from math import exp, log | |
| import random | |
| import numpy as np | |
| from numpy import arange | |
| def vitter_A(N, n, sample_out, last=0): | |
| """ | |
| Select `n` samples from the integers in population [0, `N`). | |
| The result is placed in integer array `sample_out` (convenient | |
| when testing with Numba). | |
| Optionally start "counting" from integer `last`, to continue | |
| from Vitter_D. I.e. the population has range [last, N+last). | |
| """ | |
| # cdef: | |
| # float V, quot, Nreal, top | |
| # int -> all others? | |
| idx = 0 | |
| top = float(N - n); Nreal = float(N) | |
| while n >= 2: | |
| V = np.random.random(); S = 0; quot = top / Nreal | |
| while quot > V: | |
| S += 1; top -= 1.0; Nreal -= 1.0 | |
| quot = (quot * top) / Nreal | |
| last = last + S | |
| sample_out[idx] = last | |
| idx += 1 | |
| last = last + 1 | |
| Nreal -= 1.0 | |
| n -= 1 | |
| S = int(np.round(Nreal) * np.random.random()) | |
| last += S | |
| sample_out[idx] = last | |
| def test_python(n,k): | |
| return random.sample(xrange(n), k) | |
| def test_vitter(n, k): | |
| sample_out = [0] * k | |
| vitter_A(n, k, sample_out) | |
| return sample_out | |
| # timeit -n 10 test_python(10000000, 400000) | |
| # 10 loops, best of 3: 250 ms per loop | |
| # | |
| # timeit -n 10 test_vitter(10000000, 400000) | |
| # 10 loops, best of 3: 1.72 s per loop |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
FYI https://gist.github.com/ldoddema/bb4ba2d4ad1b948a05e0 404's