nonuniform sampling with replacement

Peter Otten __peter__ at web.de
Sun Mar 21 08:27:44 EDT 2010


Jah_Alarm wrote:

> I've got a vector length n of integers (some of them are repeating),
> and I got a selection probability vector of the same length. How will
> I sample with replacement k (<=n) values with the probability vector.
> In Matlab this function is randsample. I couldn't find anything to
> this extent in Scipy or Numpy.

If all else fails you can do it yourself:

import random
import bisect

def iter_sample_with_replacement(values, weights):
    """Yield an endless stream of items drawn from *values* with replacement.

    values  -- indexable sequence; values[i] is drawn with probability
               weights[i] / sum(weights).
    weights -- iterable of non-negative numbers, one per entry of *values*.
               Entries of *values* beyond the last weight are never drawn.

    Raises ValueError if any weight is negative, or if the weights are empty
    or do not add up to a positive, finite total.  Because this is a
    generator, the checks run when the first item is requested.
    """
    _random = random.random
    _bisect = bisect.bisect

    # Build the running totals; acc_weights[i] is the upper edge of the
    # slice of [0, sigma) that belongs to values[i].
    acc_weights = []
    sigma = 0
    for w in weights:
        if w < 0:
            # A negative weight would make the running totals non-monotonic
            # and the binary search below would silently misbehave.
            raise ValueError("weights must be non-negative")
        sigma += w
        acc_weights.append(sigma)

    # Written as a chained comparison so that NaN is rejected as well.
    if not 0 < sigma < float("inf"):
        raise ValueError("total of weights must be positive and finite")

    last = len(acc_weights) - 1
    while True:
        # bisect (bisect_right) skips zero-width slices, so zero-weight
        # items are never selected.
        index = _bisect(acc_weights, _random() * sigma)
        # Defensive clamp: should rounding ever push the product up to
        # sigma itself, the search would land one past the end.
        yield values[min(index, last)]

def sample_with_replacement(k, values, weights):
    """Return a list of *k* items drawn from *values* with replacement.

    k       -- number of samples to draw.
    values  -- indexable sequence to sample from.
    weights -- iterable of selection weights, one per entry of *values*.
    """
    # Imported here because the module only imports islice inside its
    # __main__ section; without this, calling the function from an
    # importing module fails with NameError.
    from itertools import islice

    return list(islice(iter_sample_with_replacement(values, weights), k))

if __name__ == "__main__":
    from itertools import islice

    # Demo: draw N samples and count how often each value comes up; the
    # counts should be roughly proportional to the weights.
    N = 10**6
    values = range(4)
    weights = [2, 3, 4, 1]

    # The values are 0..3, so each sampled value doubles as its own
    # histogram index.
    histo = [0] * len(values)
    for v in islice(iter_sample_with_replacement(values, weights), N):
        histo[v] += 1
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the print statement is a SyntaxError on Python 3.
    print(histo)
    print(sample_with_replacement(30, values, weights))

Peter

                   




More information about the Python-list mailing list