# Smarter way of doing this?

Max M maxm at mxm.dk
Wed Feb 4 13:53:54 CET 2004

Max M wrote:

> Jim Jewett wrote:

> I finally chose a combination of the suggested approaches, and ended up
> with something I find is both more functional and nicer to look at than
> my first try.

Well, that version had a problem. Whenever a series of values is the
same, bisect() always selects the first one. So I never got a
statistically valid sample of elements.

I solved it by using accumulated probabilities instead. So the final
version is here, if anybody cares.

I used it in a Markov chain class, that is now pretty fast.

regards Max M

#########################

from random import random, randint
from bisect import bisect

class Selector:
    """Return random elements weighted by their probabilities.

    Probabilities need not sum to 1; they are treated as relative
    weights of the parallel ``elements`` sequence.
    """

    def __init__(self, probabilities, elements):
        """Precompute the cumulative-probability lookup table.

        probabilities -- sequence of non-negative numeric weights
        elements      -- sequence of items, parallel to probabilities
        """
        self.sum = sum(probabilities)
        accumulated_probabilities = []
        accumulated_probability = 0
        for probability in probabilities:
            accumulated_probability += probability
            accumulated_probabilities.append(accumulated_probability)
        # sorted(...) materializes a list (zip() is lazy on Python 3);
        # sorting by the cumulative key alone means elements never have
        # to be comparable with one another on ties.
        self.decorated = sorted(zip(accumulated_probabilities, elements),
                                key=lambda pair: pair[0])
        # Bisect a plain numeric key list: bisecting the (key, element)
        # tuples with a (rnd, 0) probe would compare an element against
        # an int on an exact tie, which raises TypeError on Python 3.
        self._keys = [pair[0] for pair in self.decorated]

    def get(self):
        """Randomly return one element, weighted by probability."""
        rnd = random() * self.sum
        # random() < 1 guarantees rnd < self.sum, so index is in range.
        index = bisect(self._keys, rnd)
        return self.decorated[index][-1]

    def get_range(self, n):
        """Return a list of n independently drawn weighted elements."""
        return [self.get() for _ in range(n)]

if __name__ == '__main__':

    # Smoke test: each weight is half the previous one, so every element
    # should be drawn roughly half as often as the one before it.
    probabilities = [1.0, 0.5, 0.25, 0.125, 0.0625]
    elements = ['a', 'b', 'c', 'd', 'e']
    s = Selector(probabilities, elements)
    drawn = s.get_range(10000)

    for element in elements:
        # print as a function so this also runs on Python 3
        print(element, drawn.count(element))

##########
>>a 5148
>>b 2594
>>c 1301
>>d 654
>>e 303