How do I sample randomly based on some probability(wightage)?
Scott David Daniels
Scott.Daniels at Acm.Org
Wed May 27 11:08:53 EDT 2009
Sumitava Mukherjee wrote:
> I need to randomly sample from a list where all choices have weights
> attached to them. The probability of them being choosen is dependent
> on the weights.
I am not sure why everybody is normalizing by dividing the weights.
This isa possibility (I had fun writing it).
def sample_without_replacement(choices, weights):
'''Yield elements sampled w/o replacement by weighting'''
if len(weights) != len(choices):
raise ValueError('%d choices, but %d weights?' % (
len(choices), len(weights)))
if min(weights) < 0:
raise ValueError('Negative weights?: %s' % (
[(i, w) for i, w in enumerate(weights) if w < 0]))
# look at only non-zero probabilities
combined = [(w, v) for w, v in zip(weights, choices) if w > 0]
# Go from highest probability down to reduce expected traversal
combined.sort(key=operator.itemgetter(0), reverse=True)
total = sum(w for w, v in combined) # sum(weights) also works
while combined:
spot = sample = random.random() * total
for n, (weight, choice) in enumerate(combined):
spot -= weight
if spot <= 0:
break
else:
# n, (weight, choice) = 0, combined[0] # Highest probability
raise ValueError('%f left after choosing %f/%f?: %s' % (
spot, sample, total, combined))
yield choice
total -= weight
if weight > total * 256: # arbitrary choice for recalculation
# precision affected, rebuild
total = sum(w for w, v in combined)
del combined[n]
raise ValueError('Samplng more than %d without replacement?' % (
sum(1 for w in weights if w > 0)))
for n in range(10):
gen = sample_without_replacement('abcdef', [32,16,8,4,2,1])
print gen.next(), gen.next(), gen.next()
--Scott David Daniels
Scott.Daniels at Acm.Org
More information about the Python-list
mailing list