Strange behavior
Virgil Stokes
vs at it.uu.se
Thu Aug 16 16:31:47 CEST 2012
On 16-Aug-2012 15:02, Peter Otten wrote:
> Virgil Stokes wrote:
>
>>>> def testFunc(startingList):
>>>> xOnlyList = []; j = -1
>>>> for xl in startingList:
>>>> if (xl[0] == 'x'):
>>> That's going to fail in the starting list contains an empty string. Use
>>> xl.startswith('x') instead.
>> Yes, but this was by design (tacitly assumed that startingList was both a
>> list and non-empty).
> You missunderstood it will fail if the list contains an empty string, not if
> the list itself is empty:
>
>>>> words = ["alpha", "", "xgamma"]
>>>> [word for word in words if word[0] == "x"]
> Traceback (most recent call last):
> File "<stdin>", line 1, in <module>
> IndexError: string index out of range
>
> The startswith() version:
>
>>>> [word for word in words if word.startswith("x")]
> ['xgamma']
>
> Also possible:
>
>>>> [word for word in words if word[:1] == "x"]
> ['xgamma']
>
>> def testFunc1(startingList):
>> '''
>> Algorithm-1
>> Note:
>> One should check for an empty startingList before
>> calling testFunc1 -- If this possibility exists!
>> '''
>> return([x for x in startingList if x[0] == 'x'],
>> [x for x in startingList if x[0] != 'x'])
>>
>>
>> I would be interested in seeing code that is faster than algorithm-1
> In pure Python? Perhaps the messy variant:
>
> def test_func(words):
> nox = []
> append = nox.append
> withx = [x for x in words if x[0] == 'x' or append(x)]
> return withx, nox
>
>
Very nice Peter,
Here are the new results for timing with your method added (algorithm-3).
Method
average (sd) time in seconds
algorithm-1 (list comprehension)
0.11774 (0.002968)
algorithm-2 (S. D'Aprano)
0.17573 (0.003385)
algorithm-2A (modified S. D'Aprano)
0.18116 (0.003081)
algorithm-3 (improved list comprehension)
0.06639 (0.001728)
Algorithm-3 is 43% faster than algorithm-1. Again, the code used to obtain
these results is attached.
Thanks Peter for your contribution
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20120816/6358ffd7/attachment.html>
-------------- next part --------------
'''
Purpose: Time four different algorithms for the same task
Author: V. Stokes (vs at it.uu.se, 2012-08-16 (15:46), 2012-08-16)
Refs:
python-list at python.org list
* Strange behavior, 14-Aug-2012 17:38, light1quark at gmail.com
* Re: Strange behavior, 14-Aug-2012 21:40, Stokes, Virgil
* Re: Strange behavior, 15-Aug-2012 02:19, Steven D'Aprano
* Re: Strange behavior, 16-Aug-2012 15:02, Peter Otten
Notes:
1. The mean and standard deviation over the runs (MC simulations)
are estimated using recursive equations.
2. A seed (syd) is used with the RNG for repeatability. Each run is
started with a new seed to force the generation of independent
random sequences.
3. Warning! No checks are made on the parameters passed to
the functions (this was by design).
4. No effort has been made to make this code elegant. My focus was
to make the code clear and easy to understand.
5. This was executed on a Windows Vista 32-bit platform with Python 2.6.6
Processor: Intel(R) core(TM)2 Duo CPU E8500 at 3.16GHz 3.17GHz
6. The estimated time to completion is displayed after each run.
'''
import random as random
import math as math
from time import clock # clock gives good resolution on MS Windows
def testFunc1(startingList):
'''
Algorithm-1
Note:
One should check for an empty startingList before
calling testFunc1 -- If this possibility exists!
'''
return([x for x in startingList if x[0] == 'x'],
[x for x in startingList if x[0] != 'x'])
def testFunc2(words):
'''
Algorithm-2
'''
words_with_x = []
words_without_x = []
for word in words:
if word.startswith('x'):
words_with_x.append(word)
else:
words_without_x.append(word)
words[:] = words_without_x # slice assignment
return words_with_x
def testFunc2A(words):
'''
Algorithm-2A
'''
words_with_x = []
words_without_x = []
for word in words:
if word.startswith('x'):
words_with_x.append(word)
else:
words_without_x.append(word)
#words[:] = words_without_x # slice assignment
return words_with_x, words_without_x
def testFunc3(words):
'''
Algorithm-3 (from: Peter Otten)
'''
nox = []
append = nox.append
withx = [x for x in words if x[0] == 'x' or append(x)]
return withx, nox
def genStrList(NChar,NStrng,Alph,leadChr):
'''
Purpose: Generate a list of NStrng elements with each element a string
of length NChar and constrained such that approx. 50% of the
strings will begin with the character (symbol) leadChr
Inputs:
NChar -- number of characters in each element
NStrng -- number of elements in list (strgList)
Alph -- list of characters to be used (must contain the leadChr)
leadChr -- leading character for strings to be generated from Alph
Otputs:
strgList -- list with NString strings of NChar characters (from Alph) in each
'''
NSym = len(Alph)
strgList = []
for i in range(NStrng):
if random.random() < 0.5:
char = leadChr
else:
while True:
indx = random.randint(0,NSym-1)
char = Alph[indx]
if char != leadChr: break
for j in range(1,NChar):
indx = random.randint(0,NSym-1)
char = char + Alph[indx]
strgList.append(char)
return strgList
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alph = ['a','b','c','d','e','f','g','h','i','j','k','l','m',
'n','o','p','q','r','s','t','u','v','w','x','y','z']
NChar = 4 # 4 chars per string
NStrng = 100000 # 100,000 strings in list
NRuns = 100 # 100 random lists (number of MC simulations)
# For quick testing
#NChar = 4 # 4 chars per string
#NStrng = 10000 # 10,000 strings in list
#NRuns = 20 # 20 random lists
syd = 1071 # initial seed for RNG (for repeatability)
printRunningMean = False
mu1,mu2,mu3,mu4,mug = 0.0,0.0,0.0,0.0,0.0
x1,x2,x3,x4 = 0.0,0.0,0.0,0.0
print '%5d runs with lists containing %7d elements' %(NRuns,NStrng)
for irun in range(NRuns):
tic = clock()
k = irun + 1
print ' run - %2d ...' %k
syd += 19 # new seed for each run
random.seed(syd)
#print 'Genrating list of %7d strings...' %NStrng
strgList = genStrList(NChar,NStrng,Alph,'x')
t0 = clock()
xOnlyList,strgList = testFunc1(strgList) # algorithm-1
t1 = clock()
#print 'Length of xOnlyList = %7d' %len(xOnlyList)
#print ' Length of strgList = %7d' %len(strgList)
etime = t1 - t0
delta = etime - mu1
mu1 += delta/k
if printRunningMean:
print ' algorithm-1 -- %9.5f' %mu1
x1 += delta*(etime-mu1)
#print
random.seed(syd)
#print 'Genrating list of %7d strings...' %NStrng
strgList = genStrList(NChar,NStrng,Alph,'x')
t0 = clock()
xOnlyList = testFunc2(strgList) # algorithm-2
t1 = clock()
#print 'Length of xOnlyList = %7d' %len(xOnlyList)
#print ' Length of strgList = %7d' %len(strgList)
etime = t1 - t0
delta = etime - mu2
mu2 += delta/k
if printRunningMean:
print ' algorithm-2 -- %9.5f' %mu2
x2 += delta*(etime-mu2)
random.seed(syd)
#print 'Genrating list of %7d strings...' %NStrng
strgList = genStrList(NChar,NStrng,Alph,'x')
t0 = clock()
xOnlyList,strgList = testFunc2A(strgList) # algorithm-2A
t1 = clock()
#print 'Length of xOnlyList = %7d' %len(xOnlyList)
#print ' Length of strgList = %7d' %len(strgList)
etime = t1 - t0
delta = etime - mu3
mu3 += delta/k
if printRunningMean:
print ' algorithm-2A -- %9.5f' %mu3
x3 += delta*(etime-mu3)
random.seed(syd)
#print 'Genrating list of %7d strings...' %NStrng
strgList = genStrList(NChar,NStrng,Alph,'x')
t0 = clock()
xOnlyList,strgList = testFunc3(strgList) # algorithm-3
t1 = clock()
#print 'Length of xOnlyList = %7d' %len(xOnlyList)
#print ' Length of strgList = %7d' %len(strgList)
etime = t1 - t0
delta = etime - mu4
mu4 += delta/k
if printRunningMean:
print ' algorithm-3 -- %9.5f' %mu4
x4 += delta*(etime-mu4)
toc = clock()
loopTime = toc - tic
delta = loopTime - mug
mug += delta/k
runsR = NRuns - k
estToComplete = mug*runsR
print ' ETTC = %8.2f seconds' %estToComplete
print
print ' Timing in seconds'
print ' ************************************'
print ' method | avg time (sd)'
print ' ------------------------------------'
print ' algorithm-1 | %9.5f (%8.6f)' %(mu1,math.sqrt(x1/NRuns))
print ' algorithm-2 | %9.5f (%8.6f)' %(mu2,math.sqrt(x2/NRuns))
print ' algorithm-2A | %9.5f (%8.6f)' %(mu3,math.sqrt(x3/NRuns))
print ' algorithm-3 | %9.5f (%8.6f)' %(mu4,math.sqrt(x4/NRuns))
print
print "Th-tha-tha-that's all folks!"
More information about the Python-list
mailing list