# Toy interpolation code [was Re: Draft PEP: string interpolation with backquotes]

Nick Mathewson QnickQm at alum.mit.edu
Wed Dec 5 23:59:05 EST 2001

```One thing I don't understand from the Interpolation Wars is why people
feel Python needs any new syntax for this.  Here's some example code
that shows how to implement four kinds of proposed interpolation.

They are:
"naive backticks"  (i.e., "`a`" == str(a);
                    "``a`+"b"`" is an error)
"smart backticks"  (i.e., "`a`" == str(a);
                    "``a`+"b"`" == str(repr(a)+"b"))
"dollar-quoting"   (i.e., "a$b$c" == "a"+str(b)+"c")
"perl-style"       (i.e., "1+1=$(1+1)" == "1+1=2",
                    "Major $sys.version_info[0]" == "Major 2",
                    x=5; "x=$x" == "x=5".)

The interface works in two ways:
1. Immediate
       i(string, mode='d2')
       (for mode in ('bt1', 'bt2', 'd1', 'd2'))
2. Precompiled
       pattern = I(string, mode='d2')
       print str(pattern)
       print pattern % namespace

BUGS:
1. This code is not tested well enough.
2. It is not optimized either.
3. It is not commented well enough.
4. It doesn't fail gracefully when it sees bad syntax.

============================================================
#!/usr/bin/python

import sys

# Needs version >= 2.
assert sys.version_info[0] >= 2

# If we don't have sys._getframe, make one.
if hasattr(sys, "_getframe"):
    _getframe = sys._getframe
else:
    def _getframe():
        """Return the frame of the caller.

        Fallback for interpreters without sys._getframe: an exception
        is raised and caught purely to obtain a traceback, whose frame
        is this function's own; its f_back is therefore the caller's.
        """
        try:
            1/0
        except ZeroDivisionError:
            # sys.exc_info() is used instead of the deprecated
            # sys.exc_traceback global.
            tb = sys.exc_info()[2]
            return tb.tb_frame.f_back

def _comp_backtick1(s, ch='`'):
    """Compile a format string with naively delimited expressions.

    The text between each successive pair of delimiters is treated as
    a Python expression whose str() value is substituted; everything
    else is literal text.  Delimiters are paired strictly in order of
    appearance, so an expression cannot itself contain the delimiter.

    Arguments:
      s  -- the format string.
      ch -- the delimiter (a backtick unless overridden).

    Returns a code object; eval() it in a namespace to obtain the
    interpolated string.

    Raises ValueError if an opening delimiter has no closing partner.
    compile() raises SyntaxError if the generated expression is not
    valid Python.
    """

    exp = []   # Source text of each piece of the result, in order.
    i = 0      # Index of the first character not yet consumed.
    while 1:
        bt = s.find(ch, i)
        if bt < 0:
            # No more delimiters: the rest of the string is literal.
            exp.append(repr(s[i:]))
            break
        if bt > i:
            exp.append(repr(s[i:bt]))

        bt2 = s.find(ch, bt+1)
        if bt2 < 0:
            raise ValueError("unmatched %r at index %d of %r" % (ch, bt, s))
        exp.append("str((%s))" % s[bt+1:bt2])
        i = bt2+1
        if i >= len(s):
            break

    if len(exp) == 1:
        # A lone piece must not be wrapped in "(...)": without a comma
        # that would not be a tuple, and join would split the string.
        exp = exp[0]
    else:
        exp = '"".join( (%s) )' % ", ".join(exp)

    return compile(exp, '<interpolated string>', 'eval')

def _get_btexpr_at(s, i, ch='`', min=None):
    """Find the shortest chunk of s starting at index i that ends with
    ch and compiles as a Python expression.

    Each occurrence of ch at or after the starting search position is
    tried in turn; the first one for which s[i:pos+1] compiles in
    'eval' mode is accepted.

    Arguments:
      s   -- the string being scanned.
      i   -- index at which the candidate expression begins.
      ch  -- the character that must end the expression.
      min -- index at which to start looking for ch; i+1 if omitted.

    Returns (expr, end) where expr is s[i+1:pos] (the accepted chunk
    without its first and last characters) and end is pos+1, the index
    of s immediately following the chunk.

    Raises ValueError if no occurrence of ch yields a chunk that
    compiles.
    """

    if min is None:
        bt = i + 1
    else:
        bt = min
    while bt < len(s):
        bt = s.find(ch, bt)
        if bt < 0:
            # No further candidate terminators.
            break
        try:
            compile(s[i:bt+1], '', 'eval')
        except SyntaxError:
            # This ch is inside the expression; try the next one.
            bt = bt + 1
        else:
            return s[i+1:bt], bt + 1

    raise ValueError("no well-formed expression ending in %r at index %d "
                     "of %r" % (ch, i, s))

def _comp_backtick2(s, ch='`', fn=_get_btexpr_at):
    """Compile a format string whose embedded expressions are located
    by a scanner function rather than by naive delimiter pairing.

    _comp_backtick1 always pairs a delimiter with the very next one.
    This function instead asks fn where each expression ends, so an
    expression may itself contain the delimiter.  For the input
    ' `"1"+`2`` ' _comp_backtick1 pairs the first two backticks and is
    left with the broken expression "1"+, whereas this function keeps
    "1"+`2` together as one expression.

    Arguments:
      s  -- the format string.
      ch -- the character that introduces an embedded expression.
      fn -- called as fn(s, index_of_ch); must return (expr, end),
            where expr is Python source text for the value and end is
            the index of s just past the text consumed.

    Returns a code object; eval() it in a namespace to obtain the
    interpolated string.
    """

    exp = []   # Source text of each piece of the result, in order.
    i = 0      # Index of the first character not yet consumed.
    while 1:
        bt = s.find(ch, i)
        if bt < 0:
            # No more expressions: the rest of the string is literal.
            exp.append(repr(s[i:]))
            break
        if bt > i:
            exp.append(repr(s[i:bt]))

        btexpr, i = fn(s, bt)
        # Coerce to str as _comp_backtick1 does; otherwise a non-string
        # value (e.g. from `1+1`) makes the generated join fail.
        exp.append("str((%s))" % btexpr)

        if i >= len(s):
            break

    if len(exp) == 1:
        # A lone piece must not be wrapped in "(...)": without a comma
        # that would not be a tuple, and join would split the string.
        exp = exp[0]
    else:
        exp = '"".join( (%s) )' % ", ".join(exp)

    return compile(exp, '<interpolated string>', 'eval')

def _comp_dollars1(s):
    """Given a format string, returns a code object that evaluates to
    the format string, with all values between dollars evaluated
    and replaced by their string values.

    This is _comp_backtick1 with '$' as the delimiter.
    """
    # The delimiter must be the single character '$'; an escaped form
    # such as backslash-dollar is two characters and never matches.
    return _comp_backtick1(s, '$')

# Maps each opening bracket to the closing bracket that matches it.
_endchars = {'(': ')', '[': ']'}

def _isidch(ch):
    """Return true if ch is alphanumeric, an underscore or a dot,
    i.e. a character that may appear in a dotted name."""
    return ch.isalnum() or ch in '_.'

def _get_dlr_expr_at(s, i):
    """Given a string s and the index i of a '$' in it, returns a tuple
    (expr, end) such that expr is Python source text for the longest
    possible "interpolated expression" following the '$', and end is
    the index immediately after the text consumed.

    What may follow the '$':
      1.  Another '$'; yields a literal dollar sign.
      2.  A parenthesized expression     $(1+2+4)
      3.  A dotted name                  $sys.version
      4.  2 or 3 followed by any run of indexes, calls and attribute
          accesses                       $x.f(1)[0].y
      5.  A quoted string or a backquoted expression.

    For 2-4 expr is wrapped in str(); for 5 the source text is
    returned as it stands.

    Raises ValueError if the '$' is the last character of s, or is
    followed by a character that cannot begin any of the forms above.
    """

    start = i
    assert s[i] == '$'
    i += 1
    if i >= len(s):
        raise ValueError("dangling '$' at end of %r" % s)
    if s[i] == '$':
        return '"$"', i+1

    if s[i].isalnum() or s[i] == "(":
        # Consume the leading dotted name (empty when s[i] is "(").
        while i < len(s) and _isidch(s[i]):
            i += 1
        if i == len(s):
            return "str(%s)" % s[start+1:], i
        ch = s[i]
        endch = _endchars.get(ch)
        if not endch or endch == ch:
            # The name is not followed by a bracket: it is the whole
            # expression.
            return "str(%s)" % s[start+1:i], i

        # Find where the first bracketed part ends, then keep
        # extending over any further ".name", "(...)" or "[...]".
        _, end = _get_btexpr_at(s, start+1, endch)
        while end < len(s) and (s[end] == '.' or _endchars.get(s[end])):
            if s[end] == '.':
                while end < len(s) and _isidch(s[end]):
                    end += 1
                continue
            try:
                _, end = _get_btexpr_at(s,
                                        start+1,
                                        _endchars[s[end]],
                                        end+1)
            except (TypeError, ValueError):
                # No well-formed extension (the helper either raised
                # or returned nothing to unpack): stop at what we have.
                break

        return "str(%s)" % s[start+1:end], end
    elif s[i] in "\"\'`":
        _, end = _get_btexpr_at(s, i, s[i])
        return s[i:end], end
    else:
        raise ValueError("cannot interpolate %r after '$' at index %d of %r"
                         % (s[i], start, s))

def _comp_dollars2(s):
    """Given a format string, returns a code object that evaluates to
    the format string with each '$'-introduced expression (as found by
    _get_dlr_expr_at) replaced by its value.
    """
    # The delimiter must be the single character '$'; an escaped form
    # such as backslash-dollar is two characters and never matches.
    return _comp_backtick2(s, '$', _get_dlr_expr_at)

# Maps each interpolation mode name to the function that compiles a
# format string in that mode.
_modes = dict(
    bt1=_comp_backtick1,
    bt2=_comp_backtick2,
    d1=_comp_dollars1,
    d2=_comp_dollars2,
)

class I:
    """A precompiled interpolation pattern.

    The format string is compiled once by the constructor and can then
    be evaluated repeatedly, either against an explicit namespace
    (pattern % namespace) or against the caller's variables
    (str(pattern)).

    NOTE: the format string is compiled and evaluated as Python code,
    so it must never come from an untrusted source.
    """

    def __init__(self, s, mode='d2'):
        """Compile format string s using the compiler registered in
        _modes under the name mode.

        Raises KeyError if mode is not a key of _modes.
        """
        cfn = _modes[mode]
        self.compiled = cfn(s)

    def __mod__(self, vars):
        """Return the pattern interpolated with the names in vars."""
        # Evaluate against a copy: eval() inserts '__builtins__' into
        # the globals dict it is handed, which would otherwise pollute
        # the caller's namespace.
        namespace = dict(vars)
        return eval(self.compiled, namespace, namespace)

    def __str__(self):
        """Return the pattern interpolated with the globals and locals
        of the frame one level above this method."""
        f = _getframe().f_back
        return eval(self.compiled, f.f_globals, f.f_locals)

def i(s, mode="d2"):
    """Interpolate format string s immediately, using the variables
    visible to the caller.

    Arguments:
      s    -- the format string.
      mode -- name of the interpolation style; a key of _modes.

    Returns the interpolated string.
    """
    caller = _getframe().f_back
    # Offer the caller's globals as well as its locals (locals taking
    # precedence); with the locals alone, module-level names such as
    # imported modules are invisible when the caller is a function.
    namespace = dict(caller.f_globals)
    namespace.update(caller.f_locals)
    return I(s, mode) % namespace

# Self-test: exercises each interpolation mode through i().
if __name__ == '__main__':
    assert i("1+1=`1+1`", 'bt1') == '1+1=2'
    if sys.version_info[0] == 2:
        # 'bt2' relies on Python's own backquote (repr) syntax, which
        # Python 3 removed.
        assert i("1+1=`'1'+`1``", 'bt2') == '1+1=11'
    x = 2
    assert i("$x$+1=$x+1$", 'd1') == '2+1=3'
    assert i("$x+1=$(x+1)", 'd2') == '2+1=3'
    assert i("1+1=$x") == '1+1=2'
    xyz = 9
    assert i("3*3=$xyz") == '3*3=9'
    assert i("3*3+2=$xyz+2") == '3*3+2=9+2'
    assert i("3*3+2=$(xyz+2)") == '3*3+2=11'
    assert i("3*3+2=$(11)") == '3*3+2=11'
    assert i("2**10=$(2**10)") == '2**10=1024'
    x = 3
    assert i("$x**2=$(x**2)") == '3**2=9'
    x = [1, 2, 3, [4, 5]]
    assert i("x[1]=$x[1]") == 'x[1]=2'
    assert i("x[3][0]=$x[3][0]") == 'x[3][0]=4'

    def power(e):
        def f(n, e=e):
            return n**e
        return f

    assert i("2**10=$power(10)(2)") == '2**10=1024'

    class X:
        pass

    x = X()
    x.y = 3
    x.z = lambda q: q*q

    def getsys():
        return sys
    x.getsys = getsys

    assert i("=> $x.y") == '=> 3'
    assert i("=> $x.z(10)") == '=> 100'
    # Compare against the running interpreter's major version rather
    # than a hard-coded 2.
    assert i("=> $x.getsys().version_info[0]") == '=> %d' % sys.version_info[0]

    # Parenthesized so the line is valid under both the print
    # statement and the print function.
    print("OK")

============================================================

--
Nick Mathewson    <Q nick Q m at alum dot mit dot edu>
Remove Q's to respond.  No spam.

```