Feature Structures in Python (Part III)
James Tauber
jtauber at jtauber.com
Thu Jun 19 17:57:40 CEST 2003
Here is part three which is it for now.
I'm particularly interested in possible improvements to the final unify()
function and if anyone has ideas as to the issue raised at the very end.
James
###
### FEATURE STRUCTURES IN PYTHON (Part III)
###
### James Tauber - http://jtauber.com/
### Updated: 2003-06-19
#
# This is the third part of a guide to implementing feature structures
# in Python.
#
# In the previous parts, we saw how to use dictionaries for feature
# structures and how to implement reentrancy. We also implemented a
# function to test for subsumption. In this part we'll look at
# unification.
#
# Previously, we defined the following:
def subsume(object1, object2):
if type(object1) == type(object2) == dict:
for feat in object1:
if feat in object2 and \
subsume(object1[feat], object2[feat]):
pass
else:
return 0
return 1
else:
return object1 == object2
##
## UNIFICATION
#
# Unification is a partial operation on two feature structures. By
# "partial" we mean that it is possible that for two given feature
# structures, no unification exists.
#
# The unification of fs1 and fs2 is the smallest feature structure
# that subsumes both fs1 and fs2.
#
# Much like we did with subsume, we'll develop a recursive
# implementation of unification incrementally.
#
# OPEN ISSUE: Should unification failure result in an exception being
# thrown or merely None being returned? For now, I'll assume the
# latter is appropriate.
#
# Let's start with the fact that atomic values will not unify if
# non-equal and will unify to their common value if they are.
def _unify_1(value1, value2):
if value1 == value2:
result = value1
else:
return None # can't unify
assert subsume(value1, result)
assert subsume(value2, result)
return result
assert _unify_1("np", "np") == "np"
assert _unify_1("np", "vp") == None
#
# Now let's define the corresponding function for two
# feature-structures with atomic values:
def _unify_2(structure1, structure2):
result = {}
for feat in structure1:
if feat in structure2:
u = _unify_1(structure1[feat], structure2[feat])
if u == None:
return None # can't unify
else:
result[feat] = u
else:
result[feat] = structure1[feat]
for feat in structure2:
if feat in structure1:
pass # already done it
else:
result[feat] = structure2[feat]
assert subsume(structure1, result)
assert subsume(structure2, result)
return result
fs1 = {"NUMBER": "sg"}
fs2 = {"PERSON": 3}
fs3 = {
"NUMBER" : "sg",
"PERSON" : 3
}
assert _unify_2(fs1, fs2) == _unify_2(fs2, fs1) == fs3
fs4 = {"CAT": "np"}
fs5 = {"CAT": "vp"}
assert _unify_2(fs4, fs5) == _unify_2(fs5, fs4) == None
#
# Now try to define our full function:
def _unify_3(object1, object2):
if type(object1) == type(object2) == dict:
result = {}
for feat in object1:
if feat in object2:
u = _unify_3(object1[feat], object2[feat])
if u == None:
return None # can't unify
else:
result[feat] = u
else:
result[feat] = object1[feat]
for feat in object2:
if feat in object1:
pass # already done it
else:
result[feat] = object2[feat]
else:
if object1 == object2:
result = object1
else:
return None # can't unify
if __debug__:
assert subsume(object1, result)
assert subsume(object2, result)
return result
#
# Repeating previous tests:
assert _unify_3("np", "np") == "np"
assert _unify_3("np", "vp") == None
assert _unify_3(fs1, fs2) == _unify_3(fs2, fs1) == fs3
assert _unify_3(fs4, fs5) == _unify_3(fs5, fs4) == None
#
# And new tests:
fs6 = {
"CAT": "np",
"AGR": {
"NUMBER": "sg"
}
}
fs7 = {
"CAT": "np",
"AGR": {
"PERSON": 3
}
}
fs8 = {
"CAT": "np",
"AGR": {
"NUMBER": "sg",
"PERSON": 3
}
}
assert _unify_3(fs6, fs7) == _unify_3(fs7, fs6) == fs8
fs9 = {
"AGR" : {"NUMBER": "sg"},
"SUBJ": {"AGR": {"NUMBER": "sg"}}
}
fs10 = {
"SUBJ": {"AGR": {"PERSON": 3}}
}
fs11 = {
"AGR": {"NUMBER": "sg"},
"SUBJ": {
"AGR": {
"PERSON": 3,
"NUMBER": "sg"
}
}
}
assert _unify_3(fs9, fs10) == _unify_3(fs10, fs9) == fs11
#
# But now let's try an example with rentrancy:
fs12 = {
"NUMBER": "sg"
}
fs13 = {
"AGR": fs12,
"SUBJ": {
"AGR": fs12
}
}
fs14 = _unify_3(fs13, fs10)
#
# This, in fact, fails.
try:
assert fs14["AGR"] == fs14["SUBJ"]["AGR"]
success = 1
except AssertionError:
success = 0
assert not success
#
# The reason is that we have not fully taken in to account the
# rentrancy in the process of building up our new "result" feature
# structure.
#
# One way around this is to use destructive unification, that modifies
# the existing features in-place. Each input ends up as its
# unification with the other input.
def unify(object1, object2):
if type(object1) == type(object2) == dict:
for feat in object1:
if feat in object2:
if not unify(object1[feat], object2[feat]):
return 0 # can't unify
else:
object2[feat] = object1[feat]
for feat in object2:
if feat in object1:
if not unify(object1[feat], object2[feat]):
return 0 # can't unify
else:
object1[feat] = object2[feat]
else:
if object1 != object2:
return 0 # can't unify
return 1
assert unify("np", "np")
assert not unify("np", "vp")
assert unify(fs1, fs2)
assert fs1 == fs2 == fs3
assert not unify(fs4, fs5)
assert unify(fs6, fs7)
assert fs6 == fs7 == fs8
assert unify(fs9, fs10)
assert fs9 == fs10 == fs11
fs15 = {
"SUBJ": {
"AGR": {
"PERSON": 3,
"NUMBER": "sg"
}
},
"AGR": {
"PERSON": 3,
"NUMBER": "sg"
}
}
assert unify(fs13, fs10)
assert fs13 == fs10 ==fs15
assert fs13["AGR"] == fs13["SUBJ"]["AGR"]
assert fs10["AGR"] == fs10["SUBJ"]["AGR"]
#
# Note however that reentrancy has only been maintained in the input
# to unify() that contained the reentrancy in the first place, so even
# though the two inputs end up *equal* after unification, they are not
# fully equivalent.
assert id(fs13["AGR"]) == id(fs13["SUBJ"]["AGR"])
assert not id(fs10["AGR"]) == id(fs10["SUBJ"]["AGR"])
#
# I do not see a simple way around this.
--
James Tauber
http://jtauber.com/
More information about the Python-list
mailing list