I wonder if anybody has code to reconstruct the content definition of a DTD element. It's needed for some automatic
documentation process.
I inherited a recursive code, but it's clear that it's doing the wrong thing; it's fairly obvious that the parentheses
are being done wrongly. I think that parens ought to go before the occur processing at
"if node.occur == 'plus':", but it's also obvious that they are not always required.
> content reconstruction test
> '<!ELEMENT a (b?)>' content=( b ?)
> '<!ELEMENT a (a|b)+>' content=( a | b +)
> '<!ELEMENT a (a|b|c)+>' content=( a | b | c +)
> '<!ELEMENT a (a|b?|c)+>' content=( a | b | c *)
> '<!ELEMENT a (z)>' content=( z )
> '<!ELEMENT a (#PCDATA)>' content=(#PCDATA)
> '<!ELEMENT a (#PCDATA|b)*>' content=(#PCDATA | b *)
> '<!ELEMENT a (a,b,c)*>' content=( a , b , c *)
> '<!ELEMENT a ANY>' content=ANY
> '<!ELEMENT a EMPTY>' content=EMPTY
The code I have is as follows
def elementContent(node):
return node.name
def _contentRecur(node, elFmt=elementContent):
"""
node.type: ("element" | "seq")
node.occur: ("once" | "opt")
none.name: (str | None)
"""
s = ""
if node is None:
return s
t = node.type
left = node.left
right = node.right
if t == 'or':
if left and right:
s = f"{_contentRecur(left,elFmt=elFmt)} | {_contentRecur(right,elFmt=elFmt)}"
elif left is not None:
s = _contentRecur(left,elFmt=elFmt)
elif right is not None:
s = _contentRecur(right,elFmt=elFmt)
elif t == 'seq':
if left and right:
s = f"{_contentRecur(left,elFmt=elFmt)} , {_contentRecur(right,elFmt=elFmt)}"
elif left is not None:
s = _contentRecur(left,elFmt=elFmt)
elif right is not None:
s = _contentRecur(right,elFmt=elFmt)
elif t == "element":
s = f" {elFmt(node)} "
elif t=='pcdata':
s = '#PCDATA'
if node.occur == 'plus':
s += "+"
elif node.occur == 'opt':
s += '?'
elif node.occur=='mult':
s += '*'
return s
--
Robin Becker