reconstruct a dtd content model

I wonder if anybody has code to reconstruct the content definition of a DTD element. It's needed for an automatic documentation process. I inherited some recursive code, but it's clearly doing the wrong thing: the parentheses are being handled incorrectly. I think the parentheses ought to be added before the occurrence processing at "if node.occur == 'plus':", but it's also clear that they are not always required.
# The code I have is as follows
def elementContent(node):
    """Return the display text for an element node: its ``name`` attribute."""
    return node.name


def _contentRecur(node, elFmt=elementContent, parentType=None):
    """Rebuild the textual DTD content model for the tree rooted at *node*.

    Attributes read from each node::

        node.type:  "element" | "pcdata" | "seq" | "or"
        node.occur: "once" | "opt" | "plus" | "mult"
        node.name:  str | None   (read only through elFmt)
        node.left, node.right: child node or None (read for "seq"/"or" only)

    NOTE(review): these attribute names look like lxml's DTD content
    declarations -- confirm against the caller.

    Args:
        node: root of the (sub)tree to render, or None.
        elFmt: callable taking an "element" node and returning its text.
        parentType: type of the enclosing group, or None at the top level.
            Callers normally leave this at its default; it is passed down by
            the recursion so that a chain of same-type binary nodes is
            flattened into a single group, e.g. ``(a, b, c)``.

    Returns:
        The content model text, ``""`` when *node* is None or is a group
        with no children.
    """
    if node is None:
        return ""

    t = node.type
    # "once" (and anything unrecognised) contributes no indicator.
    suffix = {"plus": "+", "opt": "?", "mult": "*"}.get(node.occur, "")

    if t == "element":
        # No padding: the indicator must follow the name immediately.
        s = f"{elFmt(node)}"
    elif t == "pcdata":
        s = "#PCDATA"
    elif t in ("or", "seq"):
        children = [c for c in (node.left, node.right) if c is not None]
        if not children:
            return ""
        if len(children) == 2:
            sep = " | " if t == "or" else ", "
            s = sep.join(
                _contentRecur(c, elFmt=elFmt, parentType=t) for c in children
            )
            # Parentheses are needed when the group differs from its parent
            # (or has none), or when an indicator must apply to the whole
            # group rather than just its last item.
            if parentType != t or suffix:
                s = f"({s})"
        else:
            # A one-child group is transparent: the child is rendered as if
            # it sat directly in this node's parent.
            s = _contentRecur(children[0], elFmt=elFmt, parentType=parentType)
            if suffix:
                # Avoid stacking indicators such as "a+*".
                s = f"({s})"
    else:
        s = ""

    return s + suffix
# -- Robin Becker

On 19/03/2022 09:43, Robin Becker wrote:
def elementContent(node):
    """Return the display text for an element node: its ``name`` attribute."""
    return node.name


def _contentRecur(node, parentType, elFmt):
    """Rebuild the textual DTD content model for the tree rooted at *node*.

    Attributes read from each node::

        node.type:  "element" | "pcdata" | "seq" | "or"
        node.occur: "once" | "opt" | "plus" | "mult"
        node.name:  str | None   (read only through elFmt)
        node.left, node.right: child node or None (read for "seq"/"or" only)

    Args:
        node: root of the (sub)tree to render, or None.
        parentType: type of the enclosing group, or None at the top level.
            A two-child group of the same type as its parent is merged into
            the parent's list, so a chain of binary nodes prints as
            ``(a, b, c)``.
        elFmt: callable taking an "element" node and returning its text.

    Returns:
        The content model text, ``""`` when *node* is None or is a group
        with no children.
    """
    if node is None:
        return ""

    t = node.type
    # "once" (and anything unrecognised) contributes no indicator.
    suffix = {"plus": "+", "opt": "?", "mult": "*"}.get(node.occur, "")

    if t == "element":
        s = f"{elFmt(node)}"
    elif t == "pcdata":
        s = "#PCDATA"
    elif t in ("or", "seq"):
        children = [c for c in (node.left, node.right) if c is not None]
        if not children:
            return ""
        if len(children) == 2:
            sep = " | " if t == "or" else ", "
            s = sep.join(_contentRecur(c, t, elFmt) for c in children)
            # Parenthesise when the group differs from its parent, and also
            # when it carries its own indicator: without the parentheses
            # "(a, (b, c)*)" would be printed as "(a, b, c*)".
            if parentType != t or suffix:
                s = f"({s})"
        else:
            # A one-child group is transparent: the child is rendered as if
            # it sat directly in this node's parent.
            s = _contentRecur(children[0], parentType, elFmt)
            if suffix:
                # Avoid stacking indicators such as "a+*".
                s = f"({s})"
    else:
        s = ""

    return s + suffix


def content(el):
    """Return the content specification text for the element declaration *el*.

    Reads ``el.type`` and, unless the type is one of the three keywords
    handled directly, ``el.content`` (the root of the content tree).

    Returns:
        ``"(#PCDATA)"``, ``"EMPTY"`` or ``"ANY"`` for the types "pcdata",
        "empty" and "any"; otherwise the rendered content tree, always
        enclosed in parentheses. ``""`` is returned when the tree renders to
        nothing (for example ``el.content`` is None), instead of failing on
        the first-character check.
    """
    if el.type == 'pcdata':
        return '(#PCDATA)'
    if el.type == 'empty':
        return 'EMPTY'
    if el.type == 'any':
        return 'ANY'

    s = _contentRecur(el.content, None, elementContent)
    if not s:
        return s
    # With parentType None the text is either a bare item ("a", "a*",
    # "#PCDATA") or an already-parenthesised group, so checking the first
    # character is enough to decide whether to add the outer parentheses.
    if not s.startswith('('):
        s = f"({s})"
    return s
# -- Robin Becker

On 19/03/2022 09:43, Robin Becker wrote:
def elementContent(node):
    """Return the display text for an element node: its ``name`` attribute."""
    return node.name


def _contentRecur(node, parentType, elFmt):
    """Rebuild the textual DTD content model for the tree rooted at *node*.

    Attributes read from each node::

        node.type:  "element" | "pcdata" | "seq" | "or"
        node.occur: "once" | "opt" | "plus" | "mult"
        node.name:  str | None   (read only through elFmt)
        node.left, node.right: child node or None (read for "seq"/"or" only)

    Args:
        node: root of the (sub)tree to render, or None.
        parentType: type of the enclosing group, or None at the top level.
            A two-child group of the same type as its parent is merged into
            the parent's list, so a chain of binary nodes prints as
            ``(a, b, c)``.
        elFmt: callable taking an "element" node and returning its text.

    Returns:
        The content model text, ``""`` when *node* is None or is a group
        with no children.
    """
    if node is None:
        return ""

    t = node.type
    # "once" (and anything unrecognised) contributes no indicator.
    suffix = {"plus": "+", "opt": "?", "mult": "*"}.get(node.occur, "")

    if t == "element":
        s = f"{elFmt(node)}"
    elif t == "pcdata":
        s = "#PCDATA"
    elif t in ("or", "seq"):
        children = [c for c in (node.left, node.right) if c is not None]
        if not children:
            return ""
        if len(children) == 2:
            sep = " | " if t == "or" else ", "
            s = sep.join(_contentRecur(c, t, elFmt) for c in children)
            # Parenthesise when the group differs from its parent, and also
            # when it carries its own indicator: without the parentheses
            # "(a, (b, c)*)" would be printed as "(a, b, c*)".
            if parentType != t or suffix:
                s = f"({s})"
        else:
            # A one-child group is transparent: the child is rendered as if
            # it sat directly in this node's parent.
            s = _contentRecur(children[0], parentType, elFmt)
            if suffix:
                # Avoid stacking indicators such as "a+*".
                s = f"({s})"
    else:
        s = ""

    return s + suffix


def content(el):
    """Return the content specification text for the element declaration *el*.

    Reads ``el.type`` and, unless the type is one of the three keywords
    handled directly, ``el.content`` (the root of the content tree).

    Returns:
        ``"(#PCDATA)"``, ``"EMPTY"`` or ``"ANY"`` for the types "pcdata",
        "empty" and "any"; otherwise the rendered content tree, always
        enclosed in parentheses. ``""`` is returned when the tree renders to
        nothing (for example ``el.content`` is None), instead of failing on
        the first-character check.
    """
    if el.type == 'pcdata':
        return '(#PCDATA)'
    if el.type == 'empty':
        return 'EMPTY'
    if el.type == 'any':
        return 'ANY'

    s = _contentRecur(el.content, None, elementContent)
    if not s:
        return s
    # With parentType None the text is either a bare item ("a", "a*",
    # "#PCDATA") or an already-parenthesised group, so checking the first
    # character is enough to decide whether to add the outer parentheses.
    if not s.startswith('('):
        s = f"({s})"
    return s
# -- Robin Becker
participants (1)
-
Robin Becker