[Python-checkins] CVS: python/dist/src/Doc/tools/sgmlconv docfixer.py,1.25,1.26

Fri, 23 Mar 2001 09:01:50 -0800

Update of /cvsroot/python/python/dist/src/Doc/tools/sgmlconv
In directory usw-pr-cvs1:/tmp/cvs-serv30077

Modified Files:
	docfixer.py 
Log Message:

Lots of small changes to make this work with the Python DOM bindings
(minidom in particular); it was using PyDOM, which is now obsolete.

Only write the output file on success -- this avoids updating the file's
timestamp on failure, which confuses "make".

Index: docfixer.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Doc/tools/sgmlconv/docfixer.py,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -r1.25 -r1.26
*** docfixer.py	2000/05/24 14:33:26	1.25
--- docfixer.py	2001/03/23 17:01:47	1.26
***************
*** 12,21 ****
  import string
  import sys
! import xml.dom.core

! from xml.dom.core import \
!      ELEMENT, \
!      ENTITY_REFERENCE, \
!      TEXT

--- 12,21 ----
  import string
  import sys
! import xml.dom
! import xml.dom.minidom

! ELEMENT = xml.dom.Node.ELEMENT_NODE
! ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE
! TEXT = xml.dom.Node.TEXT_NODE

***************
*** 50,79 ****

- # Workaround to deal with invalid documents (multiple root elements).  This
- # does not indicate a bug in the DOM implementation.
- #
- def get_documentElement(doc):
-     docelem = None
-     for n in doc.childNodes:
-         if n.nodeType == ELEMENT:
-             docelem = n
-     return docelem
- 
- xml.dom.core.Document.get_documentElement = get_documentElement
- 
- 
- # Replace get_childNodes for the Document class; without this, children
- # accessed from the Document object via .childNodes (no matter how many
- # levels of access are used) will be given an ownerDocument of None.
- #
- def get_childNodes(doc):
-     return xml.dom.core.NodeList(doc._node.children, doc._node)
- 
- xml.dom.core.Document.get_childNodes = get_childNodes
- 
- 
  def get_first_element(doc, gi):
      for n in doc.childNodes:
!         if n.get_nodeName() == gi:
              return n

--- 50,56 ----

  def get_first_element(doc, gi):
      for n in doc.childNodes:
!         if n.nodeName == gi:
              return n

***************
*** 85,95 ****

  def find_all_elements(doc, gi):
      nodes = []
!     if doc.get_nodeName() == gi:
          nodes.append(doc)
      for child in doc.childNodes:
          if child.nodeType == ELEMENT:
!             if child.get_tagName() == gi:
                  nodes.append(child)
              for node in child.getElementsByTagName(gi):
--- 62,84 ----

+ def get_documentElement(node):
+     result = None
+     for child in node.childNodes:
+         if child.nodeType == ELEMENT:
+             result = child
+     return result
+ 
+ 
+ def set_tagName(elem, gi):
+     elem.nodeName = elem.tagName = gi
+ 
+ 
  def find_all_elements(doc, gi):
      nodes = []
!     if doc.nodeName == gi:
          nodes.append(doc)
      for child in doc.childNodes:
          if child.nodeType == ELEMENT:
!             if child.tagName == gi:
                  nodes.append(child)
              for node in child.getElementsByTagName(gi):
***************
*** 100,115 ****
      nodes = []
      for child in doc.childNodes:
!         if child.get_nodeName() == gi:
              nodes.append(child)
      return nodes

  def find_all_elements_from_set(doc, gi_set):
      return __find_all_elements_from_set(doc, gi_set, [])

  def __find_all_elements_from_set(doc, gi_set, nodes):
!     if doc.get_nodeName() in gi_set:
          nodes.append(doc)
      for child in doc.childNodes:
!         if child.get_nodeType() == ELEMENT:
              __find_all_elements_from_set(child, gi_set, nodes)
      return nodes
--- 89,105 ----
      nodes = []
      for child in doc.childNodes:
!         if child.nodeName == gi:
              nodes.append(child)
      return nodes

+ 
  def find_all_elements_from_set(doc, gi_set):
      return __find_all_elements_from_set(doc, gi_set, [])

  def __find_all_elements_from_set(doc, gi_set, nodes):
!     if doc.nodeName in gi_set:
          nodes.append(doc)
      for child in doc.childNodes:
!         if child.nodeType == ELEMENT:
              __find_all_elements_from_set(child, gi_set, nodes)
      return nodes
***************
*** 130,134 ****
      node = get_first_element(fragment, "document")
      if node is not None:
!         node._node.name = documentclass
      while 1:
          node = extract_first_element(fragment, "input")
--- 120,124 ----
      node = get_first_element(fragment, "document")
      if node is not None:
!         set_tagName(node, documentclass)
      while 1:
          node = extract_first_element(fragment, "input")
***************
*** 144,148 ****
              docelem.insertBefore(node, text)
          docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
!     while fragment.firstChild and fragment.firstChild.get_nodeType() == TEXT:
          fragment.removeChild(fragment.firstChild)

--- 134,138 ----
              docelem.insertBefore(node, text)
          docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
!     while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
          fragment.removeChild(fragment.firstChild)

***************
*** 154,160 ****
          prevskip = skip
          skip = 0
!         if n.get_nodeType() == TEXT and not prevskip:
              discards.append(n)
!         elif n.get_nodeName() == "COMMENT":
              skip = 1
      for node in discards:
--- 144,150 ----
          prevskip = skip
          skip = 0
!         if n.nodeType == TEXT and not prevskip:
              discards.append(n)
!         elif n.nodeName == "COMMENT":
              skip = 1
      for node in discards:
***************
*** 178,183 ****
      children = container.childNodes
      for child in children:
!         if child.get_nodeType() == ELEMENT:
!             tagName = child.get_tagName()
              if tagName in DESCRIPTOR_ELEMENTS:
                  rewrite_descriptor(doc, child)
--- 168,173 ----
      children = container.childNodes
      for child in children:
!         if child.nodeType == ELEMENT:
!             tagName = child.tagName
              if tagName in DESCRIPTOR_ELEMENTS:
                  rewrite_descriptor(doc, child)
***************
*** 201,210 ****
      #
      # 1.
!     descname = descriptor.get_tagName()
      index = 1
      if descname[-2:] == "ni":
          descname = descname[:-2]
          descriptor.setAttribute("index", "no")
!         descriptor._node.name = descname
          index = 0
      desctype = descname[:-4] # remove 'desc'
--- 191,200 ----
      #
      # 1.
!     descname = descriptor.tagName
      index = 1
      if descname[-2:] == "ni":
          descname = descname[:-2]
          descriptor.setAttribute("index", "no")
!         set_tagName(descriptor, descname)
          index = 0
      desctype = descname[:-4] # remove 'desc'
***************
*** 220,224 ****
      descriptor.removeAttribute("name")
      # 2a.
!     if descriptor.attributes.has_key("var"):
          if descname != "opcodedesc":
              raise RuntimeError, \
--- 210,214 ----
      descriptor.removeAttribute("name")
      # 2a.
!     if descriptor.hasAttribute("var"):
          if descname != "opcodedesc":
              raise RuntimeError, \
***************
*** 246,253 ****
      pos = skip_leading_nodes(children, pos)
      while pos < len(children) \
!           and children[pos].get_nodeName() in (linename, "versionadded"):
!         if children[pos].get_tagName() == linename:
              # this is really a supplemental signature, create <signature>
!             sig = methodline_to_signature(doc, children[pos])
              newchildren.append(sig)
          else:
--- 236,248 ----
      pos = skip_leading_nodes(children, pos)
      while pos < len(children) \
!           and children[pos].nodeName in (linename, "versionadded"):
!         if children[pos].tagName == linename:
              # this is really a supplemental signature, create <signature>
!             oldchild = children[pos].cloneNode(1)
!             try:
!                 sig = methodline_to_signature(doc, children[pos])
!             except KeyError:
!                 print oldchild.toxml()
!                 raise
              newchildren.append(sig)
          else:
***************
*** 302,306 ****
      # must be called after simplfy() if document is multi-rooted to begin with
      docelem = get_documentElement(fragment)
!     toplevel = docelem.get_tagName() == "manual" and "chapter" or "section"
      appendices = 0
      nodes = []
--- 297,301 ----
      # must be called after simplfy() if document is multi-rooted to begin with
      docelem = get_documentElement(fragment)
!     toplevel = docelem.tagName == "manual" and "chapter" or "section"
      appendices = 0
      nodes = []
***************
*** 334,338 ****
              continue
          parent = label.parentNode
!         parentTagName = parent.get_tagName()
          if parentTagName == "title":
              parent.parentNode.setAttribute("id", id)
--- 329,333 ----
              continue
          parent = label.parentNode
!         parentTagName = parent.tagName
          if parentTagName == "title":
              parent.parentNode.setAttribute("id", id)
***************
*** 353,358 ****
          node = queue[0]
          del queue[0]
!         if wsmap.has_key(node.get_nodeName()):
!             ws = wsmap[node.get_tagName()]
              children = node.childNodes
              children.reverse()
--- 348,353 ----
          node = queue[0]
          del queue[0]
!         if wsmap.has_key(node.nodeName):
!             ws = wsmap[node.tagName]
              children = node.childNodes
              children.reverse()
***************
*** 362,367 ****
              children.reverse()
              # hack to get the title in place:
!             if node.get_tagName() == "title" \
!                and node.parentNode.firstChild.get_nodeType() == ELEMENT:
                  node.parentNode.insertBefore(doc.createText("\n  "),
                                               node.parentNode.firstChild)
--- 357,362 ----
              children.reverse()
              # hack to get the title in place:
!             if node.tagName == "title" \
!                and node.parentNode.firstChild.nodeType == ELEMENT:
                  node.parentNode.insertBefore(doc.createText("\n  "),
                                               node.parentNode.firstChild)
***************
*** 389,393 ****
          node = queue[0]
          del queue[0]
!         if rewrite_element(node.get_tagName()):
              children = node.childNodes
              if len(children) == 1 \
--- 384,388 ----
          node = queue[0]
          del queue[0]
!         if rewrite_element(node.tagName):
              children = node.childNodes
              if len(children) == 1 \
***************
*** 412,416 ****
              return 0
          if nodeType == ELEMENT:
!             if l.get_tagName() != r.get_tagName():
                  return 0
              # should check attributes, but that's not a problem here
--- 407,411 ----
              return 0
          if nodeType == ELEMENT:
!             if l.tagName != r.tagName:
                  return 0
              # should check attributes, but that's not a problem here
***************
*** 431,435 ****
      if node is None:
          return
!     node._node.name = "synopsis"
      lastchild = node.childNodes[-1]
      if lastchild.nodeType == TEXT \
--- 426,430 ----
      if node is None:
          return
!     set_tagName(node, "synopsis")
      lastchild = node.childNodes[-1]
      if lastchild.nodeType == TEXT \
***************
*** 438,447 ****
      modauthor = extract_first_element(section, "moduleauthor")
      if modauthor:
!         modauthor._node.name = "author"
          modauthor.appendChild(doc.createTextNode(
              modauthor.getAttribute("name")))
          modauthor.removeAttribute("name")
      platform = extract_first_element(section, "platform")
!     if section.get_tagName() == "section":
          modinfo_pos = 2
          modinfo = doc.createElement("moduleinfo")
--- 433,442 ----
      modauthor = extract_first_element(section, "moduleauthor")
      if modauthor:
!         set_tagName(modauthor, "author")
          modauthor.appendChild(doc.createTextNode(
              modauthor.getAttribute("name")))
          modauthor.removeAttribute("name")
      platform = extract_first_element(section, "platform")
!     if section.tagName == "section":
          modinfo_pos = 2
          modinfo = doc.createElement("moduleinfo")
***************
*** 468,472 ****
              children = title.childNodes
              if len(children) >= 2 \
!                and children[0].get_nodeName() == "module" \
                 and children[0].childNodes[0].data == name:
                  # this is it; morph the <title> into <short-synopsis>
--- 463,467 ----
              children = title.childNodes
              if len(children) >= 2 \
!                and children[0].nodeName == "module" \
                 and children[0].childNodes[0].data == name:
                  # this is it; morph the <title> into <short-synopsis>
***************
*** 474,478 ****
                  if first_data.data[:4] == " ---":
                      first_data.data = string.lstrip(first_data.data[4:])
!                 title._node.name = "short-synopsis"
                  if children[-1].nodeType == TEXT \
                     and children[-1].data[-1:] == ".":
--- 469,473 ----
                  if first_data.data[:4] == " ---":
                      first_data.data = string.lstrip(first_data.data[4:])
!                 set_tagName(title, "short-synopsis")
                  if children[-1].nodeType == TEXT \
                     and children[-1].data[-1:] == ".":
***************
*** 512,516 ****
          for i in range(len(children)):
              node = children[i]
!             if node.get_nodeName() == "moduleinfo":
                  nextnode = children[i+1]
                  if nextnode.nodeType == TEXT:
--- 507,511 ----
          for i in range(len(children)):
              node = children[i]
!             if node.nodeName == "moduleinfo":
                  nextnode = children[i+1]
                  if nextnode.nodeType == TEXT:
***************
*** 545,549 ****
      for child in children:
          if child.nodeType == ELEMENT:
!             tagName = child.get_tagName()
              if tagName == "hline" and prev_row is not None:
                  prev_row.setAttribute("rowsep", "1")
--- 540,544 ----
      for child in children:
          if child.nodeType == ELEMENT:
!             tagName = child.tagName
              if tagName == "hline" and prev_row is not None:
                  prev_row.setAttribute("rowsep", "1")
***************
*** 559,569 ****
          if nodeType == TEXT:
              if string.strip(child.data):
!                 raise ConversionError("unexpected free data in table")
              table.removeChild(child)
              continue
          if nodeType == ELEMENT:
!             if child.get_tagName() != "hline":
                  raise ConversionError(
!                     "unexpected <%s> in table" % child.get_tagName())
              table.removeChild(child)
              continue
--- 554,565 ----
          if nodeType == TEXT:
              if string.strip(child.data):
!                 raise ConversionError("unexpected free data in <%s>: %r"
!                                       % (table.tagName, child.data))
              table.removeChild(child)
              continue
          if nodeType == ELEMENT:
!             if child.tagName != "hline":
                  raise ConversionError(
!                     "unexpected <%s> in table" % child.tagName)
              table.removeChild(child)
              continue
***************
*** 594,598 ****
      nodes = []
      for child in source.childNodes:
!         if child.get_nodeName() == name:
              nodes.append(child)
      for node in nodes:
--- 590,594 ----
      nodes = []
      for child in source.childNodes:
!         if child.nodeName == name:
              nodes.append(child)
      for node in nodes:
***************
*** 634,638 ****
  def fixup_paras(doc, fragment):
      for child in fragment.childNodes:
!         if child.get_nodeName() in RECURSE_INTO_PARA_CONTAINERS:
              fixup_paras_helper(doc, child)
      descriptions = find_all_elements(fragment, "description")
--- 630,634 ----
  def fixup_paras(doc, fragment):
      for child in fragment.childNodes:
!         if child.nodeName in RECURSE_INTO_PARA_CONTAINERS:
              fixup_paras_helper(doc, child)
      descriptions = find_all_elements(fragment, "description")
***************
*** 646,650 ****
      start = skip_leading_nodes(children)
      while len(children) > start:
!         if children[start].get_nodeName() in RECURSE_INTO_PARA_CONTAINERS:
              # Something to recurse into:
              fixup_paras_helper(doc, children[start])
--- 642,646 ----
      start = skip_leading_nodes(children)
      while len(children) > start:
!         if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS:
              # Something to recurse into:
              fixup_paras_helper(doc, children[start])
***************
*** 669,673 ****
          nodeType = child.nodeType
          if nodeType == ELEMENT:
!             if child.get_tagName() in BREAK_ELEMENTS:
                  after = j
                  break
--- 665,669 ----
          nodeType = child.nodeType
          if nodeType == ELEMENT:
!             if child.tagName in BREAK_ELEMENTS:
                  after = j
                  break
***************
*** 743,747 ****
              # all whitespace, just skip
          elif nodeType == ELEMENT:
!             tagName = child.get_tagName()
              if tagName in RECURSE_INTO_PARA_CONTAINERS:
                  return start
--- 739,743 ----
              # all whitespace, just skip
          elif nodeType == ELEMENT:
!             tagName = child.tagName
              if tagName in RECURSE_INTO_PARA_CONTAINERS:
                  return start
***************
*** 773,777 ****
  def fixup_args(doc, arglist):
      for child in arglist.childNodes:
!         if child.get_nodeName() == "optional":
              # found it; fix and return
              arglist.insertBefore(doc.createTextNode("["), child)
--- 769,773 ----
  def fixup_args(doc, arglist):
      for child in arglist.childNodes:
!         if child.nodeName == "optional":
              # found it; fix and return
              arglist.insertBefore(doc.createTextNode("["), child)
***************
*** 790,794 ****
          section = sectauth.parentNode
          section.removeChild(sectauth)
!         sectauth._node.name = "author"
          sectauth.appendChild(doc.createTextNode(
              sectauth.getAttribute("name")))
--- 786,790 ----
          section = sectauth.parentNode
          section.removeChild(sectauth)
!         set_tagName(sectauth, "author")
          sectauth.appendChild(doc.createTextNode(
              sectauth.getAttribute("name")))
***************
*** 796,800 ****
          after = section.childNodes[2]
          title = section.childNodes[1]
!         if title.get_nodeName() != "title":
              after = section.childNodes[0]
          section.insertBefore(doc.createTextNode("\n  "), after)
--- 792,796 ----
          after = section.childNodes[2]
          title = section.childNodes[1]
!         if title.nodeName != "title":
              after = section.childNodes[0]
          section.insertBefore(doc.createTextNode("\n  "), after)
***************
*** 807,815 ****
          if child.nodeType == TEXT \
             and string.lstrip(child.data)[:3] == ">>>":
!             verbatim._node.name = "interactive-session"

  def add_node_ids(fragment, counter=0):
!     fragment._node.node_id = counter
      for node in fragment.childNodes:
          counter = counter + 1
--- 803,811 ----
          if child.nodeType == TEXT \
             and string.lstrip(child.data)[:3] == ">>>":
!             set_tagName(verbatim, "interactive-session")

  def add_node_ids(fragment, counter=0):
!     fragment.node_id = counter
      for node in fragment.childNodes:
          counter = counter + 1
***************
*** 817,821 ****
              counter = add_node_ids(node, counter)
          else:
!             node._node.node_id = counter
      return counter + 1

--- 813,817 ----
              counter = add_node_ids(node, counter)
          else:
!             node.node_id = counter
      return counter + 1

***************
*** 832,836 ****
      for node in nodes:
          parent = node.parentNode
!         d[parent._node.node_id] = parent
      del nodes
      map(fixup_refmodindexes_chunk, d.values())
--- 828,832 ----
      for node in nodes:
          parent = node.parentNode
!         d[parent.node_id] = parent
      del nodes
      map(fixup_refmodindexes_chunk, d.values())
***************
*** 839,843 ****
  def fixup_refmodindexes_chunk(container):
      # node is probably a <para>; let's see how often it isn't:
!     if container.get_tagName() != PARA_ELEMENT:
          bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
      module_entries = find_all_elements(container, "module")
--- 835,839 ----
  def fixup_refmodindexes_chunk(container):
      # node is probably a <para>; let's see how often it isn't:
!     if container.tagName != PARA_ELEMENT:
          bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
      module_entries = find_all_elements(container, "module")
***************
*** 850,854 ****
          if len(children) != 0:
              bwrite("--- unexpected number of children for %s node:\n"
!                    % entry.get_tagName())
              ewrite(entry.toxml() + "\n")
              continue
--- 846,850 ----
          if len(children) != 0:
              bwrite("--- unexpected number of children for %s node:\n"
!                    % entry.tagName)
              ewrite(entry.toxml() + "\n")
              continue
***************
*** 874,878 ****
      for node in nodes:
          parent = node.parentNode
!         d[parent._node.node_id] = parent
      del nodes
      map(fixup_bifuncindexes_chunk, d.values())
--- 870,874 ----
      for node in nodes:
          parent = node.parentNode
!         d[parent.node_id] = parent
      del nodes
      map(fixup_bifuncindexes_chunk, d.values())
***************
*** 906,910 ****
          parent = queue.pop()
          i = 0
!         children = parent.get_childNodes()
          nchildren = len(children)
          while i < (nchildren - 1):
--- 902,906 ----
          parent = queue.pop()
          i = 0
!         children = parent.childNodes
          nchildren = len(children)
          while i < (nchildren - 1):
***************
*** 915,919 ****
                      child = children[i]
                      nextchild = children[i+1]
!                     nextchildren = nextchild.get_childNodes()
                      while len(nextchildren):
                          node = nextchildren[0]
--- 911,915 ----
                      child = children[i]
                      nextchild = children[i+1]
!                     nextchildren = nextchild.childNodes
                      while len(nextchildren):
                          node = nextchildren[0]
***************
*** 933,937 ****
          nodeType = node.nodeType
          if nodeType == ELEMENT:
!             gi = node.get_tagName()
              if knownempty(gi):
                  if node.hasChildNodes():
--- 929,933 ----
          nodeType = node.nodeType
          if nodeType == ELEMENT:
!             gi = node.tagName
              if knownempty(gi):
                  if node.hasChildNodes():
***************
*** 939,944 ****
                            "declared-empty node <%s> has children" % gi
                  ofp.write("e\n")
!             for k, v in node.attributes.items():
!                 value = v.value
                  if _token_rx.match(value):
                      dtype = "TOKEN"
--- 935,939 ----
                            "declared-empty node <%s> has children" % gi
                  ofp.write("e\n")
!             for k, value in node.attributes.items():
                  if _token_rx.match(value):
                      dtype = "TOKEN"
***************
*** 952,956 ****
              ofp.write("-%s\n" % esistools.encode(node.data))
          elif nodeType == ENTITY_REFERENCE:
!             ofp.write("&%s\n" % node.get_nodeName())
          else:
              raise RuntimeError, "unsupported node type: %s" % nodeType
--- 947,951 ----
              ofp.write("-%s\n" % esistools.encode(node.data))
          elif nodeType == ENTITY_REFERENCE:
!             ofp.write("&%s\n" % node.nodeName)
          else:
              raise RuntimeError, "unsupported node type: %s" % nodeType
***************
*** 958,965 ****

  def convert(ifp, ofp):
!     p = esistools.ExtendedEsisBuilder()
!     p.feed(ifp.read())
!     doc = p.document
!     fragment = p.fragment
      normalize(fragment)
      simplify(doc, fragment)
--- 953,961 ----

  def convert(ifp, ofp):
!     events = esistools.parse(ifp)
!     toktype, doc = events.getEvent()
!     fragment = doc.createDocumentFragment()
!     events.expandNode(fragment)
! 
      normalize(fragment)
      simplify(doc, fragment)
***************
*** 995,1000 ****
      #
      d = {}
!     for gi in p.get_empties():
          d[gi] = gi
      if d.has_key("rfc"):
          del d["rfc"]
--- 991,998 ----
      #
      d = {}
!     for gi in events.parser.get_empties():
          d[gi] = gi
+     if d.has_key("author"):
+         del d["author"]
      if d.has_key("rfc"):
          del d["rfc"]
***************
*** 1020,1028 ****
      elif len(sys.argv) == 3:
          ifp = open(sys.argv[1])
!         ofp = open(sys.argv[2], "w")
      else:
          usage()
          sys.exit(2)
      convert(ifp, ofp)

--- 1018,1032 ----
      elif len(sys.argv) == 3:
          ifp = open(sys.argv[1])
!         import StringIO
!         ofp = StringIO.StringIO()
      else:
          usage()
          sys.exit(2)
      convert(ifp, ofp)
+     if len(sys.argv) == 3:
+         fp = open(sys.argv[2], "w")
+         fp.write(ofp.getvalue())
+         fp.close()
+         ofp.close()