DOM text

Richard Lewis richardlewis at
Tue Aug 30 12:17:25 CEST 2005

On Fri, 26 Aug 2005 11:43:18 +0100, "Richard Lewis"
<richardlewis at> said:
> I'm implementing a Cursor class now which keeps track of the current
> parent Element, text node and character position so that I can easily (I
> hope ;-) work out where the splitting and inserting needs to occur. Wish
> me luck!!
Sorry to revive this thread, but there's something else thats causing me
confusion now!

My cursor class is going quite well and I can insert text and element
nodes. It also has methods to 'move' the 'cursor' forward and backward
by a node at a time. It keeps the current_node in an instance variable
which is initially assigned an element from a DOM tree instance created

The problem I've come up against is when I use the next_node() method,
and the current_node is a (leaf) Text node, the nextSibling property of
current_node is None, where I know (from the document structure) that it
shouldn't be. To make matters more confusing, if I manually create an
instance of my DOM tree (interactively) and check the nextSibling of the
same Text node, it is the correct value (another Element node) while the
nextSibling property of the SectionCursor instance's current_node
property (referring to the same node) is None. I *think* it only applies
to leaf Text nodes.

Here is the *complete* code for my SectionCursor class:
(note that 'sections' are large(ish) document fragments from the main
class SectionCursor:
  def __init__(self, section_element):
    """Create a SectionCursor instance using the 'section_element' as
    the parent element."""
    self.section_element = section_element
    self.current_node = self.section_element.firstChild
    self.char_pos = 0

  def forward(self, skip=1):
    """Move the cursor forward 'skip' character positions."""
    if self.current_node.nodeType == Node.TEXT_NODE:
      self.char_pos += skip
      if self.char_pos > len(
    else: self.next_node()

  def backward(self, skip=1):
    """Move the cursor backward 'skip' character positions."""
    if self.current_node.nodeType == Node.TEXT_NODE:
      self.char_pos -= skip
      if self.char_pos < 0:
    else: self.previous_node()

  def next_node(self):
    """Move the cursor to the next node; either the first child or next
    if self.current_node.hasChildNodes():
      self.current_node = self.current_node.firstChild
    elif self.current_node.nextSibling is not None:
      self.current_node = self.current_node.nextSibling
    else: return False
    self.char_pos = 0
    return True

  def previous_node(self):
    """Move the cursor to the previous node; either the previous sibling
    or the parent."""
    if self.current_node.previousSibling is not None:
      self.current_node = self.current_node.previousSibling
    elif self.current_node.parentNode != self.section_element:
      self.current_node = self.current_node.parentNode
    else: return False
    if self.current_node.nodeType == Node.TEXT_NODE:
      self.char_pos = len( - 1
      self.char_pos = 0
    return True

  def jump_to(self, node, char_pos=0):
    """Jump to a node and character position."""
    self.current_node = node
    self.char_pos = char_pos

  def insert_node(self, ref_doc, new_node):
    """Insert a node (new_node); ref_doc is an instance of the Document
    if self.current_node.nodeType == Node.TEXT_NODE: 
      parent_node = self.current_node.parentNode
      text_node = self.current_node
      next_node = text_node.nextSibling

      preceeding_portion =
      proceeding_portion =

      parent_node.replaceChild(preceeding_portion, text_node)
      parent_node.insertBefore(new_node, next_node)
      parent_node.insertBefore(proceeding_portion, next_node)
      # where is the cursor?
      parent_node = self.current_node.parent_element
      parent_node.insertBefore(new_node, self.current_node)
      # where is the cursor?

  def append_child_node(self, ref_doc, new_node):

  def insert_element(self, ref_doc, tag_name, attrs=None):
    """Insert an element called tag_name and with the attributes in the
    attrs dictionary; ref_doc is an instance of the Document class."""
    new_element = ref_doc.createElement(tag_name)
    if attrs is not None:
      for name, value in attrs.items():
        new_element.setAttribute(name, value)
    self.insert_node(ref_doc, new_element)

  def insert_text(self, ref_doc, text):
    """Insert the text in 'text'; ref_doc is an instance of the Document
    new_text = ref_doc.createTextNode(text)
    self.insert_node(ref_doc, new_text)

  def remove_node(self):
    """Remove the current node."""
    condemned_node = self.current_node
    if not self.next_node():
    parent_node = condemned_node.parentNode
    old_child = parent_node.removeChild(condemned_node)

  def remove_text(self, ref_doc, count=None):
    """Remove count (or all) characters from the current cursor
    if self.current_node.nodeType != Node.TEXT_NODE:
      return False

    text =
    new_text = text[:self.char_pos]
    if count is not None:
      new_text += text[self.char_pos + count:]

    new_text_node = ref_doc.createTextNode(new_text)
    parent_node = self.current_node.parentNode
    self.current_node = parent_node.replaceChild(new_text_node,
    #self.char_pos = 0

I've noticed that when you print any minidom node (except a Text node)
it shows the node's memory address. But it doesn't do this with Text
nodes. Does anyone know why this is? If I assign a Text node from one
DOM tree to a variable, I don't get a copy do I? I hope I just get
another reference to the original node.


More information about the Python-list mailing list