[Spambayes-checkins] website/scripts striphtml.py,1.1.1.1,1.2

Tim Peters tim_one at users.sourceforge.net
Tue Dec 16 00:06:36 EST 2003


Update of /cvsroot/spambayes/website/scripts
In directory sc8-pr-cvs1:/tmp/cvs-serv12349/website/scripts

Modified Files:
	striphtml.py 
Log Message:
Whitespace normalization.


Index: striphtml.py
===================================================================
RCS file: /cvsroot/spambayes/website/scripts/striphtml.py,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -C2 -d -r1.1.1.1 -r1.2
*** striphtml.py	19 Sep 2002 08:40:55 -0000	1.1.1.1
--- striphtml.py	16 Dec 2003 05:06:34 -0000	1.2
***************
*** 30,60 ****
      data = fi.read()
      if eheadsearch.search(data) >= 0:
! 	i = eheadsearch.regs[0][1]
! 	head, data = data[:i], data[i:]
      else:
! 	head = ""
      bstart = bodysearch.search(data)
      if bstart < 0:
! 	bstart = 0
      else:
! 	head = head + data[:bstart]
! 	bstart = bstart + len(bodysearch.group(0))
      if bannersearch.search(data) >= 0:
! 	i, j = bannersearch.regs[0]
! 	print "banner", i, j, `data[i:j]`
! 	data = data[:i] + data[j:]
      end = ebodyearch.search(data, bstart)
      if end < 0:
! 	end = len(data)
      body = string.strip(data[bstart:end])
      if titlesearch.search(head) >= 0:
! 	title = titlesearch.group(1)
      elif h1search.search(body) >= 0:
! 	title = h1search.group(1)
      else:
! 	title = ""
      if title:
! 	title = string.join(string.split(title))
! 	fo.write("Title: %s\n" % title)
      fo.write("\n")
      fo.write(body)
--- 30,60 ----
      data = fi.read()
      if eheadsearch.search(data) >= 0:
!         i = eheadsearch.regs[0][1]
!         head, data = data[:i], data[i:]
      else:
!         head = ""
      bstart = bodysearch.search(data)
      if bstart < 0:
!         bstart = 0
      else:
!         head = head + data[:bstart]
!         bstart = bstart + len(bodysearch.group(0))
      if bannersearch.search(data) >= 0:
!         i, j = bannersearch.regs[0]
!         print "banner", i, j, `data[i:j]`
!         data = data[:i] + data[j:]
      end = ebodyearch.search(data, bstart)
      if end < 0:
!         end = len(data)
      body = string.strip(data[bstart:end])
      if titlesearch.search(head) >= 0:
!         title = titlesearch.group(1)
      elif h1search.search(body) >= 0:
!         title = h1search.group(1)
      else:
!         title = ""
      if title:
!         title = string.join(string.split(title))
!         fo.write("Title: %s\n" % title)
      fo.write("\n")
      fo.write(body)
***************
*** 66,79 ****
  def makedirs(dirname):
      if os.path.exists(dirname):
! 	return 1
      head, tail = os.path.split(dirname)
      if head:
! 	if not makedirs(head):
! 	    return 0
      try:
! 	os.mkdir(dirname, 0777)
! 	return 1
      except os.error:
! 	return 0
  
  def main():
--- 66,79 ----
  def makedirs(dirname):
      if os.path.exists(dirname):
!         return 1
      head, tail = os.path.split(dirname)
      if head:
!         if not makedirs(head):
!             return 0
      try:
!         os.mkdir(dirname, 0777)
!         return 1
      except os.error:
!         return 0
  
  def main():
***************
*** 81,105 ****
      prefix = ""
      for o, a in opts:
! 	if o == "-p": prefix = a
      if not args:
! 	strip(sys.stdin, sys.stdout)
      else:
! 	for file in args:
! 	    name, ext = os.path.splitext(file)
! 	    if ext == ".htp":
! 		error("file %s is already an HTML prototype" % name)
! 		continue
! 	    sys.stderr.write("Processing %s ...\n" % file)
! 	    htpname = prefix + name + ".htp"
! 	    dirname = os.path.dirname(htpname)
! 	    if dirname:
! 		if not makedirs(dirname):
! 		    error("can't create directory %s" % dirname)
! 		    continue
! 	    fi = open(file, "r")
! 	    fo = open(htpname, "w")
! 	    strip(fi, fo)
! 	    fi.close()
! 	    fo.close()
  
  if __name__ == '__main__':
--- 81,105 ----
      prefix = ""
      for o, a in opts:
!         if o == "-p": prefix = a
      if not args:
!         strip(sys.stdin, sys.stdout)
      else:
!         for file in args:
!             name, ext = os.path.splitext(file)
!             if ext == ".htp":
!                 error("file %s is already an HTML prototype" % name)
!                 continue
!             sys.stderr.write("Processing %s ...\n" % file)
!             htpname = prefix + name + ".htp"
!             dirname = os.path.dirname(htpname)
!             if dirname:
!                 if not makedirs(dirname):
!                     error("can't create directory %s" % dirname)
!                     continue
!             fi = open(file, "r")
!             fo = open(htpname, "w")
!             strip(fi, fo)
!             fi.close()
!             fo.close()
  
  if __name__ == '__main__':





More information about the Spambayes-checkins mailing list