[XML-SIG] Bookmark parsers
Lars Marius Garshol
larsga@ifi.uio.no
Sat, 05 Sep 1998 16:37:12 +0200
Here are some scripts to convert from MSIE, Opera and Netscape bookmarks
to Opera, Netscape and XBEL. There's hardly any support for the created,
visited and modified dates. Fredrik's code has been looted to get the MSIE support.
Testing has been minimal so far.
(adr_parse.py)
"""
Small utility to parse Opera bookmark files.
"""
import string,bookmark
# --- Constants
# Maps the three-letter English month abbreviations used in Opera's
# date stamps to two-digit month numbers.
short_months = {
    "Jan": "01",
    "Feb": "02",
    "Mar": "03",
    "Apr": "04",
    "May": "05",
    "Jun": "06",
    "Jul": "07",
    "Aug": "08",
    "Sep": "09",
    "Oct": "10",
    "Nov": "11",
    "Dec": "12",
}
# --- Parsing exception
class OperaParseException(Exception):
    """Raised when an Opera bookmark file does not have the expected layout."""
# --- Methods
def readfield(infile, fieldname):
    """Read the next line of *infile* and return the value of *fieldname*.

    The line is expected to contain ``fieldname=value``; anything in
    front of the field name (such as leading indentation) is ignored.

    infile    -- open file-like object positioned at the field's line
    fieldname -- name of the expected field, without the "="

    Returns the text following "=" with the line terminator removed.
    Raises OperaParseException if the line read (or end of file) does
    not contain the field.
    """
    line = infile.readline()
    marker = fieldname + "="
    pos = line.find(marker)
    if pos == -1:
        raise OperaParseException("Field '%s' missing" % fieldname)
    # Remove only the line terminator, so that a final line without a
    # trailing newline keeps its last character and a CRLF-terminated
    # line does not leave a stray carriage return in the value.
    return line[pos + len(marker):].rstrip("\r\n")
def swallow_rest(infile):
    """Skip the remaining lines of the current entry.

    Reads and discards lines from *infile* up to and including the first
    blank line, or up to end of file if there is none.  A line holding
    only whitespace (for instance the carriage return left over from a
    CRLF-terminated file) counts as blank, so such files do not cause
    every following entry to be skipped.
    """
    line = infile.readline()
    # readline() returns "" at end of file, which also strips to "".
    while line.strip() != "":
        line = infile.readline()
def parse_date(date):
    """Convert the value of an Opera date field to a "YYYYMMDD" string.

    date -- field value, for example
            "904923783 (Fri Sep 04 17:43:03 1998)"; a date that was
            never set is written as "0 (?)"

    Returns the date as "YYYYMMDD", or None for the "(?)" placeholder.
    Raises OperaParseException if the parentheses are missing or the
    text between them is not a recognisable date.
    """
    lp = date.find("(")
    rp = date.find(")", lp + 1)
    if lp == -1 or rp == -1:
        raise OperaParseException("Date without parentheses")
    stamp = date[lp + 1:rp]
    if stamp == "?":
        return None
    # Expected layout: weekday, month name, day, time, year.  Splitting
    # on whitespace rather than using fixed offsets also copes with a
    # space-padded day ("Sep  4").
    parts = stamp.split()
    if len(parts) != 5:
        raise OperaParseException("Unrecognised date '%s'" % stamp)
    month = short_months.get(parts[1])
    day = parts[2]
    year = parts[4]
    if month is None or not day.isdigit() or not year.isdigit():
        raise OperaParseException("Unrecognised date '%s'" % stamp)
    return "%s%s%02d" % (year, month, int(day))
def parse_adr(filename):
    """Parse the Opera bookmark file *filename* into a bookmark tree.

    filename -- path of the Opera bookmark (.adr) file to read

    Returns a bookmark.Bookmarks object holding the folders and
    bookmarks found.  Raises OperaParseException if an entry lacks one
    of its expected fields or carries a malformed date; errors from
    open() propagate unchanged.
    """
    bms = bookmark.Bookmarks()
    infile = open(filename)
    try:
        # The first line is the file header; it is skipped unchecked.
        infile.readline()
        while 1:
            line = infile.readline()
            if line == "":
                break
            # Compare without the line terminator, whatever form it has.
            tag = line.rstrip("\r\n")
            if tag == "#FOLDER" or tag == "#URL":
                name = readfield(infile, "NAME")
                url = None
                if tag == "#URL":
                    url = readfield(infile, "URL")
                created = parse_date(readfield(infile, "CREATED"))
                visited = parse_date(readfield(infile, "VISITED"))
                # The ORDER value is not used, but its line has to be
                # consumed (and readfield raises if it is missing).
                readfield(infile, "ORDER")
                swallow_rest(infile)
                if tag == "#FOLDER":
                    bms.add_folder(name, created, visited)
                else:
                    bms.add_bookmark(name, created, visited, url)
            elif tag == "-":
                # A lone "-" closes the innermost open folder.
                bms.leave_folder()
    finally:
        # Close the file even when a parse error aborts the loop.
        infile.close()
    return bms
# --- Test-program
# Executed whenever this module is run or imported: parses the Opera
# bookmark file at the hard-coded path below and writes the result in
# Netscape format (dump_netscape defaults to standard output).
bms=parse_adr(r"c:\programfiler\opera\opera3.adr")
bms.dump_netscape()
(msie_parse.py)
"""
Small utility to convert MSIE favourites to an object structure.
Originally written by Fredrik Lundh.
"""
import bookmark,os,string
# Directory that contains the favourites folder.  The value below is
# the Windows 95 location; the NT alternative is kept for reference:
#USRDIR = os.environ["USERPROFILE"] # NT version
USRDIR = r"c:\windows"
# Name of the favourites folder itself (Norwegian version of Windows).
DIR = "Favoritter"
class MSIE:
    """Reads Internet Explorer favourites into a bookmark container.

    Constructing an instance walks the favourites directory
    (USRDIR/DIR) and feeds what it finds to *bookmarks*: every
    subdirectory becomes a folder, and every file that is an Internet
    shortcut with a URL becomes a bookmark named after the file (without
    its extension).

    Attributes:
    bms  -- the bookmark container passed to the constructor
    root -- always None; not used by this class
    path -- the favourites directory that was scanned
    """

    def __init__(self, bookmarks):
        # FIXME: use registry for this!
        self.bms = bookmarks
        self.root = None
        self.path = os.path.join(USRDIR, DIR)
        self.__walk()

    def __walk(self, subpath=None):
        """Recursively add the contents of one favourites directory.

        subpath -- list of directory names leading from self.path to the
                   directory to scan; None means self.path itself
        """
        if subpath is None:
            subpath = []
        path = os.path.join(self.path, *subpath)
        for entry in os.listdir(path):
            fullname = os.path.join(path, entry)
            if os.path.isdir(fullname):
                self.bms.add_folder(entry, None, None)
                self.__walk(subpath + [entry])
                # Close the folder again; otherwise everything that
                # follows in this directory would be nested inside the
                # subfolder that was just processed.
                self.bms.leave_folder()
            else:
                url = self.__geturl(fullname)
                if url:
                    self.bms.add_bookmark(os.path.splitext(entry)[0],
                                          None, None, url)

    def __geturl(self, filename):
        """Return the URL stored in the shortcut *filename*, or None.

        None is returned when the file cannot be opened or read, does
        not start with an "[InternetShortcut]" line, or has no "URL="
        line.
        """
        try:
            fp = open(filename)
            try:
                if fp.readline().rstrip("\r\n") != "[InternetShortcut]":
                    return None
                while 1:
                    s = fp.readline()
                    if not s:
                        break
                    if s[:4] == "URL=":
                        # Strip only the line terminator so a URL on a
                        # final line without newline is not truncated.
                        return s[4:].rstrip("\r\n")
            finally:
                fp.close()
        except IOError:
            # Deliberately best-effort: an unreadable file is treated as
            # "not a shortcut" instead of aborting the whole scan.
            pass
        return None
# --- Testprogram
# Executed whenever this module is run or imported: scans the favourites
# directory (USRDIR/DIR) into a fresh Bookmarks container and writes it
# as XBEL (dump_xbel defaults to standard output).
msie=MSIE(bookmark.Bookmarks())
msie.bms.dump_xbel()
(ns_parse.py)
"""
Small utility that parses Netscape bookmarks.
"""
from xml.sax import saxexts,saxlib
import bookmark
# --- SAX handler for Netscape bookmarks
class NetscapeHandler(saxlib.HandlerBase):
    """SAX document handler that builds a bookmark tree from a Netscape
    bookmark file.

    Each <h3> element becomes a folder named by its text, each <a>
    element becomes a bookmark named by its text with the href attribute
    as URL, and each </dl> closes the innermost open folder.  The result
    is available in the bms attribute.

    The add_date, last_visit and last_modified attributes of the most
    recent element are remembered on the handler (None when absent) but
    are not passed on to the bookmark tree.
    """

    def __init__(self):
        self.bms = bookmark.Bookmarks()
        self.cur_elem = None
        self.added = None
        self.url = None
        self.visited = None
        self.last_modified = None
        # Pieces of character data collected for the current element.
        self._text = []

    def _attr(self, attrs, name):
        """Return attribute *name* from *attrs*, or None if it is absent."""
        try:
            return attrs[name]
        except KeyError:
            return None

    def startElement(self, name, attrs):
        if name == "h3":
            self.cur_elem = "h3"
            self._text = []
            self.added = self._attr(attrs, "add_date")
        elif name == "a":
            self.cur_elem = "a"
            self._text = []
            self.added = self._attr(attrs, "add_date")
            self.url = self._attr(attrs, "href")
            # Not every bookmark carries these two attributes.
            self.visited = self._attr(attrs, "last_visit")
            self.last_modified = self._attr(attrs, "last_modified")

    def characters(self, data, start, length):
        # A SAX parser is allowed to deliver one run of text in several
        # calls, so only collect the pieces here; the folder or bookmark
        # is created once, when the element ends.
        if self.cur_elem is not None:
            self._text.append(data[start:start + length])

    def endElement(self, name):
        if name == "dl":
            self.bms.leave_folder()
        elif name == "h3" or name == "a":
            # Only act on an end tag that matches the element being
            # collected; a stray end tag just resets the state.
            if self.cur_elem == name:
                title = "".join(self._text)
                if name == "h3":
                    self.bms.add_folder(title, None, None)
                else:
                    self.bms.add_bookmark(title, None, None, self.url)
            self.cur_elem = None
            self._text = []
# --- Main program
# Executed whenever this module is run or imported: feeds the bookmark
# file at the hard-coded path below through a parser obtained from
# saxexts.SGMLParserFactory, then writes the collected bookmarks in
# Netscape format (dump_netscape defaults to standard output).
ns_handler=NetscapeHandler()
p=saxexts.SGMLParserFactory.make_parser()
p.setDocumentHandler(ns_handler)
# NOTE(review): the file object opened here is never explicitly closed.
p.parseFile(open(r"h:/internet/netscape/bookmark.htm"))
ns_handler.bms.dump_netscape()
(bookmark.py)
"""
Classes to store bookmarks and dump them to XBEL, Opera and Netscape formats.
"""
import sys,string
# --- Class for bookmark container
class Bookmarks:
    """Container for a tree of bookmark folders and bookmarks.

    The tree is built incrementally: add_folder() creates a folder and
    makes it the current one, add_bookmark() adds a bookmark to the
    current folder, and leave_folder() returns to the enclosing folder.
    The dump_* methods write the whole tree in one output format.

    Attributes:
    folders      -- top-level nodes (folders and bookmarks) in the order
                    they were added
    folder_stack -- the currently open folders, innermost last
    """

    def __init__(self):
        self.folders = []
        self.folder_stack = []

    def add_folder(self, name, created, visited):
        """Create a folder inside the current folder and enter it."""
        nf = Folder(name, created, visited)
        if self.folder_stack:
            self.folder_stack[-1].add_child(nf)
        else:
            self.folders.append(nf)
        self.folder_stack.append(nf)

    def add_bookmark(self, name, created, visited, url):
        """Add a bookmark to the current folder (or to the top level)."""
        nb = Bookmark(name, created, visited, url)
        if self.folder_stack:
            self.folder_stack[-1].add_child(nb)
        else:
            self.folders.append(nb)

    def leave_folder(self):
        """Close the current folder; does nothing if no folder is open."""
        if self.folder_stack:
            del self.folder_stack[-1]

    def dump_xbel(self, out=sys.stdout):
        """Write the tree as XBEL to the file-like object *out*."""
        out.write("<XBEL>\n")
        for folder in self.folders:
            folder.dump_xbel(out)
        # The document has to end with the closing tag of the root
        # element, not with a second opening tag.
        out.write("</XBEL>\n")

    def dump_adr(self, out=sys.stdout):
        """Write the tree as an Opera bookmark file to *out*."""
        out.write("Opera Hotlist version 2.0\n\n")
        for folder in self.folders:
            folder.dump_adr(out)

    def dump_netscape(self, out=sys.stdout):
        """Write the tree as a Netscape bookmark file to *out*."""
        out.write("<!DOCTYPE NETSCAPE-Bookmark-file-1>\n")
        out.write("<!-- This is an automatically generated file.\n")
        out.write("It will be read and overwritten.\n")
        out.write("Do Not Edit! -->\n")
        # "Skriv HELE NAVNET her" is Norwegian for "Write the WHOLE NAME
        # here" -- presumably a user-name placeholder; kept verbatim.
        out.write("<TITLE>Skriv HELE NAVNET her's Bookmarks</TITLE>\n")
        out.write("<H1>Skriv HELE NAVNET her's Bookmarks</H1>\n\n")
        out.write("<DL><p>\n")
        for folder in self.folders:
            folder.dump_netscape(out)
        out.write("</DL><p>\n")
# --- Superclass for folder and bookmarks
class Node:
    """Common base of Folder and Bookmark: a name plus two dates.

    The created and visited values are stored exactly as given.
    """

    def __init__(self, name, created, visited):
        self.name, self.created, self.visited = name, created, visited
# --- Class for folders
class Folder(Node):
    """A named folder holding bookmarks and sub-folders.

    Attributes (in addition to those of Node):
    children -- the contained nodes, in the order they were added
    """

    def __init__(self, name, created, visited):
        Node.__init__(self, name, created, visited)
        self.children = []

    def add_child(self, child):
        """Append *child* (a Folder or Bookmark) to this folder."""
        self.children.append(child)

    def dump_xbel(self, out):
        """Write this folder and its contents as XBEL to *out*."""
        # Imported here because only the XML writer needs it.
        from xml.sax.saxutils import escape
        out.write(" <NODE>\n")
        # Escape the name: a raw "&" or "<" would make the XML malformed.
        out.write(" <NAME>%s</NAME>\n" % escape(self.name))
        for child in self.children:
            child.dump_xbel(out)
        out.write(" </NODE>\n")

    def dump_adr(self, out):
        """Write this folder and its contents in Opera format to *out*.

        The created and visited dates are always written as unknown.
        """
        out.write("#FOLDER\n")
        out.write("\tNAME=%s\n" % self.name)
        out.write("\tCREATED=%s\n" % "0 (?)")
        out.write("\tVISITED=%s\n" % "0 (?)")
        out.write("\tORDER=-1\n")
        out.write("\n")
        for child in self.children:
            child.dump_adr(out)
        out.write("\n")
        # A lone "-" marks the end of the folder.
        out.write("-\n")

    def dump_netscape(self, out):
        """Write this folder and its contents in Netscape format to *out*."""
        out.write(" <DT><H3 FOLDED>%s</H3>\n" % self.name)
        out.write(" <DL><p>\n")
        for child in self.children:
            child.dump_netscape(out)
        out.write(" </DL><p>\n")
# --- Class for bookmarks
class Bookmark(Node):
    """A single bookmark: a named URL.

    Attributes (in addition to those of Node):
    url -- the address the bookmark points to
    """

    def __init__(self, name, created, visited, url):
        Node.__init__(self, name, created, visited)
        self.url = url

    def dump_xbel(self, out):
        """Write this bookmark as XBEL to *out*.

        The ADDED and VISITED elements are written only when the
        corresponding date is not None.
        """
        # Imported here because only the XML writer needs it.
        from xml.sax.saxutils import escape
        out.write(" <BOOKMARK>\n")
        # Escape the text: titles and URLs routinely contain "&", which
        # would otherwise make the XML malformed.
        out.write(" <NAME>%s</NAME>\n" % escape(self.name))
        out.write(" <URL>%s</URL>\n" % escape(self.url))
        if self.created is not None:
            out.write(" <ADDED>%s</ADDED>\n" % self.created)
        if self.visited is not None:
            out.write(" <VISITED>%s</VISITED>\n" % self.visited)
        # The closing tag needs its final ">".
        out.write(" </BOOKMARK>\n")

    def dump_adr(self, out):
        """Write this bookmark in Opera format to *out*.

        The created and visited dates are always written as unknown.
        """
        out.write("#URL\n")
        out.write("\tNAME=%s\n" % self.name)
        out.write("\tURL=%s\n" % self.url)
        out.write("\tCREATED=%s\n" % "0 (?)")
        out.write("\tVISITED=%s\n" % "0 (?)")
        out.write("\tORDER=-1\n")
        out.write("\n")

    def dump_netscape(self, out):
        """Write this bookmark in Netscape format to *out*."""
        out.write(" <DT><A HREF=\"%s\">%s</A>\n" % (self.url, self.name))
--Lars M.