[XML-SIG] Bookmark parsers

Lars Marius Garshol larsga@ifi.uio.no
Sat, 05 Sep 1998 16:37:12 +0200


Here are some scripts to convert from MSIE, Opera and Netscape bookmarks
to Opera, Netscape and XBEL. There's hardly any support yet for the created,
visited and modified dates. Fredrik's code has been looted to get the MSIE
support.

Testing has been minimal so far.

(adr_parse.py)

"""
Small utility to parse Opera bookmark files.
"""

import string,bookmark

# --- Constants

# Three-letter English month abbreviation -> two-digit month number,
# used by parse_date to build its result string.
short_months={
    "Jan":"01",
    "Feb":"02",
    "Mar":"03",
    "Apr":"04",
    "May":"05",
    "Jun":"06",
    "Jul":"07",
    "Aug":"08",
    "Sep":"09",
    "Oct":"10",
    "Nov":"11",
    "Dec":"12",
}

# --- Parsing exception

class OperaParseException(Exception):
    """Raised when an Opera bookmark file does not have the expected layout."""

# --- Methods
        
def readfield(infile,fieldname):
    """Read the next line from infile and return the value of one field.

    The line is expected to contain "fieldname=value".  The field is
    located with a substring search, so it may be preceded by other
    characters such as indentation.

    infile    -- object with a readline() method
    fieldname -- name of the field expected on the next line (without "=")

    Returns the text following "fieldname=", without the line terminator.
    Raises OperaParseException if "fieldname=" does not occur in the line
    (this includes reaching the end of the input).
    """
    line=infile.readline()
    marker=fieldname+"="
    pos=line.find(marker)
    if pos==-1:
        raise OperaParseException("Field '%s' missing" % fieldname)

    # Remove only the line terminator.  Slicing off the last character
    # unconditionally would eat a real character when the final line of
    # the file has no trailing newline.
    return line[pos+len(marker):].rstrip("\r\n")

def swallow_rest(infile):
    "Consumes lines up to and including the first blank line, or to end of input."
    # iter() with a sentinel stops by itself once readline() returns "".
    for line in iter(infile.readline,""):
        if line=="\n":
            return

def parse_date(date):
    """Convert the value of an Opera date field to a "YYYYMMDD" string.

    The value holds a number followed by a readable date in parentheses:

        904923783 (Fri Sep 04 17:43:03 1998)
        0 (?)

    Only the parenthesised part is used.

    Returns the date as "YYYYMMDD", or None when the date is given as "(?)".
    Raises OperaParseException when the parentheses are missing or their
    content is not of the form "weekday month day time year" with a month
    abbreviation known to short_months.
    """
    lp=date.find("(")
    rp=date.find(")")
    if lp==-1 or rp==-1:
        raise OperaParseException("Date without parentheses")

    if date[lp:rp+1]=="(?)":
        return None

    # Split on whitespace instead of using fixed column offsets, so that a
    # day padded with a space ("Sep  4") is handled as well as "Sep 04".
    fields=date[lp+1:rp].split()
    if len(fields)!=5 or fields[1] not in short_months:
        raise OperaParseException("Unrecognized date '%s'" % date)

    month=short_months[fields[1]]
    day=fields[2].zfill(2)
    year=fields[4]

    return "%s%s%s" % (year,month,day)

def parse_adr(filename):
    """Parse an Opera bookmark file into a bookmark.Bookmarks object.

    The first line of the file is skipped.  After that, a line reading
    "#FOLDER" or "#URL" starts an entry whose fields are read from the
    following lines (NAME, then URL for "#URL" entries, then CREATED,
    VISITED and ORDER); any further lines up to the next blank line are
    ignored.  A line reading "-" closes the current folder.  All other
    lines are ignored.

    filename -- path of the file to read

    Returns the populated bookmark.Bookmarks instance.
    Raises OperaParseException (from readfield/parse_date) when an entry
    does not have the expected fields.
    """
    bms=bookmark.Bookmarks()

    infile=open(filename)
    try:
        # Skip the first line (dump_adr writes the version header there).
        infile.readline()

        while 1:
            line=infile.readline()
            if line=="": break

            # Strip only the terminator so that a final line without a
            # newline is still recognised.
            marker=line.rstrip("\r\n")

            if marker=="#FOLDER":
                name=readfield(infile,"NAME")
                created=parse_date(readfield(infile,"CREATED"))
                visited=parse_date(readfield(infile,"VISITED"))
                # ORDER must be present, but its value is not used.
                readfield(infile,"ORDER")
                swallow_rest(infile)

                bms.add_folder(name,created,visited)
            elif marker=="#URL":
                name=readfield(infile,"NAME")
                url=readfield(infile,"URL")
                created=parse_date(readfield(infile,"CREATED"))
                visited=parse_date(readfield(infile,"VISITED"))
                # ORDER must be present, but its value is not used.
                readfield(infile,"ORDER")
                swallow_rest(infile)

                bms.add_bookmark(name,created,visited,url)
            elif marker=="-":
                bms.leave_folder()
    finally:
        # Close the file even when a parse error is raised.
        infile.close()

    return bms

# --- Test-program

# Runs whenever this file is executed or imported: parses the Opera
# bookmark file at the hard-coded path below and writes the result to
# standard output in Netscape bookmark format.
# NOTE(review): the path is machine-specific -- adjust it before running.
bms=parse_adr(r"c:\programfiler\opera\opera3.adr")
bms.dump_netscape()

(msie_parse.py)

"""
Small utility to convert MSIE favourites to an object structure.

Originally written by Fredrik Lundh.
"""

import bookmark,os,string

# Name of the favourites folder inside USRDIR.
# NOTE(review): presumably this name differs between Windows language
# versions -- confirm and adjust for the local installation.
DIR = "Favoritter" # Norwegian version

# Directory that contains the favourites folder (joined with DIR in MSIE).
#USRDIR = os.environ["USERPROFILE"] # NT version
USRDIR = r"c:\windows" # 95 version

class MSIE:
    """Reads the MSIE favourites folder into a bookmark container.

    Every sub-directory of the favourites folder becomes a folder and every
    file that starts with an "[InternetShortcut]" line and has a "URL="
    line becomes a bookmark named after the file (extension removed).
    Other files are skipped.

    The container passed to the constructor must provide add_folder(name,
    created, visited), add_bookmark(name, created, visited, url) and
    leave_folder(); it is available afterwards as the bms attribute.
    """

    def __init__(self,bookmarks):
        # FIXME: use registry for this!

        self.bms=bookmarks
        self.root = None
        self.path = os.path.join(USRDIR, DIR)

        # The whole tree is read as part of construction.
        self.__walk()

    def __walk(self, subpath=None):
        """Add the entries below self.path/subpath to self.bms, recursively.

        subpath -- list of directory names relative to self.path
                   (None or empty means the favourites folder itself)
        """
        if subpath is None:
            subpath=[]

        # traverse favourites folder
        path = os.path.join(self.path, *subpath)
        # os.listdir returns entries in arbitrary order; sort them so the
        # output is reproducible.
        entries = os.listdir(path)
        entries.sort()
        for entry in entries:
            fullname = os.path.join(path, entry)
            if os.path.isdir(fullname):
                self.bms.add_folder(entry,None,None)
                self.__walk(subpath + [entry])
                # Close the folder again; otherwise everything that
                # follows would end up nested inside it.
                self.bms.leave_folder()
            else:
                url = self.__geturl(fullname)
                if url:
                    self.bms.add_bookmark(os.path.splitext(entry)[0],None,
                                          None,url)

    def __geturl(self, file):
        """Return the URL stored in the shortcut file, or None.

        None is returned when the file cannot be read, does not start with
        an "[InternetShortcut]" line, or has no line starting with "URL=".
        """
        try:
            fp = open(file)
            try:
                if fp.readline().rstrip("\r\n") != "[InternetShortcut]":
                    return None
                while 1:
                    s = fp.readline()
                    if not s:
                        break
                    if s[:4] == "URL=":
                        # Strip only the terminator; the last line of the
                        # file may lack one.
                        return s[4:].rstrip("\r\n")
            finally:
                fp.close()
        except (IOError, UnicodeError):
            # Best effort: unreadable or undecodable files are simply not
            # bookmarks.
            pass
        return None

# --- Testprogram
    
# Runs whenever this file is executed or imported.  Constructing MSIE
# walks the favourites folder right away (see MSIE.__init__) and fills the
# Bookmarks object passed in; the result is then written to standard
# output as XBEL.
msie=MSIE(bookmark.Bookmarks())
msie.bms.dump_xbel()

(ns_parse.py)

"""
Small utility that parses Netscape bookmarks.
"""

from xml.sax import saxexts,saxlib
import bookmark

# --- SAX handler for Netscape bookmarks

class NetscapeHandler(saxlib.HandlerBase):
    """SAX document handler that collects Netscape bookmarks.

    "h3" elements become folders, "a" elements become bookmarks and the
    end of a "dl" element closes the current folder.  The result is built
    up in the bms attribute (a bookmark.Bookmarks instance).

    The add_date, last_visit and last_modified attributes are remembered
    on the handler but are not passed on to the container.
    """

    def __init__(self):
        self.bms=bookmark.Bookmarks()
        self.cur_elem=None
        self.added=None
        self.url=None
        self.visited=None
        self.last_modified=None
        # Text chunks of the element currently being read.
        self.text=[]

    def _attr(self,attrs,name):
        "Returns the value of the named attribute, or None if it is absent."
        try:
            return attrs[name]
        except KeyError:
            return None

    def startElement(self,name,attrs):
        # All attributes are treated as optional so that a missing one
        # does not abort the whole parse.
        if name=="h3":
            self.cur_elem="h3"
            self.text=[]
            self.added=self._attr(attrs,"add_date")
        elif name=="a":
            self.cur_elem="a"
            self.text=[]
            self.added=self._attr(attrs,"add_date")
            self.url=self._attr(attrs,"href")
            self.visited=self._attr(attrs,"last_visit")
            self.last_modified=self._attr(attrs,"last_modified")

    def characters(self,data,start,length):
        # A parser may deliver the text of one element in several chunks,
        # so only collect here; the entry is created in endElement.
        if self.cur_elem is not None:
            self.text.append(data[start:start+length])

    def endElement(self,name):
        if name=="h3":
            if self.cur_elem=="h3":
                self.bms.add_folder("".join(self.text),None,None)
            self.cur_elem=None
        elif name=="dl":
            self.bms.leave_folder()
        elif name=="a":
            # An anchor without href carries no URL and is not a bookmark.
            if self.cur_elem=="a" and self.url is not None:
                self.bms.add_bookmark("".join(self.text),None,None,self.url)
            self.cur_elem=None

# --- Main program

# Runs whenever this file is executed or imported.

# Handler that collects the bookmarks while the parser runs.
ns_handler=NetscapeHandler()

# Parser obtained from the SGML parser factory (presumably because
# Netscape bookmark files are HTML rather than XML -- confirm).
p=saxexts.SGMLParserFactory.make_parser()
p.setDocumentHandler(ns_handler)
# NOTE(review): hard-coded, machine-specific input path; the file object is
# not closed explicitly.
p.parseFile(open(r"h:/internet/netscape/bookmark.htm"))

# Write the collected bookmarks to standard output in Netscape format.
ns_handler.bms.dump_netscape()

(bookmark.py)

"""
Classes to store bookmarks and dump them to XBEL.
"""

import sys,string

# --- Class for bookmark container

class Bookmarks:
    """Container for a tree of folders and bookmarks.

    The tree is built incrementally: add_folder opens a new folder inside
    the folder that is currently open, add_bookmark adds a bookmark to the
    currently open folder, and leave_folder closes the currently open
    folder.  Entries added while no folder is open go to the top level.

    Attributes:
      folders      -- top-level entries (folders and bookmarks), in order
      folder_stack -- the currently open folders, innermost last
    """

    def __init__(self):
        self.folders=[]
        self.folder_stack=[]

    def add_folder(self,name,created,visited):
        "Creates a folder in the currently open folder and opens it."
        nf=Folder(name,created,visited)
        if not self.folder_stack:
            self.folders.append(nf)
        else:
            self.folder_stack[-1].add_child(nf)

        self.folder_stack.append(nf)

    def add_bookmark(self,name,created,visited,url):
        "Creates a bookmark in the currently open folder."
        nb=Bookmark(name,created,visited,url)

        if self.folder_stack:
            self.folder_stack[-1].add_child(nb)
        else:
            self.folders.append(nb)

    def leave_folder(self):
        "Closes the currently open folder; does nothing if none is open."
        if self.folder_stack:
            del self.folder_stack[-1]

    def dump_xbel(self,out=sys.stdout):
        "Writes the whole tree to out, wrapped in an XBEL element."
        out.write("<XBEL>\n")
        for folder in self.folders:
            folder.dump_xbel(out)
        # This must be the closing tag; writing "<XBEL>" a second time
        # leaves the root element open.
        out.write("</XBEL>\n")

    def dump_adr(self,out=sys.stdout):
        "Writes the whole tree to out in Opera hotlist format."
        out.write("Opera Hotlist version 2.0\n\n")
        for folder in self.folders:
            folder.dump_adr(out)

    def dump_netscape(self,out=sys.stdout):
        "Writes the whole tree to out in Netscape bookmark file format."
        out.write("<!DOCTYPE NETSCAPE-Bookmark-file-1>\n")
        out.write("<!-- This is an automatically generated file.\n")
        out.write("It will be read and overwritten.\n")
        out.write("Do Not Edit! -->\n")
        out.write("<TITLE>Skriv HELE NAVNET her's Bookmarks</TITLE>\n")
        out.write("<H1>Skriv HELE NAVNET her's Bookmarks</H1>\n\n")

        out.write("<DL><p>\n")
        for folder in self.folders:
            folder.dump_netscape(out)
        out.write("</DL><p>\n")

# --- Superclass for folder and bookmarks
        
class Node:
    """Common base of Folder and Bookmark: a name plus two dates.

    created and visited are stored as given; None means "unknown".
    """

    def __init__(self,name,created,visited):
        self.name=name
        self.created=created
        self.visited=visited

    def _escape(self,text,quote=0):
        """Return text with &, < and > replaced by entity references.

        With a true quote argument, double quotes are replaced as well so
        that the result can be placed inside a double-quoted attribute.
        """
        # Imported here so that the module's import block stays untouched.
        from xml.sax.saxutils import escape
        if quote:
            return escape(text,{'"':"&quot;"})
        return escape(text)

# --- Class for folders

class Folder(Node):
    """A named folder holding an ordered list of child nodes."""

    def __init__(self,name,created,visited):
        Node.__init__(self,name,created,visited)
        self.children=[]

    def add_child(self,child):
        "Appends a Folder or Bookmark to this folder."
        self.children.append(child)

    def dump_xbel(self,out):
        "Writes this folder and its children to out as a NODE element."
        out.write("  <NODE>\n")
        # Escape the name: an unescaped "&" or "<" makes the output
        # malformed.
        out.write("    <NAME>%s</NAME>\n" % self._escape(self.name))
        for child in self.children:
            child.dump_xbel(out)
        out.write("  </NODE>\n")

    def dump_adr(self,out):
        """Writes this folder and its children to out in Opera format.

        The stored dates are not written; CREATED and VISITED are always
        emitted as the unknown value "0 (?)".
        """
        out.write("#FOLDER\n")
        out.write("\tNAME=%s\n" % self.name)
        out.write("\tCREATED=%s\n" % "0 (?)")
        out.write("\tVISITED=%s\n" % "0 (?)")
        out.write("\tORDER=-1\n")
        out.write("\n")

        for child in self.children:
            child.dump_adr(out)

        # A line holding just "-" closes the folder.
        out.write("\n")
        out.write("-\n")

    def dump_netscape(self,out):
        "Writes this folder and its children to out in Netscape format."
        out.write("  <DT><H3 FOLDED>%s</H3>\n" % self._escape(self.name))
        out.write("  <DL><p>\n")

        for child in self.children:
            child.dump_netscape(out)

        out.write("  </DL><p>\n")

# --- Class for bookmarks

class Bookmark(Node):
    """A single bookmark: a Node with a URL."""

    def __init__(self,name,created,visited,url):
        Node.__init__(self,name,created,visited)
        self.url=url

    def dump_xbel(self,out):
        """Writes this bookmark to out as a BOOKMARK element.

        ADDED and VISITED are only written when the corresponding date is
        not None.
        """
        out.write("  <BOOKMARK>\n")
        out.write("    <NAME>%s</NAME>\n" % self._escape(self.name))
        out.write("    <URL>%s</URL>\n" % self._escape(self.url))

        if self.created is not None:
            out.write("    <ADDED>%s</ADDED>\n" % self.created)

        if self.visited is not None:
            out.write("    <VISITED>%s</VISITED>\n" % self.visited)

        # The closing tag needs its ">".
        out.write("  </BOOKMARK>\n")

    def dump_adr(self,out):
        """Writes this bookmark to out in Opera format.

        The stored dates are not written; CREATED and VISITED are always
        emitted as the unknown value "0 (?)".
        """
        out.write("#URL\n")
        out.write("\tNAME=%s\n" % self.name)
        out.write("\tURL=%s\n" % self.url)
        out.write("\tCREATED=%s\n" % "0 (?)")
        out.write("\tVISITED=%s\n" % "0 (?)")
        out.write("\tORDER=-1\n")
        out.write("\n")

    def dump_netscape(self,out):
        "Writes this bookmark to out as a Netscape bookmark entry."
        # The URL sits inside a double-quoted attribute, so quotes must be
        # escaped there in addition to &, < and >.
        out.write("    <DT><A HREF=\"%s\">%s</A>\n" %
                  (self._escape(self.url,1),self._escape(self.name)))

--Lars M.