[Python-checkins] r61394 - doctools/trunk/sphinx/builder.py doctools/trunk/sphinx/linkcheck.py
georg.brandl
python-checkins at python.org
Sat Mar 15 00:47:31 CET 2008
Author: georg.brandl
Date: Sat Mar 15 00:47:30 2008
New Revision: 61394
Added:
doctools/trunk/sphinx/linkcheck.py
Modified:
doctools/trunk/sphinx/builder.py
Log:
Move link checker to its own file. Use different user-agent to enable Wikipedia lookup.
Modified: doctools/trunk/sphinx/builder.py
==============================================================================
--- doctools/trunk/sphinx/builder.py (original)
+++ doctools/trunk/sphinx/builder.py Sat Mar 15 00:47:30 2008
@@ -5,7 +5,7 @@
Builder classes for different output formats.
- :copyright: 2007-2008 by Georg Brandl, Thomas Lamb.
+ :copyright: 2007-2008 by Georg Brandl.
:license: BSD.
"""
@@ -13,11 +13,9 @@
import time
import codecs
import shutil
-import socket
import cPickle as pickle
from os import path
from cgi import escape
-from urllib2 import urlopen, HTTPError
from docutils import nodes
from docutils.io import StringOutput, FileOutput, DocTreeInput
@@ -891,108 +889,7 @@
pass
-class CheckExternalLinksBuilder(Builder):
- """
- Checks for broken external links.
- """
- name = 'linkcheck'
-
- def init(self):
- self.good = set()
- self.broken = {}
- self.redirected = {}
- # set a timeout for non-responding servers
- socket.setdefaulttimeout(5.0)
- # create output file
- open(path.join(self.outdir, 'output.txt'), 'w').close()
-
- def get_target_uri(self, docname, typ=None):
- return ''
-
- def get_outdated_docs(self):
- return self.env.all_docs
-
- def prepare_writing(self, docnames):
- return
-
- def write_doc(self, docname, doctree):
- self.info()
- for node in doctree.traverse(nodes.reference):
- try:
- self.check(node, docname)
- except KeyError:
- continue
-
- def check(self, node, docname):
- uri = node['refuri']
-
- if '#' in uri:
- uri = uri.split('#')[0]
-
- if uri in self.good:
- return
-
- if uri[0:5] == 'http:' or uri[0:6] == 'https:':
- self.info(uri, nonl=1)
- lineno = None
- while lineno is None and node:
- node = node.parent
- lineno = node.line
-
- if uri in self.broken:
- (r, s) = self.broken[uri]
- elif uri in self.redirected:
- (r, s) = self.redirected[uri]
- else:
- (r, s) = self.resolve(uri)
-
- if r == 0:
- self.info(' - ' + darkgreen('working'))
- self.good.add(uri)
- elif r == 2:
- self.info(' - ' + red('broken: ') + s)
- self.broken[uri] = (r, s)
- self.write_entry('broken', docname, lineno, uri + ': ' + s)
- else:
- self.info(' - ' + purple('redirected') + ' to ' + s)
- self.redirected[uri] = (r, s)
- self.write_entry('redirected', docname, lineno, uri + ' to ' + s)
-
- elif len(uri) == 0 or uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:':
- return
- else:
- self.info(uri + ' - ' + red('malformed!'))
- self.write_entry('malformed', docname, lineno, uri)
-
- return
-
- def write_entry(self, what, docname, line, uri):
- output = open(path.join(self.outdir, 'output.txt'), 'a')
- output.write("%s:%s [%s] %s\n" % (self.env.doc2path(docname, None),
- line, what, uri))
- output.close()
-
- def resolve(self, uri):
- try:
- f = urlopen(uri)
- f.close()
- except HTTPError, err:
- if err.code == 403 and uri.startswith('http://en.wikipedia.org/'):
- # Wikipedia blocks requests from urllib User-Agent
- return (0, 0)
- return (2, str(err))
- except Exception, err:
- return (2, str(err))
- if f.url.rstrip('/') == uri.rstrip('/'):
- return (0, 0)
- else:
- return (1, f.url)
-
- def finish(self):
- return
-
-
-
+from sphinx.linkcheck import CheckExternalLinksBuilder
builtin_builders = {
'html': StandaloneHTMLBuilder,
Added: doctools/trunk/sphinx/linkcheck.py
==============================================================================
--- (empty file)
+++ doctools/trunk/sphinx/linkcheck.py Sat Mar 15 00:47:30 2008
@@ -0,0 +1,124 @@
+# -*- coding: utf-8 -*-
+"""
+ sphinx.linkcheck
+ ~~~~~~~~~~~~~~~~
+
+ The CheckExternalLinksBuilder class.
+
+ :copyright: 2008 by Georg Brandl, Thomas Lamb.
+ :license: BSD.
+"""
+
+import socket
+from os import path
+from urllib2 import build_opener, HTTPError
+
+from docutils import nodes
+
+from sphinx.builder import Builder
+from sphinx.util.console import bold, purple, red, darkgreen
+
+# create an opener that will simulate a browser user-agent
+opener = build_opener()
+opener.addheaders = [('User-agent', 'Mozilla/5.0')]
+
+
+class CheckExternalLinksBuilder(Builder):
+ """
+ Checks for broken external links.
+ """
+ name = 'linkcheck'
+
+ def init(self):
+ self.good = set()
+ self.broken = {}
+ self.redirected = {}
+ # set a timeout for non-responding servers
+ socket.setdefaulttimeout(5.0)
+ # create output file
+ open(path.join(self.outdir, 'output.txt'), 'w').close()
+
+ def get_target_uri(self, docname, typ=None):
+ return ''
+
+ def get_outdated_docs(self):
+ return self.env.all_docs
+
+ def prepare_writing(self, docnames):
+ return
+
+ def write_doc(self, docname, doctree):
+ self.info()
+ for node in doctree.traverse(nodes.reference):
+ try:
+ self.check(node, docname)
+ except KeyError:
+ continue
+
+ def check(self, node, docname):
+ uri = node['refuri']
+
+ if '#' in uri:
+ uri = uri.split('#')[0]
+
+ if uri in self.good:
+ return
+
+ if uri[0:5] == 'http:' or uri[0:6] == 'https:':
+ self.info(uri, nonl=1)
+ lineno = None
+ while lineno is None and node:
+ node = node.parent
+ lineno = node.line
+
+ if uri in self.broken:
+ (r, s) = self.broken[uri]
+ elif uri in self.redirected:
+ (r, s) = self.redirected[uri]
+ else:
+ (r, s) = self.resolve(uri)
+
+ if r == 0:
+ self.info(' - ' + darkgreen('working'))
+ self.good.add(uri)
+ elif r == 2:
+ self.info(' - ' + red('broken: ') + s)
+ self.broken[uri] = (r, s)
+ self.write_entry('broken', docname, lineno, uri + ': ' + s)
+ else:
+ self.info(' - ' + purple('redirected') + ' to ' + s)
+ self.redirected[uri] = (r, s)
+ self.write_entry('redirected', docname, lineno, uri + ' to ' + s)
+
+ elif len(uri) == 0 or uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:':
+ return
+ else:
+ self.info(uri + ' - ' + red('malformed!'))
+ self.write_entry('malformed', docname, lineno, uri)
+
+ return
+
+ def write_entry(self, what, docname, line, uri):
+ output = open(path.join(self.outdir, 'output.txt'), 'a')
+ output.write("%s:%s [%s] %s\n" % (self.env.doc2path(docname, None),
+ line, what, uri))
+ output.close()
+
+ def resolve(self, uri):
+ try:
+ f = opener.open(uri)
+ f.close()
+ except HTTPError, err:
+ #if err.code == 403 and uri.startswith('http://en.wikipedia.org/'):
+ # # Wikipedia blocks requests from urllib User-Agent
+ # return (0, 0)
+ return (2, str(err))
+ except Exception, err:
+ return (2, str(err))
+ if f.url.rstrip('/') == uri.rstrip('/'):
+ return (0, 0)
+ else:
+ return (1, f.url)
+
+ def finish(self):
+ return
More information about the Python-checkins mailing list