[Python-checkins] r66571 - in doctools/trunk/sphinx: builder.py search.py static/searchtools.js

armin.ronacher python-checkins at python.org
Tue Sep 23 23:21:33 CEST 2008


Author: armin.ronacher
Date: Tue Sep 23 23:21:32 2008
New Revision: 66571

Log:
Improved search slightly by adding keyword-based lookup.



Modified:
   doctools/trunk/sphinx/builder.py
   doctools/trunk/sphinx/search.py
   doctools/trunk/sphinx/static/searchtools.js

Modified: doctools/trunk/sphinx/builder.py
==============================================================================
--- doctools/trunk/sphinx/builder.py	(original)
+++ doctools/trunk/sphinx/builder.py	Tue Sep 23 23:21:32 2008
@@ -386,7 +386,7 @@
     def prepare_writing(self, docnames):
         from sphinx.search import IndexBuilder
 
-        self.indexer = IndexBuilder()
+        self.indexer = IndexBuilder(self.env)
         self.load_indexer(docnames)
         self.docwriter = HTMLWriter(self)
         self.docsettings = OptionParser(

Modified: doctools/trunk/sphinx/search.py
==============================================================================
--- doctools/trunk/sphinx/search.py	(original)
+++ doctools/trunk/sphinx/search.py	Tue Sep 23 23:21:32 2008
@@ -87,7 +87,8 @@
         'pickle':   pickle
     }
 
-    def __init__(self):
+    def __init__(self, env):
+        self.env = env
         self._stemmer = Stemmer()
         # filename -> title
         self._titles = {}
@@ -110,19 +111,28 @@
             format = self.formats[format]
         format.dump(self.freeze(), stream)
 
+    def get_keyword_map(self):
+        """Return a dict of all keywords."""
+        rv = {}
+        for kw, (ref, _, _, _) in self.env.modules.iteritems():
+            rv[kw] = (ref, 'module', 'module-' + kw)
+        for kw, (ref, ref_type) in self.env.descrefs.iteritems():
+            rv[kw] = (ref, ref_type, kw)
+        return rv
+
     def freeze(self):
-        """
-        Create a useable data structure. You can pass this output
-        to the `SearchFrontend` to search the index.
-        """
-        fns, titles = self._titles.keys(), self._titles.values()
-        fn2index = dict((f, i) for (i, f) in enumerate(fns))
-        return [
-            fns,
-            titles,
-            dict((k, [fn2index[fn] for fn in v])
-                 for (k, v) in self._mapping.iteritems()),
-        ]
+        """Create a useable data structure for serializing."""
+        filenames = self._titles.keys()
+        titles = self._titles.values()
+        fn2index = dict((f, i) for (i, f) in enumerate(filenames))
+        return dict(
+            filenames=filenames,
+            titles=titles,
+            terms=dict((k, [fn2index[fn] for fn in v])
+                       for (k, v) in self._mapping.iteritems()),
+            keywords=dict((k, (fn2index[v[0]],) + v[1:]) for k, v in
+                          self.get_keyword_map().iteritems())
+        )
 
     def prune(self, filenames):
         """Remove data for all filenames not in the list."""
@@ -147,45 +157,6 @@
 
         for word in word_re.findall(title):
             add_term(word)
-            add_term(word, 'T')
 
         for word in visitor.found_words:
             add_term(word)
-
-
-class SearchFrontend(object):
-    """
-    This class acts as a frontend for the search index. It can search
-    a searchindex as provided by `IndexBuilder`.
-    """
-
-    def __init__(self, index):
-        self.filenames, self.titles, self.words = index
-        self._stemmer = Stemmer()
-
-    def query(self, required, excluded):
-        file_map = {}
-        for word in required:
-            if word not in self.words:
-                break
-            for fid in self.words[word]:
-                file_map.setdefault(fid, set()).add(word)
-
-        return sorted(((self.filenames[fid], self.titles[fid])
-            for fid, words in file_map.iteritems()
-            if len(words) == len(required) and not
-               any(fid in self.words.get(word, ()) for word in excluded)
-        ), key=lambda x: x[1].lower())
-
-    def search(self, searchstring):
-        required = set()
-        excluded = set()
-        for word in searchstring.split():
-            if word.startswith('-'):
-                storage = excluded
-                word = word[1:]
-            else:
-                storage = required
-            storage.add(self._stemmer.stem(word))
-
-        return self.query(required, excluded)

Modified: doctools/trunk/sphinx/static/searchtools.js
==============================================================================
--- doctools/trunk/sphinx/static/searchtools.js	(original)
+++ doctools/trunk/sphinx/static/searchtools.js	Tue Sep 23 23:21:32 2008
@@ -294,6 +294,7 @@
     var excluded = [];
     var hlwords = [];
     var tmp = query.split(/\s+/);
+    var keyword = (tmp.length == 1) ? tmp[0] : null;
     for (var i = 0; i < tmp.length; i++) {
       // stem the word
       var word = stemmer.stemWord(tmp[i]).toLowerCase();
@@ -317,13 +318,22 @@
     console.info('excluded: ', excluded);
 
     // prepare search
-    var filenames = this._index[0];
-    var titles = this._index[1];
-    var words = this._index[2];
+    var filenames = this._index.filenames;
+    var titles = this._index.titles;
+    var words = this._index.terms;
     var fileMap = {};
     var files = null;
+    var results = [];
+    var regularResults = [];
     $('#search-progress').empty();
 
+    // lookup the keyword
+    if (keyword != null) {
+      var match = this._index.keywords[keyword];
+      if (match)
+        results.push([filenames[match[0]], titles[match[0]], match[2]]);
+    }
+
     // perform the search on the required words
     for (var i = 0; i < searchwords.length; i++) {
       var word = searchwords[i];
@@ -342,7 +352,6 @@
 
     // now check if the files are in the correct
     // areas and if the don't contain excluded words
-    var results = [];
     for (var file in fileMap) {
       var valid = true;
 
@@ -362,20 +371,23 @@
       // if we have still a valid result we can add it
       // to the result list
       if (valid)
-        results.push([filenames[file], titles[file]]);
+        results.push([filenames[file], titles[file], null]);
     }
 
     // delete unused variables in order to not waste
     // memory until list is retrieved completely
     delete filenames, titles, words;
 
-    // now sort the results by title
-    results.sort(function(a, b) {
+    // now sort the regular results by title
+    regularResults.sort(function(a, b) {
       var left = a[1].toLowerCase();
       var right = b[1].toLowerCase();
       return (left > right) ? -1 : ((left < right) ? 1 : 0);
     });
 
+    // combine both
+    results = results.concat(regularResults);
+
     // print the results
     var resultCount = results.length;
     function displayNextItem() {
@@ -386,7 +398,8 @@
         listItem.append($('<a/>').attr(
           'href',
           item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
-          highlightstring).html(item[1]));
+          highlightstring +
+          (item[2] ? '#' + item[2] : '')).html(item[1]));
         $.get('_sources/' + item[0] + '.txt', function(data) {
           listItem.append($.makeSearchSummary(data, searchwords, hlwords));
           Search.output.append(listItem);


More information about the Python-checkins mailing list