little question

Shagshag shagshag13 at yahoo.fr
Wed May 22 03:51:55 EDT 2002


Here is the kind of code I wish I could find...

As I'm still a newbie in Python, comments are welcome.

-----

def build_documents():
	"""Build a six-document demo corpus, index it, and print the index.

	Each document is split on whitespace.  For every distinct term one
	PostingListNode(document id, occurrences of the term in that document)
	is added to a fresh InvertedIndex, in order of the term's first
	appearance in the document.

	Side effects: rebinds the module-level names ``documents`` and
	``invertedIndex`` and writes to standard output.
	"""
	# The original declared ``global document`` (singular), which never
	# matched the name actually assigned below, so the corpus stayed local.
	global documents
	global invertedIndex

	documents = {
		0: 'pease porridge hot pease porridge cold',
		1: 'pease porridge in the pot',
		2: 'nine days old',
		3: 'some like it hot some like it cold',
		4: 'some like it in the pot',
		5: 'nine days old',
	}

	invertedIndex = InvertedIndex()

	for doc_id in sorted(documents):
		# Count every term in a single pass while remembering first-seen
		# order, instead of calling terms.count() once per token.
		counts = {}
		first_seen = []
		for term in documents[doc_id].split():
			if term not in counts:
				counts[term] = 0
				first_seen.append(term)
			counts[term] += 1

		for term in first_seen:
			invertedIndex.add(term, PostingListNode(doc_id, counts[term]))

	# Parenthesised single-argument form prints the same on Python 2 and 3.
	print('')
	print(invertedIndex)

class PostingListNode:
	"""One posting-list entry: a document id plus an optional payload."""

	def __init__(self, documentID, information = None):
		"""Store the document id and the payload attached to it."""
		self._information = information
		self._documentID = documentID

	def get_documentID(self):
		"""Return the document id given at construction."""
		return self._documentID

	def get_information(self):
		"""Return the payload given at construction (None by default)."""
		return self._information

	def __str__(self):
		"""Render the node as '(documentID, information)'."""
		doc_text = str(self._documentID)
		info_text = str(self._information)
		return '(%s, %s)' % (doc_text, info_text)


class InvertedIndex:
	"""Map each element (term) to the list of nodes added for it.

	Elements must be hashable, since they are used as dictionary keys.
	"""

	def __init__(self):
		"""Create an empty index."""
		self._hash = {}       # element -> its position in _list / _container
		self._list = []       # elements in the order they were first added
		self._container = []  # node lists, parallel to _list
		self._n = 0           # number of distinct elements stored so far

	def add(self, element, node):
		"""Append node to element's list, creating the list on first use.

		Prints a one-line trace message for every call.
		"""
		# Only the lookup can raise KeyError, so only the lookup is guarded.
		try:
			i = self._hash[element]
		except KeyError:
			print("Adding new word=[%s] i=[%s]" % (element, self._n))
			self._hash[element] = self._n
			self._list.append(element)
			self._container.append([node])
			self._n = self._n + 1
		else:
			self._container[i].append(node)
			# (element,) rather than (element): a tuple-valued element would
			# otherwise be unpacked as the format arguments and raise TypeError.
			print("Adding word in node list=[%s]" % (element,))

	def get_nodes(self, element):
		"""Return the list of nodes stored for element, or None if unknown."""
		try:
			i = self._hash[element]
		except KeyError:
			return None
		return self._container[i]

	def __str__(self):
		"""Return one 'element -> node node ...' line per element.

		Lines follow first-insertion order (taken from _list) so the output
		does not depend on dictionary ordering.
		"""
		lines = []
		for element in self._list:
			nodes = self._container[self._hash[element]]
			fields = [str(element) + ' ->'] + [str(n) for n in nodes]
			lines.append(' '.join(fields) + "\n")
		return ''.join(lines)



More information about the Python-list mailing list