No subject

bruce bedouglas at earthlink.net
Fri Aug 29 17:16:12 CEST 2008


Hi.

I'm using mechanize to parse a page/site that uses the meta http-equiv tag
in order to perform a refresh/redirect of the page. I've tried a number of
settings, and read different posts on various threads, but seem to be
missing something.

The test.html page is the page that the URL returns; however, I was
expecting the test.py app to go ahead and perform the redirect/refresh
automatically.

Does the page (test.html) need to be completely valid HTML?

Any thoughts on what's going wrong here?


Thanks

----------------------------------------------------

test.py
--------
import re
import libxml2dom
import urllib
import urllib2
import sys, string
from  mechanize import Browser
import mechanize
#import tidy
import os.path
import cookielib
from libxml2dom import Node
from libxml2dom import NodeList
import subprocess
import time

########################
#
# Parse pricegrabber.com
# NOTE(review): the banner names pricegrabber.com, but every URL used in
# this script points at schedule.psu.edu -- presumably a leftover from
# another script; confirm.
########################
# Placeholder value; cj is rebound to a real cookie jar a few lines below.
cj = "p"
# File the cookie jar is loaded from when it already exists on disk.
COOKIEFILE = 'cookies.lwp'
#cookielib = 1


# Module-level aliases for the urllib2 entry points (neither alias is used
# in the visible part of the script).
urlopen = urllib2.urlopen
#cj = urllib2.cookielib.LWPCookieJar()
cj = cookielib.LWPCookieJar()
Request = urllib2.Request
# Two mechanize browsers are created; only `br` is used in the visible script.
br = Browser()
br2 = Browser()

# cj was just bound to an LWPCookieJar instance, so this branch always runs.
if cj != None:
  print "sss"
#install the CookieJar for the default CookieProcessor
  # Reload previously saved cookies, if the cookie file is present.
  if os.path.isfile(COOKIEFILE):
      cj.load(COOKIEFILE)
      print "foo\n"
  # `cookielib` is the imported module object here, so this test is always true.
  if cookielib:
      # Build a urllib2 opener that uses cj and install it as urllib2's default.
      # NOTE(review): the br.set_cookiejar(cj) call further down is commented
      # out, so the mechanize browser presumably does not share cj -- confirm.
      opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
      urllib2.install_opener(opener)
      print "foo2\n"

# The next four names are defined but never referenced in the visible script;
# the browser's User-Agent is set separately through br.addheaders.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values1 = {'name' : 'Michael Foord',
          'location' : 'Northampton',
          'language' : 'Python' }
headers = { 'User-Agent' : user_agent }

# Base URL; reassigned to the full search URL inside the __main__ section.
url="http://schedule.psu.edu/"
#=======================================


# Script entry point: configure the mechanize browser `br`, open the search
# URL, then print the length and the body of the response.
if __name__ == "__main__":
# main app

	# Not referenced again in the visible script.
	txdata = None

#----------------------------

	##br.set_cookiejar(cj)
	# Enable redirect, Referer and Refresh handling; disable robots handling.
	# NOTE(review): in the test.html shown in this post the META Refresh tag
	# appears after </head>, inside <BODY>. mechanize's http-equiv handling
	# presumably only inspects the document head, which would explain why
	# the refresh is not followed -- confirm against the mechanize docs.
	br.set_handle_redirect(True)
	br.set_handle_referer(True)
	br.set_handle_robots(False)
	br.set_handle_refresh(True)
	# Replace the browser's header list with a single User-Agent entry.
	br.addheaders = [('User-Agent', 'Firefox')]

	#url=str(url)+str("act_main_search.cfm")+"?"
	#url=url+"Semester=FALL%202008%20%20%20&"
	#url=url+"CrseLoc=OZ%3A%3AAbington%20Campus&"
	#url=url+"CECrseLoc=AllOZ%3A%3AAbington%20Campus&"
	#url=url+"CourseAbbrev=ACCTG&CourseNum=&CrseAlpha=&Search=View+schedule"

# NOTE(review): the next three lines look like a single commented-out
# assignment that was hard-wrapped (presumably by the mail client); as
# posted, the two continuation lines are no longer comments.
#url="http://schedule.psu.edu/act_main_search.cfm?Semester=FALL%202008%20%20
%20%20&CrseLoc=OZ%3A%3AAbington%20Campus&CECrseLoc=AllOZ%3A%3AAbington%20Cam
pus&CourseAbbrev=ACCTG&CourseNum=&CrseAlpha="



# NOTE(review): this assignment is also hard-wrapped across three lines and
# starts at column 0 although the surrounding statements are tab-indented;
# it presumably was one indented line in the original test.py -- rejoin the
# string literal before running.
url="http://schedule.psu.edu/act_main_search.cfm?Semester=FALL%202008%20%20%
20%20&CrseLoc=OZ%3A%3AAbington%20Campus&CECrseLoc=AllOZ%3A%3AAbington%20Camp
us&CourseAbbrev=ACCTG&CourseNum=&CrseAlpha=&CFID=543143&CFTOKEN=71842529"


	# Show the URL being requested, then fetch it with the browser.
	print "url =",url
	br.open(url)
	#cj.save(COOKIEFILE)    # resave cookies

	res = br.response()  # this is a copy of response
	# Read the whole response body and print its length followed by the body.
	s = res.read()
	print "slen=",len(s)
	print s

=========================================
test.html
<html>
<head>
<TITLE></TITLE>
</head>

<BODY BGCOLOR="#FFFFFF">

                        <TD NOWRAP WIDTH="45" VALIGN="top"><A
HREF="javascript:openAWindow('http://www.registrar.psu.edu/faculty_staff/enr
oll_services/clsrooms.html#C','Intent',625,425,1)"><FONT FACE="Arial,
Helvetica, sans-serif" SIZE="2"><strong>Tech Type</strong></FONT></A></TD>


<META HTTP-EQUIV="Refresh" CONTENT="0;url=/soc/fall/Alloz/a-c/acctg.html#">

---------------------------------------------------------




	# Ends the script. In this post the line appears after the pasted
	# test.html; it presumably belongs to the tail of test.py.
	sys.exit()







More information about the Python-list mailing list