No subject

bruce bedouglas at earthlink.net
Mon Dec 21 21:08:38 EST 2009


hi...

the following sample is an attempt to fetch two subsequent pages from a
sample site. (it's public) the script attempts to implement a request,
using the POST method, as well as cookies. Testing using
LiveHttpHeaders/Firefox indicates that the app uses post/cookies, and it
doesn't work if cookies are disabled on the browser.

the query for the post, was obtained via the LiveHttpHeaders app.

I can get the 1st page, but not the 2nd. I'm assuming that I'm somehow
screwing up the use/implementation of the cookies.. Searching the net isn't
shedding any light for now..

After showing the 1st page, the 2nd page is viewed from the browser, by
selecting a 'next' link, which invokes a jscript submit for the DOM. The
post data is captured via LiveHttpHeaders.. It's this data that forms the
data for the 2nd Post attempt in the test..

Any thoughts/comments/pointers would be helpful... (and yeah the test is
ugly..!)

thanks

-tom

#!/usr/bin/python

#test python script

# Standard library (Python 2 module names).
import cookielib
import os
import re
import string
import sys
import urllib
import urllib2

# Third-party.
import mechanize
from mechanize import Browser

########################
#
# Parsing App Information
########################
# datafile

# File the cookie jar is loaded from at start-up and saved to later on.
COOKIEFILE = 'cookies.lwp'

# Short aliases for the urllib2 entry points.
urlopen = urllib2.urlopen
Request = urllib2.Request

# Single jar shared by every request sent through the opener installed below.
# It is always constructed here, so no "is it None?" guard is needed.
cj = cookielib.LWPCookieJar()

# mechanize browser instance; the urllib2 requests in this script do not use it.
br = Browser()

print("sss")
if os.path.isfile(COOKIEFILE):
  # Start from the cookies written by an earlier run.
  cj.load(COOKIEFILE)
  print("foo\n")

# Install the jar for the default opener so that plain urllib2.urlopen()
# calls go through an HTTPCookieProcessor bound to cj.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)
print("foo2\n")

user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
# Sample form values; nothing in this script sends them.
values1 = {'name' : 'Michael Foord',
           'location' : 'Northampton',
           'language' : 'Python' }
# Extra HTTP headers attached to each Request.
headers = { 'User-Agent' : user_agent }

def _post_and_dump(url, query):
  """POST ``query`` to ``url``, print the response, and return its body.

  The request carries the module-level ``headers`` and is sent with
  ``urllib2.urlopen``, so it goes through whatever opener has been installed
  with ``urllib2.install_opener``.

  url   -- address to POST to.
  query -- form body, already URL-encoded.
  """
  request = urllib2.Request(url, query, headers)
  response = urllib2.urlopen(request)
  try:
    print("gggg \n")
    print("\n gggg 555555\n")
    # Two spaces after '=' reproduce the output of: print "res = ",response
    print("res =  %s" % (response,))
    body = response.read()
  finally:
    # Release the connection even if reading fails.
    response.close()
  print(body)
  return body


if __name__ == "__main__":
  # Fetch the first results page, then ask the same URL for the next page.

  baseurl = "https://pisa.ucsc.edu/class_search/index.php"
  print("b =  %s" % (baseurl,))
  print("b =  %s" % (headers,))

  # Form body for the initial search, as captured with LiveHttpHeaders.
  # Adjacent literals are concatenated into one string; one field per line.
  query = ("action=results"
           "&binds%5B%3Aterm%5D=2100"
           "&binds%5B%3Areg_status%5D=O"
           "&binds%5B%3Asubject%5D="
           "&binds%5B%3Acatalog_nbr_op%5D=%3D"
           "&binds%5B%3Acatalog_nbr%5D="
           "&binds%5B%3Atitle%5D="
           "&binds%5B%3Ainstr_name_op%5D=%3D"
           "&binds%5B%3Ainstructor%5D="
           "&binds%5B%3Age%5D="
           "&binds%5B%3Acrse_units_op%5D=%3D"
           "&binds%5B%3Acrse_units_from%5D="
           "&binds%5B%3Acrse_units_to%5D="
           "&binds%5B%3Acrse_units_exact%5D="
           "&binds%5B%3Adays%5D="
           "&binds%5B%3Atimes%5D="
           "&binds%5B%3Aacad_career%5D=")
  _post_and_dump(baseurl, query)

  # Persist and list whatever cookies the first response set.
  # NOTE(review): per the cookielib docs, save() skips cookies marked to be
  # discarded unless ignore_discard=True -- confirm whether the site's session
  # cookie is expected to end up in COOKIEFILE.
  cj.save(COOKIEFILE)
  print('These are the cookies we have received so far :')
  for index, cookie in enumerate(cj):
    print("%s  :  %s" % (index, cookie))

  # Form body sent by the page's 'next' link, as captured with LiveHttpHeaders.
  query = ("action=next"
           "&Rec_Dur=100"
           "&sel_col%5Bclass_nbr%5D=1"
           "&sel_col%5Bclass_id%5D=1"
           "&sel_col%5Bclass_title%5D=1"
           "&sel_col%5Btype%5D=1"
           "&sel_col%5Bdays%5D=1"
           "&sel_col%5Btimes%5D=1"
           "&sel_col%5Binstr_name%5D=1"
           "&sel_col%5Bstatus%5D=1"
           "&sel_col%5Benrl_cap%5D=1"
           "&sel_col%5Benrl_tot%5D=1"
           "&sel_col%5Bseats_avail%5D=1"
           "&sel_col%5Blocation%5D=1")
  _post_and_dump(baseurl, query)

  sys.exit()






More information about the Python-list mailing list