download all mib files from a web page

powah wong_powah at yahoo.ca
Wed May 27 14:06:07 EDT 2009


On May 27, 12:29 pm, powah <wong_po... at yahoo.ca> wrote:
> I want to download all mib files from the web page:http://www.juniper.net/techpubs/software/junos/junos94/swconfig-net-m...
>
> All mib filenames are of this format:www.juniper.net/techpubs... .txt
>
> I wrote this program, but it fails with the following error.
> Please help.
> Thanks.
>
> [code]
> #!/usr/bin/env python
> import urllib2,os,urlparse
> url="http://www.juniper.net/techpubs/software/junos/junos94/swconfig-
> net-mgmt/juniper-specific-mibs-junos-nm.html#jN18E19"
> page=urllib2.urlopen(url)
> f=0
> links=[]
> data=page.read().split("\n")
> for item in data:
>     if "www.juniper.net/techpubs" in item:
>         httpind=item.index("www.juniper.net/techpubs")
>         item=item[httpind:]
>         #print "item " + item
>         ind=item.index("<")
>         links.append(item[:ind]) #grab all links
> # download all links
> for link in links:
>     print "link " + link
>     filename=link.split("/")[-1]
>     print "downloading ... " + filename
>     u=urllib2.urlopen(link)
>     p=u.read()
>     open(filename,"w").write(p)
> [/code]
>
> $ ~/python/downloadjuniper.py
> link www.juniper.net/techpubs/software/junos/junos94/swconfig-net-mgmt/mib...
> downloading ... mib-jnx-user-aaa.txt
> Traceback (most recent call last):
>   File "/home/powah/python/downloadjuniper.py", line 20, in ?
>     u=urllib2.urlopen(link)
>   File "/usr/lib/python2.4/urllib2.py", line 130, in urlopen
>     return _opener.open(url, data)
>   File "/usr/lib/python2.4/urllib2.py", line 350, in open
>     protocol = req.get_type()
>   File "/usr/lib/python2.4/urllib2.py", line 233, in get_type
>     raise ValueError, "unknown url type: %s" % self.__original
> ValueError: unknown url type: www.juniper.net/techpubs/software/junos/junos94/swconfig-net-mgmt/mib...
>
> $ python
> Python 2.4.4 (#1, Oct 23 2006, 13:58:00)
> [GCC 4.1.1 20061011 (Red Hat 4.1.1-30)] on linux2
> Type "help", "copyright", "credits" or "license" for more information.
>
>
>
> My computer is FC6 linux.

I fixed one error. Now, if a filename is misspelled, how can I ignore
the error and continue?
[code]
#!/usr/bin/env python
# Download every ".txt" MIB file referenced on one Juniper documentation page
# and save each under its own file name in the current directory.
# Written for Python 2 (it relies on urllib2).
import urllib2,os,urlparse
# The URL must stay on a single line: a quoted string literal cannot contain
# a raw line break.
url="http://www.juniper.net/techpubs/software/junos/junos94/swconfig-net-mgmt/juniper-specific-mibs-junos-nm.html#jN18E19"
page=urllib2.urlopen(url)
try:
    data=page.read().split("\n")
finally:
    page.close()
links=[]
for item in data:
    if "www.juniper.net/techpubs" in item:
        # Keep the text from the host name up to and including ".txt".
        httpind=item.index("www.juniper.net/techpubs")
        item=item[httpind:]
        ind=item.index(".txt") + 4
        links.append(item[:ind]) #grab all links
# download all links
for link in links:
    filename=link.split("/")[-1]
    # The scraped text has no scheme; urlopen rejects a URL without one.
    link = "http://" + link
    print("link " + link)
    print("downloading ... " + filename)
    # urlopen raises urllib2.HTTPError when the server answers with an error
    # status such as 404. Nothing catches it here, so the first bad link
    # ends the run.
    u=urllib2.urlopen(link)
    try:
        p=u.read()
    finally:
        u.close()
    # Binary mode stores the bytes exactly as they were received.
    out=open(filename,"wb")
    try:
        out.write(p)
    finally:
        out.close()
[/code]

$ ~/python/downloadjuniper_onepage.py
link http://www.juniper.net/techpubs/software/junos/junos94/swconfig-net-mgmt/mib-jnx-virtual-chassis.txt
downloading ... mib-jnx-virtual-chassis.txt
Traceback (most recent call last):
  File "/home/powah/python/downloadjuniper_onepage.py", line 7, in ?
    u=urllib2.urlopen(link)
  File "/usr/lib/python2.4/urllib2.py", line 130, in urlopen
    return _opener.open(url, data)
  File "/usr/lib/python2.4/urllib2.py", line 364, in open
    response = meth(req, response)
  File "/usr/lib/python2.4/urllib2.py", line 471, in http_response
    response = self.parent.error(
  File "/usr/lib/python2.4/urllib2.py", line 402, in error
    return self._call_chain(*args)
  File "/usr/lib/python2.4/urllib2.py", line 337, in _call_chain
    result = func(*args)
  File "/usr/lib/python2.4/urllib2.py", line 480, in http_error_default
    raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 404: Not Found



More information about the Python-list mailing list