[Tutor] Python Crawl problem help
CPECC张涛
sccdzt at foxmail.com
Tue May 25 08:47:34 EDT 2021
Dear sir :
Could you please help solve the below problem that can not run and the result is
"E:\Python Data Visualization\Scripts\python.exe" "E:/Python Data Visualization/爬虫/wallpaper2.py"
None
Traceback (most recent call last):
File "E:/Python Data Visualization/爬虫/wallpaper2.py", line 52, in <module>
start()
File "E:/Python Data Visualization/爬虫/wallpaper2.py", line 49, in start
imgs=getimgdata(page)
File "E:/Python Data Visualization/爬虫/wallpaper2.py", line 28, in getimgdata
for item in content_list.find_all('figure'):
AttributeError: 'NoneType' object has no attribute 'find_all'
Process finished with exit code 1
import requests
import re
import time
import os
from bs4 import BeautifulSoup
import urllib
def getpage():
url='https://www.zhihu.com/question/451014453/answer/1797338225'
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1;'
'Win64; x64) AppleWebKit/537.36 (KHTML, like'
'Gecko) Chrome/69.0.3497.100'
'Safari/537.36','Referer': "https://www.zhihu.com"
"/question/37787176"}
try:
r = requests.get(url, headers=headers).content
return r
except:
return print('coneection error')
def getimgdata(data):
soup=BeautifulSoup(data,'lxml')
content_list=soup.find('div',attrs={'class':'list'})
print(content_list)
img_list=[]
for item in content_list.find_all('figure'):
img=item.find('img')['src']
img_list.append(img)
return img_list
def saveToDir(contents):
img=40
try:
path=r'F:\Python crawler'
if not os.path.isdir(path):
os.makedirs(path)
img=0
for item in contents:
paths=path+str(img)+'.jpg'
time.sleep(1)
urllib.request.urlretrieve(item,paths)
img+=1
print('%sDownloaded'%img)
except Exception as e:
print(e)
def start():
page=getpage()
imgs=getimgdata(page)
saveToDir(imgs)
start()
More information about the Tutor
mailing list