#coding:utf-8
import urllib
import urllib2
import urlparse

from bs4 import BeautifulSoup
# Crawl pages 1..199 of the site, collect every <img> on each page and
# print the final URL and HTTP status code obtained when requesting it.


def _report_image_status(image_url):
    """Request *image_url* and print its final URL and HTTP status code.

    Failures are printed instead of raised so that one bad image does
    not stop the crawl.
    """
    response = None
    try:
        response = urllib2.urlopen(urllib2.Request(image_url), timeout=5)
        print(response.geturl())
        print(response.getcode())
    except urllib2.URLError as e:
        print(e)
        if hasattr(e, 'code'):
            # HTTPError: the server answered, but with an error status.
            print(e.geturl())
            print('Error code: %s' % (e.code,))
            # info is a method; call it so the headers are printed rather
            # than the bound-method repr.
            print(e.info())
        elif hasattr(e, 'reason'):
            # Plain URLError: no HTTP response was obtained.
            print('Reason: %s' % (e.reason,))
    except Exception as e:
        # Still best-effort (e.g. malformed URL, read timeout), but report
        # the failure and let KeyboardInterrupt/SystemExit propagate.
        # %r keeps the output ASCII-safe for non-ASCII URLs.
        print('Skipped %r: %r' % (image_url, e))
    finally:
        if response is not None:
            response.close()


for pa in range(1, 200):
    url = "http://www.xxxxx.com/page/{}".format(pa)
    try:
        page = urllib.urlopen(url)
        try:
            html = page.read()
        finally:
            page.close()
    except IOError as e:
        # An unreachable page should not abort the remaining pages.
        print('Failed to fetch %r: %r' % (url, e))
        continue

    soup = BeautifulSoup(html, "html.parser")
    for img in soup.find_all("img"):
        src = img.attrs.get('src')
        if not src:
            # <img> without a usable src (e.g. lazy-loaded via data-src).
            continue
        # Resolve relative and protocol-relative paths against the page URL.
        _report_image_status(urlparse.urljoin(url, src))
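# Author's note: this is my first attempt at this script. It crawls every
# image on the site (following pagination) and checks each image's HTTP
# status. If you spot any mistakes, corrections are welcome!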