发现网上很多煎蛋爬虫都已经失效了,于是自己写了一个,只用到 requests、re 等基本模块,使用相当简单。
12.png
代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/4/20 12:55
# @Author : Xinru
import requests
import time
import re
import os
# Number of listing pages to crawl (pages 1..PAGE_COUNT inclusive).
PAGE_COUNT = 30
# Seconds to wait for a single HTTP response; without a timeout one stalled
# connection would block the whole crawl forever.
REQUEST_TIMEOUT = 15
# Captures the protocol-relative address (without the leading "//") of every
# <img> tag that is immediately followed by a closing </p>.
IMG_PATTERN = re.compile(r'<img src="//(.*?)" /></p>')


def page_url(page):
    """Return the listing URL for the given page number."""
    return 'http://jandan.net/ooxx/page-' + str(page) + '#comments'


def extract_image_paths(html):
    """Return every image address matched in *html*, without the "//" prefix.

    The result is a list of strings in document order; it is empty when the
    page contains no matching <img> tag.
    """
    return IMG_PATTERN.findall(html)


def image_file_name(img):
    """Return the last "/"-separated segment of *img*.

    The result is an empty string when *img* ends with "/".
    """
    return img.split('/')[-1]


def download_image(img, folder):
    """Download one image into *folder* and report whether it was saved.

    *img* is an address as returned by extract_image_paths (no scheme).
    Returns True when the file was written, False when the image was skipped
    because it has no usable file name or the HTTP request failed.
    Errors while writing the file (OSError) are not caught.
    """
    name = image_file_name(img)
    if not name:
        # An address ending in "/" would make open() target the folder itself.
        return False
    img_url = 'http://' + img
    try:
        response = requests.get(img_url, timeout=REQUEST_TIMEOUT)
        # Do not store an HTTP error page under an image file name.
        response.raise_for_status()
    except requests.RequestException as err:
        print('图片下载失败:', img_url, err)
        return False
    # The with-statement closes the file; no explicit close() is needed.
    with open(os.path.join(folder, name), 'wb') as f:
        f.write(response.content)
    return True


def main():
    """Crawl the listing pages and save their images into a dated folder.

    The folder is named after the current local date (YYYY.MM.DD) and is
    created in the current working directory. A page or image whose request
    fails is reported and skipped so the remaining downloads still run.
    """
    new_time = time.strftime('%Y.%m.%d', time.localtime(time.time()))
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(new_time, exist_ok=True)
    # Announce success only after the folder really exists.
    print('创建文件夹:', new_time, '成功')
    for ii in range(1, PAGE_COUNT + 1):
        try:
            page = requests.get(page_url(ii), timeout=REQUEST_TIMEOUT)
            page.raise_for_status()
        except requests.RequestException as err:
            print('第' + str(ii) + '页请求失败:', err)
            continue
        imges = extract_image_paths(page.text)
        # The timer covers only the image downloads of this page, not the
        # request for the listing page itself.
        time_start = time.time()
        for img in imges:
            download_image(img, new_time)
        time_end = time.time()
        print('第'+str(ii)+'页用时:', round(time_end - time_start,0),'秒')


if __name__ == '__main__':
    main()