最简单的煎蛋OOXX爬取

发现网上很多煎蛋爬虫都已经失效了，于是自己写了一个：只用到 requests、re 等基本模块，使用起来相当简单。

12.png

代码如下：

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/4/20 12:55
# @Author  : Xinru

import requests
import time
import re
import os

# Script body: download the images linked from jandan.net/ooxx listing pages
# 1-30 into a folder named after today's date (e.g. "2019.04.20").
#
# For every page the script fetches the HTML, extracts protocol-relative
# image URLs with a regex, saves each image under its original file name and
# finally reports how long the downloads for that page took.

# Seconds to wait for a server response; without a timeout a single stalled
# connection would block the whole run forever.
REQUEST_TIMEOUT = 10

# Compiled once instead of on every page. Captures the host+path part of
# `<img src="//host/path" /></p>` tags.
IMG_PATTERN = re.compile(r'<img src="//(.*?)" /></p>')

new_time = time.strftime('%Y.%m.%d', time.localtime(time.time()))
# Create the folder first and only then report success; exist_ok makes a
# second run on the same day reuse the existing folder.
os.makedirs(new_time, exist_ok=True)
print('创建文件夹：',new_time,'成功')

for ii in range(1, 31):
    url = 'http://jandan.net/ooxx/page-' + str(ii) + '#comments'
    try:
        page = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Do not try to parse an HTTP error page as if it were a listing.
        page.raise_for_status()
    except requests.RequestException as err:
        # One unreachable page should not abort the remaining pages.
        print('第'+str(ii)+'页获取失败：', err)
        continue

    imges = IMG_PATTERN.findall(page.text)

    # Only the image downloads are timed, not the listing-page request.
    time_start = time.time()
    for img in imges:
        img_url = 'http://' + img
        name = img.split('/')[-1]
        if not name:
            # URL ends with "/": there is no file name to save under.
            continue
        try:
            resp = requests.get(img_url, timeout=REQUEST_TIMEOUT)
            # Avoid writing an HTML error body to disk as an "image".
            resp.raise_for_status()
        except requests.RequestException as err:
            print('下载失败：', img_url, err)
            continue
        # The with-statement closes the file; no explicit close() is needed.
        with open(os.path.join(new_time, name), 'wb') as f:
            f.write(resp.content)
    time_end = time.time()
    print('第'+str(ii)+'页用时：', round(time_end - time_start,0),'秒')