# Scraper for the jokes on the first page of Qiushibaike (糗事百科).
import requests
import re
# Captures the text inside the <span> of every <div class="content"> element.
# NOTE(review): the tag names were reconstructed because the original pattern
# was garbled (it began with a dangling "*?", which `re` rejects) -- confirm
# against the live page markup.
_JOKE_PATTERN = re.compile(
    r'<div[^>]*class="content"[^>]*>\s*<span[^>]*>(.*?)</span>', re.S
)

# Markup noise deleted from each joke: <br/> tags and &quot entities.
# The original pattern '|&quat' had an empty first alternative, so it only
# ever matched the empty string and removed nothing.
_NOISE_PATTERN = re.compile(r'<br\s*/?>|&quot;?')


def _extract_jokes(html):
    """Return the raw joke fragments found in *html*, in page order."""
    return _JOKE_PATTERN.findall(html)


def _clean_joke(raw):
    """Delete markup noise from *raw* and trim surrounding whitespace."""
    return _NOISE_PATTERN.sub('', raw).strip()


def comenzar(*, url='https://www.qiushibaike.com', output_path='糗事.txt',
             html=None):
    """Scrape the jokes from a page, print them and append them to a file.

    Each joke is numbered from 1, printed to stdout followed by a blank
    line, and appended to *output_path* followed by three newlines.

    Args:
        url: Page to download when *html* is not supplied.
        output_path: File the jokes are appended to (UTF-8). It is not
            created when no joke is found.
        html: Already-downloaded page source. When given, no HTTP request
            is made.

    Returns:
        None.
    """
    if html is None:
        # A timeout keeps the script from hanging forever on a dead server.
        html = requests.get(url, timeout=10).text

    jokes = [_clean_joke(raw) for raw in _extract_jokes(html)]
    if not jokes:
        return

    # Open the output file once rather than once per joke.
    with open(output_path, 'a', encoding='utf-8') as out_file:
        for number, joke in enumerate(jokes, start=1):
            entry = '{}.{}'.format(number, joke)
            print(entry + '\n')
            out_file.write(entry + '\n\n\n')
# Run the scraper. This is a top-level call, so it executes whenever the file is run or imported.
comenzar()