安装第三方库(脚本依赖 requests、beautifulsoup4、lxml 和 tqdm)
pip install requests beautifulsoup4 lxml tqdm
一、代码
# -*-coding:utf-8-*-
"""
@File : 单线程下爬取笔趣小说.py
@Time : 2021.5.11
@Author : 老白
@Software: IntelliJ IDEA 2019.3.5 x64
@python : Python 3.7.3
"""
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
# Fetch one chapter page and extract its body text.
def get_content(target, timeout=(3, 7)):
    """Download a chapter page and return its text split into pieces.

    Args:
        target: URL of the chapter page to download.
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.get``. The default mirrors the timeout already used
            for the search request in this script, so a stalled server can
            no longer hang the download forever.

    Returns:
        A list of strings produced by stripping the text of the page's
        ``<div id="content">`` element and splitting it on every run of
        four non-breaking spaces (presumably the site's paragraph indent).
        An empty list is returned when the page has no such element.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    reqs = requests.get(url=target, timeout=timeout)
    # The response encoding is forced to UTF-8 rather than trusting the
    # value guessed from the response headers.
    reqs.encoding = 'utf-8'
    bf = BeautifulSoup(reqs.text, 'lxml')
    texts = bf.find('div', id='content')
    if texts is None:
        # Error pages / layout changes: report "no content" instead of
        # crashing with AttributeError on ``None.text``.
        return []
    return texts.text.strip().split('\xa0' * 4)
# Ask for a book title and look the book up through the site's search page.
keyword = input("输入书名:")
server = 'https://www.xxbiquge.net/'
book_name = keyword + '.txt'

# The keyword goes through ``params`` so requests percent-encodes it;
# plain string concatenation breaks on characters such as '&', '#' or ' '.
req = requests.get(
    url=server + 'search.php',
    params={'keyword': keyword},
    timeout=(3, 7),
)
req.encoding = 'utf-8'
html = req.text
soup = BeautifulSoup(html, 'lxml')

# NOTE(review): this follows the FIRST <a> element of the search page, as the
# original code did - confirm that it really is the first search result and
# not a navigation link.
first_link = soup.a
if first_link is None or not first_link.get('href'):
    raise SystemExit('未找到相关书籍: ' + keyword)
# urljoin leaves absolute URLs untouched and resolves relative ones
# against the site root.
target = urljoin(server, first_link['href'])
print(target)

# Download the book's index page and collect the chapter links.
req = requests.get(url=target, timeout=(3, 7))
req.encoding = 'utf-8'
html = req.text
chapter_bs = BeautifulSoup(html, 'lxml')
chapters = chapter_bs.find('div', id='list')
if chapters is None:
    raise SystemExit('未找到章节列表: ' + target)
chapters = chapters.find_all('a')

# Download every chapter in order and append it to the output file.
for chapter in tqdm(chapters):
    href = chapter.get('href')
    if not href:
        # Anchor without a link target: nothing to download.
        continue
    # ``.string`` is None when the anchor wraps nested tags; fall back to
    # the concatenated text so that ``f.write`` never receives None.
    chapter_name = chapter.string or chapter.get_text()
    # urljoin avoids the doubled '/' that ``server + href`` produces for
    # root-relative links and keeps absolute links intact.
    url = urljoin(server, href)
    content = get_content(url)
    # The file is reopened in append mode for each chapter, so chapters
    # already written are on disk even if a later download fails.
    with open(book_name, 'a', encoding='utf-8') as f:
        f.write(chapter_name)
        f.write('\n')
        f.write('\n'.join(content))
        f.write('\n')
二、示例效果
有段时间没更新了,我自己的博客服务器配置太拉了就停了。