A practice exercise; the images themselves are not downloaded, so it is not difficult.
Build the urls list and crawl the first 6 pages.
Use select() to pull out each key piece of data.
Loop with zip() to write each listing's fields into a dict.
import requests
from bs4 import BeautifulSoup
import time

# The first 6 search-result pages for short-term rentals in Liuzhou
urls = ['http://liuzhou.xiaozhu.com/search-duanzufang-p{}-0/'.format(i) for i in range(1, 7)]

def get_onepage(url):
    """Scrape one listing page and print the key fields of every listing."""
    html = requests.get(url)
    soup = BeautifulSoup(html.text, 'lxml')
    titles = soup.select('#page_list div.result_btm_con.lodgeunitname > div > a > span')
    introduces = soup.select('#page_list div.result_btm_con.lodgeunitname > div > em')
    prices = soup.select('#page_list div.result_btm_con.lodgeunitname > span.result_price > i')
    imgs = soup.select('#page_list ul img.lodgeunitpic')
    # zip() pairs the parallel lists, so each iteration yields one listing
    for title, introduce, price, img in zip(titles, introduces, prices, imgs):
        data = {
            'title': title.get_text(),
            'introduce': introduce.get_text().split('-')[0].strip(),
            'price': price.get_text(),
            'img': img.get('lazy_src'),  # URL of the lazy-loaded listing image
        }
        print(data)

for url in urls:
    get_onepage(url)
    time.sleep(2)  # pause between pages to avoid hammering the site
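If xiaozhu.com ever rejects the bare requests.get() call (I have not verified that it does), a slightly more defensive request step might look like the sketch below. The header string and the get_soup helper name are my own assumptions for illustration, not part of the original script.

import requests
from bs4 import BeautifulSoup

# Assumed browser-like User-Agent; adjust if the site requires something else.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

def get_soup(url):
    resp = requests.get(url, headers=HEADERS, timeout=10)
    resp.raise_for_status()                  # fail loudly on HTTP errors instead of parsing an error page
    resp.encoding = resp.apparent_encoding   # guard against a mis-detected character encoding
    return BeautifulSoup(resp.text, 'lxml')

get_onepage() could then build its soup from get_soup(url) instead of calling requests.get() directly.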