第五天
学习延时功能
练习爬取小猪短租(xiaozhu.com)租房信息
import time

import requests
from bs4 import BeautifulSoup
def sexual(n):
    """Return the gender label for a landlord gender-icon element.

    Args:
        n: An object exposing ``get('class')`` (for example a BeautifulSoup
            tag, or a plain dict of attributes).

    Returns:
        '女' if the element's classes include ``member_girl_ico``,
        otherwise '男'.
    """
    # `or ()` covers elements that carry no class attribute at all.
    css_classes = n.get('class') or ()
    # Some parsers hand back the class attribute as one space-separated string.
    if isinstance(css_classes, str):
        css_classes = css_classes.split()
    # Membership rather than exact list equality, so an extra CSS class on the
    # icon does not silently flip the result to '男'.
    return '女' if 'member_girl_ico' in css_classes else '男'
def get_rentinfo(url):
    """Fetch one listing detail page and print the fields scraped from it.

    Args:
        url: Address of the listing detail page to download.

    Returns:
        None. Each scraped record is printed as a dict.

    Errors raised by ``requests.get`` (including a timeout) propagate to the
    caller.
    """
    # Without a timeout a stalled server would block the crawl indefinitely.
    web_content = requests.get(url, timeout=10)
    soup = BeautifulSoup(web_content.text, 'lxml')

    titles = soup.select('div.pho_info > h4 > em')
    addresses = soup.select('div.pho_info > p')
    prices = soup.select('div.day_l > span')
    images = soup.select('#curBigImage')
    landlord_pics = soup.select('div.member_pic > a > img')
    landlord_sexuals = soup.select('div.w_240 > h6 > span')
    landlord_names = soup.select('div.w_240 > h6 > a')

    # zip() stops at the shortest list, so a page on which any one selector
    # matches nothing produces no record at all.
    records = zip(
        titles,
        addresses,
        prices,
        images,
        landlord_pics,
        landlord_sexuals,
        landlord_names,
    )
    for (title, address, price, image,
         landlord_pic, landlord_sexual, landlord_name) in records:
        data = {
            '房源:': title.get_text(),
            '地址:': address.get_text(),
            '价格:': price.get_text(),
            '图片:': image.get('src'),
            '房东图片:': landlord_pic.get('src'),
            '房东性别:': sexual(landlord_sexual),
            '房东名字:': landlord_name.get_text(),
        }
        print(data)
# Download the search-results page, collect the link of every listing on it,
# and scrape each listing's detail page in turn.
search_page = requests.get(
    'http://hz.xiaozhu.com/?startDate=2016-06-27&endDate=2016-07-31',
    timeout=10,  # do not hang forever on an unresponsive server
)
list_soup = BeautifulSoup(search_page.text, 'lxml')
house_list = list_soup.select('#page_list > ul > li > a')
for i in house_list:
    rent_url = i.get('href')
    # An anchor without an href yields None, which requests.get cannot fetch.
    if not rent_url:
        continue
    get_rentinfo(rent_url)
    # Pause between detail-page requests so the crawl does not hammer the site.
    time.sleep(1)