学习Python爬虫的第2天,解析本地网页,获取相关信息。
把本地网页上的信息按照以上要求爬取,分类
我的代码:
#!/usr/bin/env python
#coding: utf-8
__author__ = 'lucky'
from bs4 import BeautifulSoup
# Scrape the product cards out of the local page ``index.html``.
# After this block runs, ``info`` holds one dict per product with the keys
# 'image', 'price', 'title', 'star' and 'view'.
info = []

# Open in binary mode so BeautifulSoup detects the document's encoding
# itself instead of depending on the platform's default text encoding.
# The document is fully parsed by the constructor, so the file can be
# closed as soon as the soup exists.
with open('index.html', 'rb') as html_file:
    Soup = BeautifulSoup(html_file, 'lxml')

# One CSS selector per field; each call returns the matching tags in
# document order.
images = Soup.select('div.thumbnail > img')
prices = Soup.select('div.thumbnail > div.caption > h4.pull-right')
titles = Soup.select('div.caption > h4 > a')
stars = Soup.select('div.ratings > p:nth-of-type(2)')
views = Soup.select('div.ratings > p.pull-right')

# NOTE: zip() stops at the shortest list, so if any selector matches fewer
# tags than the others the remaining products are silently dropped.
for image, price, title, view, star in zip(images, prices, titles, views, stars):
    data = {
        'image': image.get('src'),
        'price': price.get_text(),
        'title': title.get_text(),
        # The rating is the number of filled-star <span> icons inside the
        # second <p> of the ratings block.
        'star': len(star.find_all('span', class_='glyphicon glyphicon-star')),
        'view': view.get_text(),
    }
    info.append(data)
总结: