前言
爬取豆瓣电影 Top 250 榜单
环境
请自行安装以下所需的 Python 包:
BeautifulSoup
MySQL-python
开始
创建一个py文件 demo3.py
分析页面结构
导入一些包
定义一个DouBanSpider类以及一些方法
运行
$ python demo3.py
创建一个用于生成图表(chart)的 Python 文件 showdata1.py
请自行安装以下所需的 Python 包:
pygal
MySQL-python
Flask
小例子
#!/usr/bin/python
#coding=utf-8
import pygal
import json
from urllib2 import urlopen # python 2 syntax
# from urllib.request import urlopen # python 3 syntax
import MySQLdb
from flask import Flask
from pygal.style import DarkSolarizedStyle
import sys,os
# Python 2-only idiom: reload(sys) and then switch the interpreter-wide
# default string encoding to UTF-8 (neither call is available on Python 3).
reload(sys)
sys.setdefaultencoding('utf8')
# Flask application object; the route below is registered on it via @app.route.
app = Flask(__name__)
#----------------------------------------------------------------------
@app.route('/dbmovies')
def demoDBMovies():
    """Render an HTML page with a horizontal bar chart of the top 10 rows.

    Connects to the local MySQL ``books`` database, selects the ten rows of
    the ``books`` table with the highest ``people`` value, and plots each
    row as one bar (column 0 is the bar label, column 1 is the bar value).

    Returns:
        The HTML page as a string, or a ``("...", 500)`` tuple when the
        database connection cannot be established.

    Raises:
        MySQLdb.Error: if the query itself fails; the cursor and connection
            are still closed before the error propagates.
    """
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration.
    try:
        conn = MySQLdb.connect(host='localhost', user='hejing',
                               passwd='hejing', db='books', charset='utf8')
    except MySQLdb.Error as e:
        # Answer with an HTTP 500 instead of calling sys.exit(): raising
        # SystemExit from inside a request handler is not a usable error
        # response for the client.
        print(e)
        return "Database connection failed", 500

    # try/finally so the cursor and connection are released even when the
    # query raises.
    try:
        cursor = conn.cursor()
        try:
            # NOTE(review): "select *" means rec[0]/rec[1] below depend on
            # the table's column order -- confirm against the schema.
            cursor.execute("select * from books order by people DESC LIMIT 10")
            alldata = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

    title = " This is a testing demo"
    chart = pygal.HorizontalBar()
    chart.title = 'Best Top 10 movies in Douban'
    # An empty result set simply yields a chart with no bars.
    for rec in alldata:
        chart.add(rec[0], rec[1])

    html = """
<html>
<head>
<title>%s</title>
</head>
<body>
%s
</body>
</html>
""" % (title, chart.render())
    return html
#----------------------------------------------------------------------
# Script entry point: start Flask's built-in server bound to 127.0.0.1
# only when this file is executed directly (not when it is imported).
if __name__ == "__main__":
    app.run(host="127.0.0.1")