1.Python中的生成表达式
列表推导式
格式:[表达式 for 临时变量 in 可迭代对象 [if 条件语句]]
用途:快速生成一个列表
# Recap: build the list 0..9 with an ordinary for loop.
li = []
for i in range(10):
    li.append(i)
print(li)

# The same list built with a list comprehension.
li2 = [i for i in range(10)]
print(li2)
练习:生成一个列表['序号:998', '序号:992' ]共10个元素
from random import randint
# Loop version: ten labels of the form '序号:<random integer in 100..999>'.
li = []
for _ in range(10):  # the loop index itself is never used
    seq = '序号:{}'.format(randint(100, 999))
    li.append(seq)
print(li)

# List-comprehension version of the same construction.
li2 = ['序号:{}'.format(randint(100, 999)) for _ in range(10)]
print(li2)
练习:使用列表推导式生成一个含有10个元素的随机列表[ ]并筛选出所有偶数
# Ten random integers in [30, 100], generated with a list comprehension
# as the exercise statement asks.
li = [randint(30, 100) for _ in range(10)]
print(li)

# Loop version: keep only the even numbers.
result = []
for i in li:
    if i % 2 == 0:
        result.append(i)
print(result)

# List-comprehension version of the same filter.
li2 = [i for i in li if i % 2 == 0]
print(li2)
练习:使用列表推导式生成一个含有20个元素的随机列表[ ]并筛选出所有奇数
# Twenty random integers in [1, 100], generated with a list comprehension
# as the exercise statement asks.
li = [randint(1, 100) for _ in range(20)]
print(li)

# Loop version: keep only the odd numbers.
result = []
for i in li:
    if i % 2 != 0:
        result.append(i)
print(result)

# List-comprehension version of the same filter.
li2 = [i for i in li if i % 2 != 0]
print(li2)
回顾豆瓣电影案例并优化,增加爬取电影图片保存本地功能
import requests
from lxml import html
import pandas as pd
from xpinyin import Pinyin
def spider(city, movie_list=None):
    """Scrape Douban's upcoming-movies page for a city and save the results.

    For every entry under the page's ``showing-soon`` container this collects
    the name, release date, genre, country, "want to see" count and poster
    URL. The collected entries are sorted by "want to see" count (highest
    first), each poster is downloaded to ``<name>.jpg`` in the current working
    directory, and the whole table is written to
    ``即将上映电影想看人数排行.csv``.

    Args:
        city: City name; it is converted to pinyin to build the page URL.
        movie_list: Optional list to append the scraped entries to. When
            omitted, a fresh list is created for this call.

    Returns:
        The list of movie dicts (keys: name, date, style, country, want,
        photo), sorted by ``want`` in descending order.

    Raises:
        IndexError: If an entry is missing any of the scraped fields.
        ValueError: If a "want to see" count is not an integer string.
    """
    # A `[]` default is created once and shared by every call, so results
    # from earlier calls would leak into later ones.
    if movie_list is None:
        movie_list = []

    city_pinyin = Pinyin().get_pinyin(city, splitter='')
    url = 'https://movie.douban.com/cinema/later/{}/'.format(city_pinyin)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36"}
    # Without a timeout a stalled connection would block forever.
    html_data = requests.get(url, headers=headers, timeout=10).text
    selector = html.fromstring(html_data)
    ul_list = selector.xpath('//div[@id="showing-soon"]/div')
    print('{}市将有{}部电影即将上映'.format(city, len(ul_list)))

    # Collect one dict per movie, including the poster image URL.
    for div in ul_list:
        name = div.xpath('div/h3/a/text()')
        date = div.xpath('div/ul/li[1]/text()')
        style = div.xpath('div/ul/li[2]/text()')
        country = div.xpath('div/ul/li[3]/text()')
        want = div.xpath('div/ul/li[4]/span/text()')
        photo = div.xpath('a/img/@src')
        movie_list.append({
            "name": name[0],
            "date": date[0],
            "style": style[0],
            "country": country[0],
            # Strip the '人想看' suffix so the count can be parsed as an int.
            "want": want[0].replace('人想看', ''),
            "photo": photo[0]
        })

    # list.sort() sorts in place and returns None, so its result is not kept.
    movie_list.sort(key=lambda x: int(x["want"]), reverse=True)

    # Download each poster; non-200 responses are skipped silently.
    for movie in movie_list:
        print(movie)
        img_link = movie['photo']
        response = requests.get(img_link, timeout=10)
        if response.status_code == 200:
            with open('{}.jpg'.format(movie['name']), 'wb') as f:
                f.write(response.content)

    df = pd.DataFrame(movie_list)
    df.to_csv("即将上映电影想看人数排行.csv")
    return movie_list
# Script entry point: ask for a city name and scrape its upcoming movies.
if __name__ == '__main__':
    city = input('请输入城市民名称')
    spider(city)
2.数据可视化
安装 matplotlib库
from matplotlib import pyplot as plt
import numpy as np
# Draw a sine curve from 100 sampled points (x, y).
# The x values are 100 equally spaced samples over the interval [0, 2π].
x = np.linspace(start=0, stop=2 * np.pi, num=100)
y = np.sin(x)
plt.plot(x, y)
plt.show()
支持中文显示设置
# Settings for displaying Chinese text in figures: select the SimHei font
# and turn off the Unicode minus sign for axis tick labels.
plt.rcParams.update({
    "font.sans-serif": ['SimHei'],
    'axes.unicode_minus': False,
})
曲线的显示设置
# Plot sin(x) and cos(x) on the same axes with explicit styling.
x = np.linspace(0, 2 * np.pi, num=100)
y = np.sin(x)
z = np.cos(x)
plt.plot(
    x, y,
    # The colour is passed by keyword: as a format string, 'p' is the
    # pentagon marker, not a colour.
    color='purple',
    linestyle=':',        # line style: '-' solid, '--' dashed, ':' dotted
    marker='o',           # marker shape: 'o' circle, '*' star, '+' plus
    markerfacecolor='r',  # fill colour of the markers
    alpha=0.8,            # opacity
    label='sin(x)',
)
plt.plot(x, z, marker='o', label='cos(x)')
plt.xlabel('x轴')
plt.ylabel('y轴')
plt.title('标题')
plt.legend()  # show the legend built from the `label` values
plt.show()
绘制条形图
from random import randint
# Bar chart of six products ('口红1' .. '口红6') with random prices.
x = list(map('口红{}'.format, range(1, 7)))
y = [randint(200, 1000) for _ in range(6)]
plt.bar(x, y)
plt.title('口红价格表')
plt.xlabel('口红品牌')
plt.ylabel('口红价格(元)')
plt.grid()  # draw background grid lines
plt.show()
作业:
- 三国人物分析top10绘制条形图
#承接三国人物出现频率top10分析代码
#显示出现词语前10部分修改为
# Collect the ten most frequent characters and draw them as a bar chart.
# NOTE(review): assumes `items` is a sequence of (role, count) pairs already
# sorted by count in descending order - confirm against the preceding code.
role_list, y, x = [], [], []
# Slicing instead of indexing 0..9 avoids an IndexError when fewer than
# ten entries are available.
for role, count in items[:10]:
    y.append(count)
    x.append(role)
    # Repeat each name `count` times in role_list.
    role_list.extend([role] * count)
plt.bar(x, y, color='purple')
plt.grid()
plt.xlabel('三国人物')
plt.ylabel('出场频次')
plt.title('三国人物出现次数top10')
plt.show()
- 三国人物分析top10绘制饼图
# Pie chart of the same data, with the first slice pulled out.
# `explode` needs one value per slice, so it is sized from `y` rather than
# hard-coded to ten entries.
explode = [0.2 if i == 0 else 0 for i in range(len(y))]
plt.pie(y, labels=x, explode=explode, startangle=90, shadow=True, autopct='%.1f%%')
plt.title('三国人物出现次数top10')
plt.show()
- 豆瓣中最想看的即将上映电影top5条形图
#承接豆瓣爬虫代码
# Take the first five movies and split them into labels and values.
# NOTE(review): assumes movie_info_list is already sorted by want_see in
# descending order - confirm against the scraper code this continues from.
top5 = movie_info_list[:5]
x = [entry['movie_name'] for entry in top5]
y = [entry['want_see'] for entry in top5]

# Vertical bar chart.
plt.bar(x, y, color='purple')
plt.title('豆瓣即将上映电影想看人数TOP5')
plt.xlabel('电影名')
plt.ylabel('想看人数')
plt.grid()
plt.show()

# Horizontal bar chart: the axis labels swap places.
plt.barh(x, y, color='purple')
plt.title('豆瓣即将上映电影想看人数TOP5')
plt.xlabel('想看人数')
plt.ylabel('电影名')
plt.grid()
plt.show()
- 豆瓣中最想看的即将上映电影top5饼图
# Pie chart of the top-5 data, with the first slice pulled out.
# `explode` needs one value per slice, so it is sized from `y` rather than
# hard-coded to five entries (fewer than five movies may be listed).
explode = [0.2 if i == 0 else 0 for i in range(len(y))]
plt.pie(
    y,
    labels=x,
    explode=explode,
    startangle=90,
    shadow=True,
    autopct='%.1f%%',
)
plt.title('豆瓣即将上映电影想看人数TOP5')
plt.show()