看了《Python3网络爬虫开发实战》作者 崔庆才丨静觅 在「滑块验证码(滑动验证码)相比图形验证码,破解难度如何?」这个问题下的回答,里面详细介绍了如何对抗滑块验证码,因此学习了一下,并在此记录。
正文
[1]流程
- 利用 Python selenium 自动化测试工具 直接拟人行为来完成滑块验证码验证
- 分析页面,找出滑块验证码的完整图片,缺口图片。
- 对比原始的图片和带缺口的图片的像素,像素不同的地方就是缺口位置
- 计算出滑块缺口的位置,得到所需要滑动的偏移量
- 使用物理加速度位移公式计算出移动轨迹
- 最后利用 Selenium 进行对滑块的拖拽
[2]分析页面
B站,是一个不错的学习网站,记得很久之前第一次碰到滑块验证码登录时候就是在B站看见的,所以拿它来练手。:smiley:
F12,打开开发者工具,找出登录框中有用的信息。
用户名输入: id="login-username"
密码输入框: id="login-passwd"
登录按钮: class="btn btn-login"
带有缺口的验证码图片: class="geetest_canvas_bg geetest_absolute"
需要滑动的验证码图片: class="geetest_canvas_slice geetest_absolute"
完整的验证码图片: class="geetest_canvas_fullbg geetest_fade geetest_absolute"
滑块按钮: class="geetest_slider_button"
[3]编写代码
导入库文件
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import time
import random
from PIL import Image
<details>
<summary ><font color=4B0082>USER_AGENT_LIST</font></summary>
# Pool of browser User-Agent strings; one is picked at random per session.
# The original list had one entry that was two User-Agent strings fused into a
# single literal (a Chrome 34 string followed by an iPad Safari string); they
# are listed as two separate entries here.
USER_AGENT_LIST = [
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36",
    "Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10",
    "Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16",
    "Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14",
    "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14",
    "Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02",
    "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
    "Opera/9.80 (Windows NT 5.1; U; zh-sg) Presto/2.9.181 Version/12.00",
    "Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00",
    "Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00",
    "Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0",
    "Opera/9.80 (Windows NT 6.1; WOW64; U; pt) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51",
    "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50",
]
</details>
初始化函数
def init(self):
    """
    Start a Chrome session and publish the shared login state as globals.

    Sets the module-level names ``url``, ``browser``, ``username``,
    ``password`` and ``wait``; the other functions read them directly.

    :return: None
    """
    global url, browser, username, password, wait
    url = 'https://passport.bilibili.com/login'
    # Location of the local chromedriver binary; machine specific.
    path = r'G:\Python3\Scripts\chromedriver.exe'
    chrome_options = Options()
    # Pick a random User-Agent for this session.
    user_agent = random.choice(USER_AGENT_LIST)
    # Open the window maximized.
    chrome_options.add_argument('--start-maximized')
    chrome_options.add_argument('user-agent=%s' % user_agent)
    # Exclude the automation switches; per the original author this makes the
    # Selenium-driven browser harder for anti-crawler checks to recognise.
    chrome_options.add_experimental_option(
        "excludeSwitches", ["ignore-certificate-errors", "enable-automation"]
    )
    # `options=` replaces the deprecated `chrome_options=` keyword.
    # NOTE(review): `executable_path` is deprecated in Selenium 4 as well;
    # move to a Service object when upgrading -- confirm against the
    # installed Selenium version.
    browser = webdriver.Chrome(executable_path=path, options=chrome_options)
    # Placeholder credentials; replace with a real account.
    username = '用户名'
    password = '密码'
    # Explicit waits give up after 20 seconds.
    wait = WebDriverWait(browser, 20)
<code>global</code> 关键字声明了 url、browser、username、password、wait 等全局变量,随后指定了 chromedriver 的路径。
登录函数
def login(self):
    """
    Open the login page, type the credentials and press the login button.

    Reads the globals ``browser``, ``wait``, ``url``, ``username`` and
    ``password``.

    :return: None
    """
    def locate(locator):
        # Block until the element is present in the DOM, then return it.
        return wait.until(EC.presence_of_element_located(locator))

    browser.get(url)
    name_box = locate((By.ID, 'login-username'))
    pwd_box = locate((By.ID, 'login-passwd'))
    name_box.send_keys(username)
    pwd_box.send_keys(password)
    # The button's class attribute is "btn btn-login"; XPath contains()
    # matches on just one of the two class names.
    submit = locate((By.XPATH, "//a[contains(@class,'btn-login')]"))
    # Pause for a random 0-2 seconds before clicking.
    ran_time = 2 * random.random()
    print("随机睡眠时间: ", ran_time)
    time.sleep(ran_time)
    submit.click()
等待用户名输入框和密码输入框对应的 ID 节点加载出来
获取这两个节点,用户名输入框 id="login-username"
,密码输入框 id="login-passwd"
调用 send_keys()
方法输入用户名和密码
获取登录按钮 class="btn btn-login"
随机产生一个数并将其扩大两倍作为暂停时间
最后调用 click()
方法实现登录按钮的点击
【4】验证码处理模块
验证码元素查找函数
def find_code(self):
    """
    Locate the three captcha canvases and save a cropped image of each.

    Writes ``bili_bg.png`` (background with the gap), ``bili_slice.png``
    (the puzzle piece layer) and ``bili_full.png`` (complete background)
    through ``save_screenshot``.

    :return: None
    """
    def canvas(selector):
        # Wait until the canvas is present in the DOM and return it.
        return wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, selector))
        )

    # Background that shows the gap.
    gap_canvas = canvas('canvas.geetest_canvas_bg.geetest_absolute')
    # Puzzle piece that has to be dragged.
    piece_canvas = canvas('canvas.geetest_canvas_slice.geetest_absolute')
    # Complete background without the gap.
    full_canvas = canvas('canvas.geetest_canvas_fullbg.geetest_fade.geetest_absolute')

    # Hide the piece so only the gapped background is captured.
    self.hide_element(piece_canvas)
    self.save_screenshot(gap_canvas, 'bg')
    # Bring the piece back and capture it.
    self.show_element(piece_canvas)
    self.save_screenshot(piece_canvas, "slice")
    # Reveal the complete background and capture it.
    self.show_element(full_canvas)
    self.save_screenshot(full_canvas, "full")
获取验证码的三张图片,分别是完整的图片、带有缺口的图片和需要滑动的图片
分析页面代码,三张图片是由 3 个 canvas 组成,3 个 canvas 元素包含 CSS display
属性,display:block
为可见,display:none
为不可见,在分别获取三张图片时要将其他两张图片设置为 display:none
,这样做才能单独提取到每张图片
定位三张图片的 class 分别为:带有缺口的图片(code_bg):geetest_canvas_bg geetest_absolute
、需要滑动的图片(code_slice):geetest_canvas_slice geetest_absolute
、完整图片(code_fullbg):geetest_canvas_fullbg geetest_fade geetest_absolute
最后传值给 save_screenshot()
函数,进一步对验证码进行处理
隐藏展示函数
def hide_element(self, element):
    """
    Hide a page element by replacing its inline style with ``display:none;``.

    :param element: WebElement to hide
    :return: None
    """
    set_style_js = "arguments[0].style=arguments[1]"
    browser.execute_script(set_style_js, element, "display:none;")
def show_element(self, element):
    """
    Show a page element by replacing its inline style with ``display:block;``.

    :param element: WebElement to show
    :return: None
    """
    set_style_js = "arguments[0].style=arguments[1]"
    browser.execute_script(set_style_js, element, "display:block;")
设置元素可见,或隐藏功能。
网页截图函数
def save_screenshot(self, obj, name):
    """
    Screenshot the page and crop out the area covered by one element.

    The full-page screenshot is written to ``./bilibili.png``; the cropped
    region is saved as ``bili_<name>.png``. Failures are reported on stdout
    and not re-raised (best effort, as in the original).

    :param obj: WebElement whose bounding box is cropped out
    :param name: suffix used in the output file name
    :return: None
    """
    page_shot = './bilibili.png'
    try:
        # save_screenshot() captures the whole page and returns False when
        # the file could not be written.
        if not browser.save_screenshot(page_shot):
            raise IOError("could not write %s" % page_shot)
        print("%s截图成功" % page_shot)

        # Read location and size once each instead of once per coordinate.
        location, size = obj.location, obj.size
        left, top = location['x'], location['y']
        right, bottom = left + size['width'], top + size['height']
        print('图:' + name)
        print('Left %s' % left)
        print('Top %s' % top)
        print('Right %s' % right)
        print('Bottom %s' % bottom)
        print('')

        # Crop the element's box out of the page screenshot and save it.
        # NOTE(review): assumes screenshot pixels match the element
        # coordinates (no display scaling) -- confirm on HiDPI screens.
        with Image.open(page_shot) as page:
            page.crop((left, top, right, bottom)).save('bili_' + name + '.png')
    except Exception as msg:
        # Exception, not BaseException, so Ctrl-C and SystemExit still work.
        print("%s:截图失败" % msg)
save_screenshot
方法把网页截图保存为bilibili.png
图片,
obj
为三张验证码图片对象,获取图片的x,y,宽,高
,
接着打开网页的截图,通过三张验证码图片的坐标,
调用crop()
方法将其裁剪出来,再进行保存。
【5】验证码滑动模块
滑动模块主函数
def slide(self):
    """
    Measure the gap offset from the saved captcha images and drag the slider.

    Reads ``./bili_bg.png`` (background with gap) and ``./bili_full.png``
    (complete background), the names produced when ``save_screenshot`` is
    called with ``'bg'`` and ``"full"``.

    :return: None
    """
    # The original opened './bili_back.png', a file nothing ever writes.
    with Image.open('./bili_bg.png') as bg_image, \
            Image.open('./bili_full.png') as fullbg_image:
        distance = self.get_distance(bg_image, fullbg_image)
    print('计算偏移量:%s Px' % distance)
    # Subtract 5 px to compensate for the slider/gap offset.
    trace = self.get_trace(distance - 5)
    self.move_to_gap(trace)
    # Give the page a moment to process the verification result.
    time.sleep(3)
向get_distance
方法传入缺口图片和完整图片,计算滑块偏移量
distance
通过 get_distance
方法返回值,获取验证码缺口偏移量
再把偏移量传入 get_trace
方法中,通过物理加速度位移公式,构造出滑块的移动轨迹。 distance -5
是减去滑块缺口偏移
再把返回的轨迹传入 move_to_gap
方法实现拟人操作。
获取缺口偏移量
def get_distance(self, bg_image, fullbg_image):
    """
    Return the x coordinate of the first column where the two images differ.

    Columns are scanned left to right starting at x = 60 (to the right of the
    slider piece), each column top to bottom.

    :param bg_image: image with the gap
    :param fullbg_image: image without the gap
    :return: x of the first differing column, or 60 if no pixel differs
    """
    start = 60
    # The helper receives the images in (full, gapped) order, as before.
    differing_columns = (
        col
        for col in range(start, fullbg_image.size[0])
        for row in range(fullbg_image.size[1])
        if not self.is_pixel_equal(fullbg_image, bg_image, col, row)
    )
    return next(differing_columns, start)
def is_pixel_equal(self, bg_image, fullbg_image, x, y):
    """
    Tell whether two images have (approximately) the same pixel at (x, y).

    Two pixels count as equal when the absolute difference of each of the
    R, G and B channels is below the tolerance of 60.

    The original wrote ``abs(a - b < threshold)``, which takes the absolute
    value of a boolean, and returned True as soon as any one channel passed.

    :param bg_image: image with the gap
    :param fullbg_image: image without the gap
    :param x: x coordinate
    :param y: y coordinate
    :return: True if every RGB channel differs by less than the tolerance
    """
    bg_pixel = bg_image.load()[x, y]
    fullbg_pixel = fullbg_image.load()[x, y]
    # Screenshots may differ slightly, so allow a per-channel tolerance.
    threshold = 60
    # Only the first three channels (RGB) are compared; alpha is ignored.
    return all(
        abs(bg_pixel[i] - fullbg_pixel[i]) < threshold
        for i in range(3)
    )
get_distance
方法获取缺口偏移量,就是获取缺口位置,此方法的两个参数为两张图片,一张为带缺口图片,一张为完整图片,在这里遍历两张图片的每个像素,然后利用is_pixel_equal
方法判断两张图片同一位置的像素是否相同,比对时,比较的是两张图片对应像素 RGB 各通道差值的绝对值是否均小于阈值 threshold
,如果均在阈值之内,则像素点相同,继续遍历,否则遇到不相同的像素点就是缺口的位置。
需要滑动的图片:
完整的图片:
通过观察,其实可以发现,滑块位置会出现在图片左边位置,缺口的位置通常处于图片的右边位置,缺口和滑块会处于同一水平线上,所以要寻找缺口的话,直接从右侧开始寻找即可,所以在遍历开始时候,直接设置了遍历的起始坐标为distance
60,也就是从滑块的右侧开始识别。
模拟拖动
多次试验发现,模拟拖动这个操作不难,但是按照实际操作来说,人为拖动这个模块,是无法做到完全匀速拖动。
人手会因为距离的变短而减慢速度确认位置,可能会出现抖动,往回拖拉等操作,所以如果出现匀速操作,就会被识别出是程序在操作,检测机制会根据其机器学习模型筛选出此类数据,归类为机器操作,就会出现该图片被怪兽吃掉的情况。
要让程序根据距离长短,来为其加速或者减速操作,可以利用物理学的加速度位移公式来完成:$x = v_0 t + \frac{1}{2} a t^2$,$v = v_0 + a t$
用Python 来表示就是:
# Uniform-acceleration displacement formula: X = v0 * t + 1/2 * a * t * t
# a: acceleration, X: displacement, v0: initial velocity
X = v0 * t +1/2 * a * t * t
# Velocity at the end of the time interval
v = v0 + a * t
运用这两个公式可以构造一个轨迹移动算法,计算出先加速后减速的运动轨迹:
#模拟人工拉动滑块
# Simulate a human dragging the slider.
def get_trace(self, distance):
    """
    Build a list of per-interval moves that accelerate, then decelerate.

    The first 4/5 of the distance uses acceleration +10, the rest -10, with
    a time step of 0.1. Each step is the difference between consecutive
    rounded positions and the last position is clamped to ``distance``, so
    the steps add up to exactly ``round(distance)``. The original rounded
    each step on its own and let the last one overshoot, so the total
    drifted away from the target (about 103 for a distance of 100).

    :param distance: total offset to cover
    :return: list of integer offsets, one per time interval; empty when
        ``distance`` is not positive
    """
    trace = []
    # Accelerate over the first 4/5 of the distance.
    mid = distance * (4 / 5)
    # Current position, current velocity, time step.
    current, v0, t = 0, 0, 0.1
    # Integer position already emitted into the trace.
    moved = 0
    while current < distance:
        a = 10 if current < mid else -10
        # Displacement in this interval: X = v0 * t + 1/2 * a * t * t
        step = v0 * t + 1 / 2 * a * t * t
        # Velocity at the end of the interval becomes the next start velocity.
        v0 = v0 + a * t
        # Never move past the target.
        current = min(current + step, distance)
        target = round(current)
        trace.append(target - moved)
        moved = target
    return trace
get_trace()
方法传入的参数为移动的总距离,返回的是运动轨迹,用 trace 表示,它是一个列表,列表的每个元素代表每次移动多少距离。
定义了一个变量 mid,用来控制减速的阈值,即模拟前 4/5 的路程是加速路程,后 1/5 是减速路程。但是如果偏移量过大,可能会因为前面 4/5 的路程过于匀速而被检测出来,所以也可以把它设置为 7/8。
再定义当前位移 current
,初始为0,随后进入while循环,循环条件是当前位移小于偏移量,在循环里分段定义了加速度,其中加速过程加速度定义为10,减速度定义为负10,随后再套用物理学加速位移公式算出某个时间段内的位移,同时将该位移更新并记录到轨迹trace
里。
当达到总距离时即停止循环,最后得到的 trace 即记录了每个时间间隔移动了多少位移,这样滑块的运动轨迹就得到了。
然后再把该运动轨迹传入 move_to_gap
方法实现拖动模块
def move_to_gap(self, trace):
    """
    Drag the slider button along the given trace and release it.

    :param trace: list of horizontal offsets, applied one after another
    :return: None
    """
    knob_locator = (By.CSS_SELECTOR, 'div.geetest_slider_button')
    knob = wait.until(EC.presence_of_element_located(knob_locator))
    # Press and hold the slider button.
    ActionChains(browser).click_and_hold(knob).perform()
    # Each offset is its own action chain, so moves are sent one at a time.
    for step in trace:
        drag = ActionChains(browser)
        drag.move_by_offset(xoffset=step, yoffset=0)
        drag.perform()
    # Short pause before letting go of the mouse button.
    time.sleep(0.5)
    ActionChains(browser).release().perform()
在这里传入的参数为运动轨迹
定义了slider
获取滑块对象
调用ActionChains
的 click_and_hold
方法按住拖动底部滑块,随后遍历运动轨迹获取每小段位移距离
再调用 move_by_offset
方法移动此位移,最后移动完成之后调用release
方法松开鼠标。
【6】完整代码
# =============================================
# -*- coding: utf-8 -*-
# @Time : 2020-02-06
# @Author : KeyboArd
# @Blog : www.wrpzkb.cn
# @FileName: bilibili_login.py
# @Software: PyCharm
# =============================================
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import time
import random
from PIL import Image
# Pool of browser User-Agent strings; one is picked at random per session.
# The original list had one entry that was two User-Agent strings fused into a
# single literal (a Chrome 34 string followed by an iPad Safari string); they
# are listed as two separate entries here.
USER_AGENT_LIST = [
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36",
    "Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10",
    "Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16",
    "Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14",
    "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14",
    "Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02",
    "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
    "Opera/9.80 (Windows NT 5.1; U; zh-sg) Presto/2.9.181 Version/12.00",
    "Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00",
    "Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00",
    "Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0",
    "Opera/9.80 (Windows NT 6.1; WOW64; U; pt) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51",
    "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50",
]
class bilibili_code():
    """
    Log in to bilibili with Selenium and solve the slider captcha.

    State is shared through the module-level globals ``url``, ``browser``,
    ``username``, ``password`` and ``wait``, which ``init`` sets and every
    other method reads. ``crack`` runs the whole flow.
    """

    def init(self):
        """
        Start a Chrome session and publish the shared login state as globals.

        :return: None
        """
        global url, browser, username, password, wait
        url = 'https://passport.bilibili.com/login'
        # Location of the local chromedriver binary; machine specific.
        path = r'G:\Python3\Scripts\chromedriver.exe'
        chrome_options = Options()
        # Pick a random User-Agent for this session.
        user_agent = random.choice(USER_AGENT_LIST)
        # Open the window maximized.
        chrome_options.add_argument('--start-maximized')
        chrome_options.add_argument('user-agent=%s' % user_agent)
        # Exclude the automation switches; per the original author this makes
        # the Selenium-driven browser harder for anti-crawler checks to spot.
        chrome_options.add_experimental_option(
            "excludeSwitches", ["ignore-certificate-errors", "enable-automation"]
        )
        # `options=` replaces the deprecated `chrome_options=` keyword.
        # NOTE(review): `executable_path` is deprecated in Selenium 4 as well;
        # move to a Service object when upgrading -- confirm against the
        # installed Selenium version.
        browser = webdriver.Chrome(executable_path=path, options=chrome_options)
        # Placeholder credentials; replace with a real account.
        username = '用户名'
        password = '密码'
        # Explicit waits give up after 20 seconds.
        wait = WebDriverWait(browser, 20)

    def login(self):
        """
        Open the login page, type the credentials and press the login button.

        :return: None
        """
        browser.get(url)
        user = wait.until(EC.presence_of_element_located((By.ID, 'login-username')))
        passwd = wait.until(EC.presence_of_element_located((By.ID, 'login-passwd')))
        user.send_keys(username)
        passwd.send_keys(password)
        # The button's class attribute is "btn btn-login"; XPath contains()
        # matches on just one of the two class names.
        login_btn = wait.until(
            EC.presence_of_element_located((By.XPATH, "//a[contains(@class,'btn-login')]"))
        )
        # Pause for a random 0-2 seconds before clicking.
        ran_time = random.random() * 2
        print("随机睡眠时间: ", ran_time)
        time.sleep(ran_time)
        login_btn.click()

    def find_code(self):
        """
        Locate the three captcha canvases and save a cropped image of each.

        Writes ``bili_bg.png``, ``bili_slice.png`` and ``bili_full.png``.

        :return: None
        """
        # Background that shows the gap.
        code_bg = wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'canvas.geetest_canvas_bg.geetest_absolute'))
        )
        # Puzzle piece that has to be dragged.
        code_slice = wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'canvas.geetest_canvas_slice.geetest_absolute'))
        )
        # Complete background without the gap.
        code_fullbg = wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'canvas.geetest_canvas_fullbg.geetest_fade.geetest_absolute'))
        )
        # Hide the piece so only the gapped background is captured.
        self.hide_element(code_slice)
        self.save_screenshot(code_bg, 'bg')
        # Bring the piece back and capture it.
        self.show_element(code_slice)
        self.save_screenshot(code_slice, "slice")
        # Reveal the complete background and capture it.
        self.show_element(code_fullbg)
        self.save_screenshot(code_fullbg, "full")

    def hide_element(self, element):
        """
        Hide an element by replacing its inline style with ``display:none;``.

        :param element: WebElement to hide
        :return: None
        """
        browser.execute_script("arguments[0].style=arguments[1]", element, "display:none;")

    def show_element(self, element):
        """
        Show an element by replacing its inline style with ``display:block;``.

        :param element: WebElement to show
        :return: None
        """
        browser.execute_script("arguments[0].style=arguments[1]", element, "display:block;")

    def save_screenshot(self, obj, name):
        """
        Screenshot the page and crop out the area covered by one element.

        The full-page screenshot goes to ``./bilibili.png``; the cropped
        region is saved as ``bili_<name>.png``. Failures are reported on
        stdout and not re-raised (best effort, as in the original).

        :param obj: WebElement whose bounding box is cropped out
        :param name: suffix used in the output file name
        :return: None
        """
        page_shot = './bilibili.png'
        try:
            # save_screenshot() captures the whole page and returns False
            # when the file could not be written.
            if not browser.save_screenshot(page_shot):
                raise IOError("could not write %s" % page_shot)
            print("%s截图成功" % page_shot)

            # Read location and size once each instead of once per coordinate.
            location, size = obj.location, obj.size
            left, top = location['x'], location['y']
            right, bottom = left + size['width'], top + size['height']
            print('图:' + name)
            print('Left %s' % left)
            print('Top %s' % top)
            print('Right %s' % right)
            print('Bottom %s' % bottom)
            print('')

            # Crop the element's box out of the page screenshot and save it.
            with Image.open(page_shot) as page:
                page.crop((left, top, right, bottom)).save('bili_' + name + '.png')
        except Exception as msg:
            # Exception, not BaseException, so Ctrl-C and SystemExit still work.
            print("%s:截图失败" % msg)

    def slide(self):
        """
        Measure the gap offset from the saved images and drag the slider.

        :return: None
        """
        # find_code() saves the gapped background as 'bili_bg.png'; the
        # original opened './bili_back.png', a file nothing ever writes.
        with Image.open('./bili_bg.png') as bg_image, \
                Image.open('./bili_full.png') as fullbg_image:
            distance = self.get_distance(bg_image, fullbg_image)
        print('计算偏移量:%s Px' % distance)
        # Subtract 5 px to compensate for the slider/gap offset.
        trace = self.get_trace(distance - 5)
        self.move_to_gap(trace)
        # Give the page a moment to process the verification result.
        time.sleep(3)

    def get_distance(self, bg_image, fullbg_image):
        """
        Return the x coordinate of the first column where the images differ.

        Columns are scanned left to right starting at x = 60 (to the right
        of the slider piece), each column top to bottom.

        :param bg_image: image with the gap
        :param fullbg_image: image without the gap
        :return: x of the first differing column, or 60 if no pixel differs
        """
        distance = 60
        for i in range(distance, fullbg_image.size[0]):
            for j in range(fullbg_image.size[1]):
                if not self.is_pixel_equal(bg_image, fullbg_image, i, j):
                    return i
        return distance

    def is_pixel_equal(self, bg_image, fullbg_image, x, y):
        """
        Tell whether two images have (approximately) the same pixel at (x, y).

        The original wrote ``abs(a - b < threshold)``, which takes the
        absolute value of a boolean, and returned True as soon as any one
        channel passed.

        :param bg_image: image with the gap
        :param fullbg_image: image without the gap
        :param x: x coordinate
        :param y: y coordinate
        :return: True if every RGB channel differs by less than 60
        """
        bg_pixel = bg_image.load()[x, y]
        fullbg_pixel = fullbg_image.load()[x, y]
        # Screenshots may differ slightly, so allow a per-channel tolerance.
        threshold = 60
        # Only the first three channels (RGB) are compared; alpha is ignored.
        return all(
            abs(bg_pixel[i] - fullbg_pixel[i]) < threshold
            for i in range(3)
        )

    # Simulate a human dragging the slider.
    def get_trace(self, distance):
        """
        Build a list of per-interval moves that accelerate, then decelerate.

        The first 4/5 of the distance uses acceleration +10, the rest -10,
        with a time step of 0.1. Steps are differences between consecutive
        rounded positions and the last position is clamped, so they add up
        to exactly ``round(distance)`` (the original drifted off target).

        :param distance: total offset to cover
        :return: list of integer offsets, one per time interval; empty when
            ``distance`` is not positive
        """
        trace = []
        # Accelerate over the first 4/5 of the distance.
        mid = distance * (4 / 5)
        # Current position, current velocity, time step.
        current, v0, t = 0, 0, 0.1
        # Integer position already emitted into the trace.
        moved = 0
        while current < distance:
            a = 10 if current < mid else -10
            # Displacement in this interval: X = v0 * t + 1/2 * a * t * t
            step = v0 * t + 1 / 2 * a * t * t
            # Velocity at the end of the interval.
            v0 = v0 + a * t
            # Never move past the target.
            current = min(current + step, distance)
            target = round(current)
            trace.append(target - moved)
            moved = target
        return trace

    def move_to_gap(self, trace):
        """
        Drag the slider button along the given trace and release it.

        :param trace: list of horizontal offsets, applied one after another
        :return: None
        """
        slider = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'div.geetest_slider_button'))
        )
        ActionChains(browser).click_and_hold(slider).perform()
        for x in trace:
            ActionChains(browser).move_by_offset(xoffset=x, yoffset=0).perform()
        # Short pause before letting go of the mouse button.
        time.sleep(0.5)
        ActionChains(browser).release().perform()

    def crack(self, max_retries=5):
        """
        Run the whole login flow, retrying a bounded number of times.

        Every attempt starts a fresh browser through ``init`` and always
        quits it afterwards. The original recursed without limit and called
        ``init`` again without closing the previous browser.

        :param max_retries: maximum number of attempts
        :return: True if the browser ended on the bilibili home page
        """
        for _ in range(max_retries):
            self.init()
            try:
                self.login()
                self.find_code()
                self.slide()
                success = browser.current_url == "https://www.bilibili.com/"
            finally:
                # quit() ends the whole driver session, not just the window.
                browser.quit()
            if success:
                print("登录成功")
                return True
        return False
if __name__ == '__main__':
    # Run the complete login flow when the file is executed as a script.
    bilibili_code().crack()