本文出自: https://www.cnblogs.com/jackzhuo/p/18586281 ,并非原创。如需转载,请移步至原文并注明出处。
1. 安装DrissionPage
pip install DrissionPage
# 指定pip源,如豆瓣
pip install DrissionPage -i https://pypi.doubanio.com/simple
2. 安装ddddocr
pip install ddddocr
# 指定pip源,如豆瓣
pip install ddddocr -i https://pypi.doubanio.com/simple
3. 滑动验证码-带缺口滑块
from DrissionPage import ChromiumPage,ChromiumOptions
import random
import time
import ddddocr
# Path to the local Chrome executable
path = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
# Browser options carrying that executable path
co = ChromiumOptions().set_browser_path(path)
# Open the browser (module-level object reused by the rest of the script)
page = ChromiumPage(co)
# Load the target site; retry=3, interval=2 and timeout=15 are handed to page.get
page.get("https://xxxx.yyyy.ccc.cn/biz/slideCaptchaindex.html",retry=3,interval=2,timeout=15)
# Motion track of the slider
def get_tracks(distance):
    """Build the list of per-step horizontal offsets for one slider drag.

    The motion is simulated in fixed time steps of 0.3: the slider
    accelerates (2.5-3.5 per step, chosen at random) until a random
    fraction (0.55-0.78) of ``distance`` has been covered, and decelerates
    (2.0-3.0) after that.

    Args:
        distance: Total horizontal distance to cover, in the same unit as
            the offsets later passed to the mouse-move action.

    Returns:
        A list of integer offsets whose sum is exactly ``round(distance)``.
        The list is empty when ``distance`` is not positive.
    """
    ratio = round(random.uniform(0.55, 0.78), 2)
    velocity, step_time, travelled = 0, 0.3, 0
    switch_point = distance * ratio
    steps = []
    emitted = 0  # whole units already handed out to the caller
    while travelled < distance:
        if travelled < switch_point:
            accel = round(random.uniform(2.5, 3.5), 1)
        else:
            accel = -round(random.uniform(2.0, 3.0), 1)
        travelled += velocity * step_time + 0.5 * accel * (step_time ** 2)
        velocity += accel * step_time
        # Rounding every step on its own lets the error pile up and the last
        # step overshoots the target.  Emitting the difference between
        # rounded cumulative positions (capped at the target) keeps the
        # total equal to the requested distance.
        reached = round(min(travelled, distance))
        steps.append(reached - emitted)
        emitted = reached
    return steps
# Build the matcher once up front: its arguments never change between
# attempts, so there is no reason to construct it again on every retry.
det = ddddocr.DdddOcr(det=False, ocr=False, show_ad=False)
for _ in range(5):
    # Fetch the slider background image
    background_bytes = page.ele('xpath://*[@id="oriImg"]').src()
    # Fetch the cut-out (gap) piece image
    cut_bytes = page.ele('xpath://*[@id="cutImg"]').src()
    # Match the piece against the background to locate the gap
    result = det.slide_match(cut_bytes, background_bytes, simple_target=True)
    print("滑块距离:", result)
    # The first entry of result['target'] is used as the horizontal distance
    offset = result['target'][0]
    tracks = get_tracks(offset)
    print("滑动轨迹:", tracks)
    # Press and hold the left mouse button on the slider
    page.actions.hold('xpath://*[@id="slider"]')
    for track in tracks:
        # Move relative to the current pointer position, with a small
        # random vertical offset on every step
        page.actions.move(
            offset_x=track,
            offset_y=round(random.uniform(1.0, 3.0), 1),
            duration=.1,
        )
    # NOTE(review): the original indentation was lost; this pause is assumed
    # to sit between the last move and the release - confirm.
    time.sleep(0.1)
    # Release the left mouse button
    page.actions.release('xpath://*[@id="slider"]')
    # Screenshot of the captcha container
    page.ele('xpath://*[@id="captchadiv"]').get_screenshot(path='./captcha.jpg')
    time.sleep(5)
    # The attempt counts as failed when the checked element's text still
    # contains the marker substring
    if '验证' in page.ele('xpath:/html/body/div[6]/div/div[2]').text:
        print('滑动失败,刷新滑块')
        page.ele('xpath://*[@id="captchadiv"]/div[1]/img').click()
    else:
        print("滑动成功")
        break
# NOTE(review): this line also runs when all five attempts failed - confirm
# that continuing is intended.
print('开始采集')
4. 滑动验证码-滑到底
import time
import random
from DrissionPage import Chromium
# Take over a browser that is already open
# NOTE(review): 9333 is presumably the debugging port of that browser - confirm
browser = Chromium(9333)
# Tab object that slide() drives
cp = browser.latest_tab
def get_tracks(distance=255):
    """Build per-step offsets for a drag that must reach at least ``distance``.

    The motion is simulated in fixed time steps of 0.3: accelerate until a
    random fraction (0.55-0.78) of ``distance`` is covered, then decelerate.
    Each step is rounded to an integer.  If the rounded steps add up to less
    than ``distance``, one extra step (the shortfall plus a small random
    surplus) is inserted so the total ends up past the target.

    Args:
        distance: Distance the drag has to cover.

    Returns:
        A list of integer offsets whose sum is at least ``distance``.
        The list is empty when ``distance`` is not positive.
    """
    ratio = round(random.uniform(0.55, 0.78), 2)
    velocity, step_time, travelled = 0, 0.3, 0
    switch_point = distance * ratio
    plus = []
    while travelled < distance:
        if travelled < switch_point:
            accel = round(random.uniform(2.5, 3.5), 1)
        else:
            accel = -round(random.uniform(2.0, 3.0), 1)
        step = velocity * step_time + 0.5 * accel * (step_time ** 2)
        velocity += accel * step_time
        travelled += step
        plus.append(round(step))

    # Top up when rounding left the total short of the target.
    shortfall = distance - sum(plus)
    if shortfall > 0:
        if shortfall <= 10:
            add_value = shortfall + random.randint(3, 6)
        else:
            add_value = shortfall + random.randint(1, 3)
        # The insert position is drawn from [4, len - 3]; that range is empty
        # for lists shorter than 7 and randint would raise ValueError, so it
        # is clamped.  Lists of 7 or more behave exactly as before.
        low = min(4, len(plus))
        high = max(low, len(plus) - 3)
        plus.insert(random.randint(low, high), add_value)
    return plus
def get_tracks2(distance=255):
    """Build per-step offsets like ``get_tracks`` but with backward steps.

    After the accelerate/decelerate simulation (time step 0.3, switch point
    at a random 0.55-0.78 of ``distance``), a few positive steps are negated
    and a step of twice their size is inserted elsewhere, which leaves the
    total unchanged.  If the total is still below ``distance``, one extra
    step (shortfall plus a small random surplus) is inserted.

    Args:
        distance: Distance the drag has to cover.

    Returns:
        A list of integer offsets whose sum is at least ``distance``.
        The list is empty when ``distance`` is not positive.
    """
    ratio = round(random.uniform(0.55, 0.78), 2)
    velocity, step_time, travelled = 0, 0.3, 0
    switch_point = distance * ratio
    plus = []
    while travelled < distance:
        if travelled < switch_point:
            accel = round(random.uniform(2.5, 3.5), 1)
        else:
            accel = -round(random.uniform(2.0, 3.0), 1)
        step = velocity * step_time + 0.5 * accel * (step_time ** 2)
        velocity += accel * step_time
        travelled += step
        plus.append(round(step))

    # Force some negative steps.
    # NOTE(review): the loop condition is ``count <= zi``, so zi + 1 steps
    # (2 or 3) are negated rather than zi; kept as in the original.
    count = 0
    zi = random.randint(1, 2)
    while count <= zi:
        # Choose among the positive steps at indexes 1 .. len - 3.  Drawing a
        # random index and retrying on a miss never ends when that window has
        # no positive step, and randint raises on very short lists.
        candidates = [i for i in range(1, len(plus) - 2) if plus[i] > 0]
        if not candidates:
            break
        index1 = random.choice(candidates)
        picked = plus[index1]
        print("value", picked)
        plus[index1] = -picked
        # Upper bound clamped so lists shorter than 5 do not raise.
        index2 = random.randint(1, max(1, len(plus) - 4))
        plus.insert(index2, picked * 2)
        count += 1

    # Top up when the total is short of the target.
    shortfall = distance - sum(plus)
    if shortfall > 0:
        if shortfall <= 10:
            add_value = shortfall + random.randint(3, 6)
        else:
            add_value = shortfall + random.randint(1, 3)
        # Same clamping as above: [4, len - 3] is empty for short lists.
        low = min(4, len(plus))
        high = max(low, len(plus) - 3)
        plus.insert(random.randint(low, high), add_value)
    return plus
def generate_random_number(start: float = 0.01, end: float = 0.03) -> float:
    """Return a random float between ``start`` and ``end``, rounded to 3 decimals.

    Args:
        start: One bound of the range passed to ``random.uniform``.
        end: The other bound of that range.
    """
    return round(random.uniform(start, end), 3)
def get_random() -> dict:
    """Pick a random negative and a random positive integer.

    Returns:
        A dict with ``"num1"`` (an int from -5 to -1) and ``"num2"`` (an int
        from 1 to 5).
    """
    lower = random.randint(-5, -1)
    upper = random.randint(1, 5)
    return {"num1": lower, "num2": upper}
def slide(num=0):
    """Drag the ``nc_1_n1z`` slider and retry while a retry element shows up.

    Args:
        num: Count of failed attempts so far.  When it is 3 the drag is
            released part-way through, and when it is 6 the page is
            refreshed; both then try again with ``num + 1``.

    Returns:
        ``{"res": True, "msg": ...}`` when neither retry element is found
        after the drag, or ``{"res": False, "msg": ...}`` when neither the
        slider nor the refresh element is present.

    NOTE(review): retries recurse without an upper bound; a page that keeps
    failing ends in ``RecursionError``.  Consider capping ``num``.
    """
    ele = cp.ele('@id=nc_1_n1z')
    # NOTE(review): the backticks are part of this locator string - confirm
    # that they are intended and not a copy/paste artefact.
    value_ele1 = cp.ele('@id=`nc_1_refresh1`')

    if not ele:
        if value_ele1:
            # No slider but a refresh element: click it and start over.
            print("先点击重试再进行识别......")
            value_ele1.click()
            return slide(num)
        return {"res": False, "msg": "加载滑动验证码失败"}

    print("处理滑动......")
    ele.wait.clickable()
    cp.actions.hold('xpath://span[@id="nc_1_n1z"]')

    # Pick one of the two track generators at random.
    random_choice = random.choice(["get_tracks", "get_tracks2"])
    if random_choice == "get_tracks":
        tracks = get_tracks(255)
    else:
        tracks = get_tracks2(255)
    print("当前处理轨迹函数是: {}".format(random_choice))
    print("当前滑动距离列表: {}".format(tracks))

    # Bounds for the random vertical offset applied on every step.
    random_res = get_random()
    num1 = random_res.get("num1", -5)
    num2 = random_res.get("num2", 5)

    # This check does not depend on the step, so it is done once here rather
    # than on the first loop iteration.
    if num == 6:
        cp.refresh()
        return slide(num + 1)

    bail_index = round(len(tracks) / 1.3)
    last_index = len(tracks) - 1
    for key, track in enumerate(tracks):
        offset_y = round(random.uniform(num1, num2), 1)
        if num == 3 and key == bail_index:
            # Too many retries: release part-way through and start over.
            print("尝试次数过多...", bail_index, len(tracks))
            cp.actions.move(offset_x=track, offset_y=offset_y, duration=0.1).release()
            return slide(num + 1)
        mover = cp.actions.move(offset_x=track, offset_y=offset_y, duration=0.1)
        if key == last_index:
            mover.release()

    # Both lookups run before either result is checked, as in the original.
    value_ele = cp.ele('@id=`nc_1_refresh1`')
    value_ele2 = cp.ele('@id=nc_1__scale_text')
    for retry_ele in (value_ele, value_ele2):
        if retry_ele:
            num += 1
            print("滑动验证失败,进行重试")
            retry_ele.click()
            return slide(num)
    return {"res": True, "msg": "加载滑动验证码处理成功"}
# Run the slider routine once and print the result dict it returns
print(slide())