Skip to content

Commit

Permalink
添加基于yolov4深度学习的验证码识别方式 (#74)
Browse files Browse the repository at this point in the history
* 添加深度学习识别方式

* 优化代码,以及微调了配置,小幅提升识别率
  • Loading branch information
dd178 authored Jun 26, 2021
1 parent c55b729 commit d98aa33
Show file tree
Hide file tree
Showing 6 changed files with 1,278 additions and 11 deletions.
29 changes: 23 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@

2. 填写`jd_wstool` 监听地址ip

如:监听地址1: http://192.168.0.101:5201,填在脚本开头 ipaddr= '192.168.0.101'
如:监听地址1: http://192.168.0.101:5201 ,填在脚本开头 ipaddr= '192.168.0.101'

3. 运行脚本

Expand Down Expand Up @@ -97,9 +97,13 @@

3. 第二关:图形验证码(任选以下一种类型,默认采用本地识别)

1. 本地识别(再也不用花钱了👍),来自[@AntonVanke](https://github.com/AntonVanke)
1. 本地识别(再也不用花钱了👍)

* 来自[@AntonVanke](https://github.com/AntonVanke)
大佬提供的 [JDCaptcha](https://github.com/AntonVanke/JDCaptcha) 项目(已集成)
[测试图在最后一张](https://github.com/yqchilde/JDMemberCloseAccount#screenshots)
[测试图在最后一张](https://github.com/yqchilde/JDMemberCloseAccount#screenshots)

* [@dd178](https://github.com/dd178) 使用 [yolov4](https://github.com/AlexeyAB/darknet) 训练的权重

2. 收费的打码平台

Expand Down Expand Up @@ -131,12 +135,19 @@
pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
```

3. 下载对应的浏览器驱动放到项目的`drivers`文件夹下面
3. 下载必要文件

* `chrome`请访问`chrome://version/`查看浏览器的版本,然后去 [chromedriver](http://chromedriver.storage.googleapis.com/index.html)
3.1 下载对应的浏览器驱动放到项目的`drivers`文件夹下面

`chrome`请访问`chrome://version/`查看浏览器的版本,然后去 [chromedriver](http://chromedriver.storage.googleapis.com/index.html)
下载对应的版本/系统驱动(只需要保证版本号前三段一致即可,比如`91.0.4472.77`只需要保证`91.0.4472.x`就行),下载后解压,将其可执行文件(mac为`chromedriver`
,win为`chromedriver.exe`)放在项目的`drivers`目录下即可

3.2 如果想使用yolov4识别验证码

下载[权重文件](https://github.com/dd178/JDMemberCloseAccount/releases/download/v1.0.3/yolov4-custom.tar.gz) ,将`yolov4-custom.weights`解压至`yolov4`文件夹下


### 2. 补充配置文件

* `config.yaml`文件
Expand Down Expand Up @@ -193,14 +204,17 @@ sms_captcha:
aliyun_appcode: ""

# image_captcha 图形验证码相关
# image_captcha.type: 图形验证码类型,可选:local、cjy、tj
# image_captcha.type: 图形验证码类型,可选:local、cjy、tj、yolov4
# image_captcha.cjy_username: 超级鹰账号,仅在 image_captcha.type 为 cjy 时需要设置
# image_captcha.cjy_password: 超级鹰密码,仅在 image_captcha.type 为 cjy 时需要设置
# image_captcha.cjy_soft_id: 超级鹰软件ID,仅在 image_captcha.type 为 cjy 时需要设置
# image_captcha.cjy_kind: 超级鹰验证码类型,仅在 image_captcha.type 为 cjy 时需要设置,且该项目指定为 9101
# image_captcha.tj_username: 图鉴账号,仅在 image_captcha.type 为 tj 时需要设置
# image_captcha.tj_password: 图鉴密码,仅在 image_captcha.type 为 tj 时需要设置
# image_captcha.tj_type_id: 图鉴验证码类型,仅在 image_captcha.type 为 tj 时需要设置,且该项目指定为 19
# image_captcha.yolov4_weights: yolov4权重文件路径,仅在 image_captcha.type 为 yolov4 时需要设置
# image_captcha.yolov4_cfg: yolov4配置文件路径,仅在 image_captcha.type 为 yolov4 时需要设置
# image_captcha.CUDA: 是否尝试使用CUDA加速(速度可能提升数倍到数十倍),需要自行编译安装支持CUDA的opencv,仅在 image_captcha.type 为 yolov4 时需要设置
image_captcha:
type: "local"
cjy_username: ""
Expand All @@ -210,6 +224,9 @@ image_captcha:
tj_username: ""
tj_password: ""
tj_type_id: 19
yolov4_weights: "yolov4/yolov4-custom.weights"
yolov4_cfg: "yolov4/yolov4-custom.cfg"
CUDA: false

# user-agent 用户代理,可自行配置
user-agent:
Expand Down
72 changes: 72 additions & 0 deletions captcha/jd_yolo_captcha.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 2021-06-21 11:33
# @Author : 178
import cv2
import base64
import numpy as np
import os
import sys

class JDyolocaptcha(object):
    """
    YOLOv4-based recognizer for the JD click captcha.

    Two images are involved: the large captcha image (``cpc``) and the small
    prompt image (``pcp``).  The class detected in the small image is looked
    up among the detections of the large image, and the centre of the
    matching box is returned as the point to click.
    """

    # Boxes at least this wide are treated as a background match and rejected.
    MAX_TARGET_WIDTH = 200

    def __init__(self, _config):
        """
        Load the network and build one detection model per image size.

        :param _config: mapping providing ``yolov4_weights`` (weights file
            path), ``yolov4_cfg`` (network cfg path) and ``CUDA`` (truthy to
            request the CUDA backend/target)
        """
        self.CONFIDENCE_THRESHOLD = 0.8  # minimum confidence for a detection
        self.NMS_THRESHOLD = 0.01  # non-maximum suppression, drops duplicate matches
        from utils.logger import Log
        self.logger = Log().logger
        weights = _config['yolov4_weights']
        cfg = _config['yolov4_cfg']
        if os.path.exists(weights):
            self.net = cv2.dnn.readNet(weights, cfg)
        else:
            # Without the weights file nothing can be recognized: log and exit.
            self.logger.error("找不到权重文件")
            sys.exit(1)
        if _config['CUDA']:
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        # The two images differ in size, so two models with different input
        # sizes are used; otherwise the recognition rate is very low.
        self.cpc_model = cv2.dnn_DetectionModel(self.net)
        self.pcp_model = cv2.dnn_DetectionModel(self.net)
        # size must be a multiple of 32; larger is slower but not necessarily
        # more accurate.
        self.cpc_model.setInputParams(size=(320, 320), scale=1/255, swapRB=True)
        self.pcp_model.setInputParams(size=(224, 128), scale=1/255, swapRB=True)

    def base64_conversion(self, data):
        """
        Decode a base64 image (optionally a ``data:`` URI) into a BGR Mat.

        :param data: base64 text, with or without a
            ``data:<mime>;base64,`` header
        :return: the result of ``cv2.imdecode`` on the decoded bytes
        """
        # Drop any data-URI header, not only the "image/jpg" one: a leftover
        # header would be decoded as garbage bytes and corrupt the image.
        if data.startswith("data:"):
            data = data.partition(",")[2]
        raw = base64.b64decode(data)
        buf = np.frombuffer(raw, np.uint8)
        return cv2.imdecode(buf, cv2.IMREAD_COLOR)

    def identify(self, cpc, pcp):
        """
        Recognize the captcha and return the coordinates to click.

        :param cpc: large captcha image
        :param pcp: small prompt image
        :return: ``(True, (x, y))`` with the centre of the matching box in
            the large image, or ``(False, (None, None))`` when nothing
            matches or recognition fails
        """
        failure = (False, (None, None))
        try:
            cpc_classes, _, cpc_boxes = self.cpc_model.detect(
                cpc, self.CONFIDENCE_THRESHOLD, self.NMS_THRESHOLD)
            pcp_classes, _, _ = self.pcp_model.detect(
                pcp, self.CONFIDENCE_THRESHOLD, self.NMS_THRESHOLD)

            # Class ids may arrive as (N, 1) or (N,) arrays, or as an empty
            # tuple when nothing was detected; flatten to handle all of them.
            cpc_ids = np.asarray(cpc_classes).reshape(-1)
            pcp_ids = np.asarray(pcp_classes).reshape(-1)
            if pcp_ids.size == 0 or cpc_ids.size == 0:
                return failure

            # Is the class found in the small image also present in the large one?
            matches = np.flatnonzero(cpc_ids == pcp_ids[0])
            if matches.size == 0:
                return failure

            # Boxes are (x, y, width, height).
            x, y, w, h = cpc_boxes[matches[0]]
            # Reject very wide boxes: they are the background, not a target.
            if w >= self.MAX_TARGET_WIDTH:
                return failure
            centre = (int(x) * 2 + int(w)) // 2, (int(y) * 2 + int(h)) // 2
            return True, centre
        except Exception:
            # Best effort: a failed recognition must not abort the caller,
            # but the reason is logged instead of being silently dropped.
            self.logger.exception("yolov4识别验证码时发生异常")
            return failure

    def JDyolo(self, cpc_img_path_base64, pcp_show_picture_path_base64):
        """
        Decode both base64 images and run :meth:`identify` on them.

        :param cpc_img_path_base64: base64 source of the large captcha image
        :param pcp_show_picture_path_base64: base64 source of the small prompt image
        :return: same as :meth:`identify`
        """
        cpc = self.base64_conversion(cpc_img_path_base64)
        pcp = self.base64_conversion(pcp_show_picture_path_base64)
        return self.identify(cpc, pcp)
8 changes: 7 additions & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,17 @@ sms_captcha:
aliyun_appcode: ""

# image_captcha 图形验证码相关
# image_captcha.type: 图形验证码类型,可选:local、cjy、tj
# image_captcha.type: 图形验证码类型,可选:local、cjy、tj、yolov4
# image_captcha.cjy_username: 超级鹰账号,仅在 image_captcha.type 为 cjy 时需要设置
# image_captcha.cjy_password: 超级鹰密码,仅在 image_captcha.type 为 cjy 时需要设置
# image_captcha.cjy_soft_id: 超级鹰软件ID,仅在 image_captcha.type 为 cjy 时需要设置
# image_captcha.cjy_kind: 超级鹰验证码类型,仅在 image_captcha.type 为 cjy 时需要设置,且该项目指定为 9101
# image_captcha.tj_username: 图鉴账号,仅在 image_captcha.type 为 tj 时需要设置
# image_captcha.tj_password: 图鉴密码,仅在 image_captcha.type 为 tj 时需要设置
# image_captcha.tj_type_id: 图鉴验证码类型,仅在 image_captcha.type 为 tj 时需要设置,且该项目指定为 19
# image_captcha.yolov4_weights: yolov4权重文件路径,仅在 image_captcha.type 为 yolov4 时需要设置
# image_captcha.yolov4_cfg: yolov4配置文件路径,仅在 image_captcha.type 为 yolov4 时需要设置
# image_captcha.CUDA: 是否尝试使用CUDA加速(速度可能提升数倍到数十倍),需要自行编译安装支持CUDA的opencv,仅在 image_captcha.type 为 yolov4 时需要设置
image_captcha:
type: "local"
cjy_username: ""
Expand All @@ -66,6 +69,9 @@ image_captcha:
tj_username: ""
tj_password: ""
tj_type_id: 19
yolov4_weights: "yolov4/yolov4-custom.weights"
yolov4_cfg: "yolov4/yolov4-custom.cfg"
CUDA: false

# user-agent 用户代理,可自行配置
user-agent:
Expand Down
13 changes: 10 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from captcha.chaojiying import ChaoJiYing
from captcha.tujian import TuJian
from captcha.jd_captcha import JDcaptcha_base64
from captcha.jd_yolo_captcha import JDyolocaptcha
from utils.logger import Log
from utils.config import get_config
from utils.selenium_browser import get_browser
Expand Down Expand Up @@ -93,6 +94,8 @@ def __init__(self):
self.tj = TuJian(self.image_captcha_cfg)
elif self.image_captcha_cfg["type"] == "local":
pass
elif self.image_captcha_cfg["type"] == "yolov4":
self.JDyolo = JDyolocaptcha(self.image_captcha_cfg)
else:
WARN("请在config.yaml中补充image_captcha.type")

Expand Down Expand Up @@ -451,8 +454,12 @@ def local_auto_identify_captcha_click():
pcp_show_picture_path_base64 = self.wait.until(EC.presence_of_element_located(
(By.XPATH, '//*[@class="pcp_showPicture"]'))).get_attribute('src')
# 正在识别验证码
INFO("正在通过本地引擎识别")
res = JDcaptcha_base64(cpc_img_path_base64, pcp_show_picture_path_base64)
if self.image_captcha_cfg["type"] == "local":
INFO("正在通过本地引擎识别")
res = JDcaptcha_base64(cpc_img_path_base64, pcp_show_picture_path_base64)
else:
INFO("正在通过深度学习引擎识别")
res = self.JDyolo.JDyolo(cpc_img_path_base64, pcp_show_picture_path_base64)
if res[0]:
ActionChains(self.browser).move_to_element_with_offset(
cpc_img, int(res[1][0] * zoom),
Expand All @@ -475,7 +482,7 @@ def local_auto_identify_captcha_click():
return False

# 识别点击,如果有一次失败将再次尝试一次,再失败就跳过
if self.image_captcha_cfg["type"] == "local":
if self.image_captcha_cfg["type"] in ["local", "yolov4"]:
if not local_auto_identify_captcha_click():
INFO("验证码位置点击错误,尝试再试一次")
local_auto_identify_captcha_click()
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ Pillow~=8.2.0
numpy~=1.20.3
urllib3~=1.26.5
baidu-aip==2.2.18.0
websockets~=9.1
websockets~=9.1
opencv_python~=4.5.2.54
Loading

0 comments on commit d98aa33

Please sign in to comment.