在破解验证码的过程中,常常会遇到各种复杂的环境模拟和算法挑战。本文将详细介绍如何通过模拟验证码环境和理解关键算法来成功破解验证码。这些技巧来自于网络上各路大佬的分享,并经过总结归纳,旨在帮助大家更好地理解和应用这些技术。
1. 模拟环境中的 window.crypto.getRandomValues()
在某些验证码中,window.crypto.getRandomValues() 被用于生成随机数。我们可以通过以下代码来模拟该方法:
javascript
// Node.js has no browser `window`; alias it to the Node global object so
// scripts that reference `window.*` resolve against it.
window = global;
// Install a crypto object whose getRandomValues is the local stand-in
// (function declarations are hoisted, so the reference is valid here).
window.crypto = {};
window.crypto.getRandomValues = getRandomValues_;
/**
 * Return a pseudo-random integer in the inclusive range [min, max].
 *
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number} Integer drawn with Math.random().
 */
function randoms(min, max) {
  var span = max - min + 1;
  var scaled = Math.random() * span + min;
  return Math.floor(scaled);
}
/**
 * Stand-in for `crypto.getRandomValues` that fills an integer array view
 * with pseudo-random values from Math.random() and returns the same view.
 *
 * NOTE: Math.random() is not cryptographically secure; this only mimics
 * the shape of the browser API.
 *
 * @param {Uint8Array|Uint16Array|Uint32Array|Int8Array|Int16Array|Int32Array|Array} buf
 *        View (or plain array) to fill in place.
 * @returns {*} The same `buf` object, filled.
 * @throws {Error} With name 'QuotaExceededError' and code 22 when the view
 *         is larger than 65536 bytes.
 */
function getRandomValues_(buf) {
  var QUOTA_BYTES = 65536;

  // The quota is expressed in BYTES, not elements: a Uint32Array of 16385
  // items is already over the limit. Plain arrays have no byteLength, so
  // fall back to their element count.
  var byteLength = buf.byteLength !== undefined ? buf.byteLength : buf.length;
  if (byteLength > QUOTA_BYTES) {
    var e = new Error();
    e.code = 22;
    e.message = 'Failed to execute \'getRandomValues\' : The ' +
      'ArrayBufferView\'s byte length (' + byteLength + ') ' +
      'exceeds the number of bytes of entropy available via this API (65536).';
    e.name = 'QuotaExceededError';
    throw e;
  }

  // Largest unsigned value representable per element. Signed views store
  // the value modulo 2^bits, so the unsigned range covers them as well.
  var max = 255;
  if (buf instanceof Uint16Array || buf instanceof Int16Array) {
    max = 65535;
  } else if (buf instanceof Uint32Array || buf instanceof Int32Array) {
    max = 4294967295;
  }

  for (var i = 0; i < buf.length; i++) {
    // Uniform integer in [0, max].
    buf[i] = Math.floor(Math.random() * (max + 1));
  }
  return buf;
}
测试代码
javascript
// Smoke test: fill a 256-element Uint32Array through the stand-in and
// print the filled view (getRandomValues returns the array it was given).
var a = new Uint32Array(256);
window.crypto.getRandomValues(a);
console.log(a);
2. 模拟 window.performance.timing
验证码还会使用 window.performance.timing 来获取一些性能指标。我们可以用以下代码来模拟这些性能数据:
javascript
/**
 * Build an object shaped like `window.performance.timing`, with every
 * timestamp derived from a single Date.now() reading.
 *
 * NOTE(review): the offsets are not in chronological order (for example
 * connectStart is now+30 while fetchStart is now+100) — confirm whether
 * that is intentional.
 *
 * @returns {Object} Map of timing field name to millisecond timestamp;
 *          redirectStart and redirectEnd are always 0.
 */
function timing() {
  var base = Date.now();

  // [field, offset in ms from `base`]; a null offset means the literal 0.
  // Order matters: it fixes the property order of the returned object.
  var layout = [
    ["navigationStart", 0],
    ["unloadEventStart", 200],
    ["unloadEventEnd", 200],
    ["redirectStart", null],
    ["redirectEnd", null],
    ["fetchStart", 100],
    ["domainLookupStart", 150],
    ["domainLookupEnd", 250],
    ["connectStart", 30],
    ["connectEnd", 50],
    ["secureConnectionStart", 52],
    ["requestStart", 72],
    ["responseStart", 91],
    ["responseEnd", 92],
    ["domLoading", 99],
    ["domInteractive", 105],
    ["domContentLoadedEventStart", 105],
    ["domContentLoadedEventEnd", 111],
    ["domComplete", 111],
    ["loadEventStart", 111],
    ["loadEventEnd", 111]
  ];

  var tim = {};
  layout.forEach(function (entry) {
    var field = entry[0];
    var offset = entry[1];
    tim[field] = offset === null ? 0 : base + offset;
  });
  return tim;
}
3. 动态获取 window.gct
某些验证码会动态生成一些随机键值对,这些值会被用于加密或其他校验。我们可以通过以下代码动态获取这些键值对:
python
import re
import execjs
import requests
# Request headers sent with the download (desktop Chrome User-Agent).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
}

# Download the gct.js source.
# The URL previously carried a spurious proxy prefix
# ("https://..."), which routed the request through
# a third-party host instead of the static host named in the path.
gct_path = "https://static.geetest.com/static/js/gct.b71a9027509bc6bcfef9fc6a196424f5.js"
response = requests.get(gct_path, headers=headers, timeout=10)
response.raise_for_status()  # do not try to parse an error page as JS
gct_js = response.text

# Extract the name of the function the script calls after ")){return ".
name_match = re.search(r"\)\)\{return (.*?)\(", gct_js)
if name_match is None:
    raise RuntimeError("could not locate the target function name in gct.js")
function_name = name_match.group(1)

# Find where the global export statement has to be spliced in.
break_position = gct_js.find("return function(t){")
if break_position == -1:
    # str.find returns -1 on a miss; slicing with it would silently
    # corrupt the script instead of failing.
    raise RuntimeError("could not locate the export insertion point in gct.js")

# Patch the script so the extracted function is reachable as window.gct.
gct_js_new = gct_js[:break_position] + "window.gct=" + function_name + ";" + gct_js[break_position:]
# Prepend the window alias and append a getGct() helper that passes an
# object to window.gct, removes the two seed keys and returns what is left.
gct_js_new = "window = global;" + gct_js_new + """
function getGct(){
var e = {"lang": "zh", "ep": "test data"};
window.gct(e);
delete e["lang"];
delete e["ep"];
return e;
}"""

# Run the patched script and collect the remaining key/value pairs.
gct = execjs.compile(gct_js_new).call("getGct")
print(gct)  # the key/value pairs left on the object after window.gct ran
4. 破解滑块验证码的轨迹生成
滑块验证码需要模拟用户拖动滑块的轨迹。我们可以利用缓动函数生成逼真的轨迹,例如 easeOutExpo:
python
import random
def __ease_out_expo(sep):
    """Evaluate the easeOutExpo easing curve.

    Args:
        sep: Progress through the motion; ``get_slide_track`` passes
            values in (0, 1].

    Returns:
        Exactly 1 when ``sep == 1``, otherwise ``1 - 2 ** (-10 * sep)``.
    """
    if sep == 1:
        return 1
    return 1 - pow(2, -10 * sep)


def get_slide_track(distance):
    """Generate a slide track that ends exactly at ``distance``.

    Args:
        distance: Horizontal distance to cover; a non-negative ``int``.

    Returns:
        A list of ``[x, y, t]`` entries: a random negative ``[x, y, 0]``
        entry, ``[0, 0, 0]``, one entry per distinct eased x position
        (y is always 0, t is the accumulated time), and finally a copy of
        the last entry.

    Raises:
        ValueError: If ``distance`` is not an ``int`` or is negative.
    """
    if not isinstance(distance, int) or distance < 0:
        raise ValueError(f"distance类型必须是大于等于0的整数: distance: {distance}, type: {type(distance)}")

    slide_track = [
        [random.randint(-50, -10), random.randint(-50, -10), 0],
        [0, 0, 0],
    ]

    # Number of sampling steps along the easing curve.
    count = 30 + int(distance / 2)
    # Accumulated time, starting from a random initial delay.
    t = random.randint(50, 100)
    last_x = 0
    y = 0

    # Sample progress 1/count .. count/count. Including the final step
    # (progress == 1) guarantees the track reaches `distance`; stopping at
    # (count - 1)/count leaves large distances short of the target.
    for step in range(1, count + 1):
        x = round(__ease_out_expo(step / count) * distance)
        t += random.randint(10, 20)
        if x == last_x:
            # No visible movement in this step; only time advances.
            continue
        slide_track.append([x, y, t])
        last_x = x

    # Repeat the final position as an independent list so that mutating
    # one entry later cannot change the other.
    slide_track.append(list(slide_track[-1]))
    return slide_track
5. 验证码识别方法
验证码识别主要有三种方法:
图像处理与深度学习:使用 OpenCV 等工具做图像匹配,或训练深度学习模型进行验证码识别。
第三方开源库:如 ddddocr。
打码平台
以下是使用 OpenCV 识别滑块缺口的示例代码:
python
import cv2
import numpy as np
from PIL import Image
def imshow(img, winname='test', delay=0):
    """Show an image in an OpenCV window, wait for a key, then close.

    Args:
        img: Image to display, passed straight to ``cv2.imshow``.
        winname: Title of the window.
        delay: Value passed to ``cv2.waitKey``.
    """
    cv2.imshow(winname, img)
    cv2.waitKey(delay)
    # Tear down every OpenCV window, not only the one opened above.
    cv2.destroyAllWindows()
def pil_to_cv2(img):
    """Convert a PIL image into an OpenCV array with BGR channel order.

    Args:
        img: Object convertible by ``np.asarray``; presumably an RGB-mode
            PIL image — other modes are not handled here.

    Returns:
        The array produced by ``cv2.cvtColor(..., cv2.COLOR_RGB2BGR)``.
    """
    rgb_pixels = np.asarray(img)
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
def bytes_to_cv2(img):
    """Decode an encoded image held in memory into an OpenCV array.

    Args:
        img: Bytes-like buffer containing the encoded image file.

    Returns:
        Whatever ``cv2.imdecode`` returns for the buffer in color mode.
    """
    encoded = np.frombuffer(img, dtype=np.uint8)
    # cv2.IMREAD_COLOR is the named constant for flag value 1.
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)
def cv2_open(img, flag=None):
    """Normalize any supported image input into an OpenCV array.

    Args:
        img: One of: encoded image ``bytes``; a file path (``str`` or
            ``pathlib.Path``); a ``numpy.ndarray`` (returned as-is unless
            ``flag`` is given); or a PIL image.
        flag: Optional ``cv2.cvtColor`` conversion code applied to the
            result, e.g. ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The image as an array, color-converted when ``flag`` is given.

    Raises:
        ValueError: If the input type is unsupported, or if the file or
            byte buffer could not be read/decoded.
    """
    # Imported locally: only `from PIL import Image` is imported at module
    # level, so the bare names `Path` and `PIL` would raise NameError.
    from pathlib import Path

    if isinstance(img, bytes):
        decoded = bytes_to_cv2(img)
    elif isinstance(img, (str, Path)):
        decoded = cv2.imread(str(img))
    elif isinstance(img, np.ndarray):
        decoded = img
    elif isinstance(img, Image.Image):
        decoded = pil_to_cv2(img)
    else:
        raise ValueError(f'输入的图片类型无法解析: {type(img)}')

    if decoded is None:
        # cv2.imread / cv2.imdecode report failure by returning None
        # rather than raising; fail here instead of deep inside OpenCV.
        source = img if isinstance(img, (str, Path)) else type(img)
        raise ValueError(f'图片读取或解码失败: {source}')

    if flag is not None:
        decoded = cv2.cvtColor(decoded, flag)
    return decoded
def get_distance(bg, tp, im_show=False, save_path=None):
    """Locate the slider piece inside the background image.

    Both images are reduced to Canny edge maps (the background is
    mean-shift filtered first) and compared with normalized
    cross-correlation template matching.

    Args:
        bg: Background image, in any form accepted by ``cv2_open``.
        tp: Slider/template image, in any form accepted by ``cv2_open``.
        im_show: When true, display the background with the match outlined.
        save_path: When set, write the outlined background to this path.

    Returns:
        The x coordinate of the best match's top-left corner.
    """
    bg_img = cv2_open(bg)
    tp_gray = cv2_open(tp, flag=cv2.COLOR_BGR2GRAY)

    bg_shift = cv2.pyrMeanShiftFiltering(bg_img, 5, 50)
    tp_edges = cv2.Canny(tp_gray, 255, 255)
    bg_edges = cv2.Canny(bg_shift, 255, 255)

    result = cv2.matchTemplate(bg_edges, tp_edges, cv2.TM_CCOEFF_NORMED)
    # Only the location of the maximum score is needed.
    _, _, _, max_loc = cv2.minMaxLoc(result)
    distance = max_loc[0]

    if save_path or im_show:
        tp_height, tp_width = tp_edges.shape[:2]
        x, y = max_loc
        # Draw on a copy: when `bg` is an ndarray, cv2_open returns the
        # caller's own array and drawing in place would modify it.
        annotated = bg_img.copy()
        cv2.rectangle(
            annotated, (x, y), (x + tp_width, y + tp_height), (0, 0, 255), 2
        )
        if save_path:
            # str() lets pathlib.Path values be used as well.
            cv2.imwrite(str(save_path), annotated)
        if im_show:
            imshow(annotated)
    return distance
更多内容联系1436423940