流程图:
1、页面分析:
可以发现翻译结果来自一个post请求。经过试验,query='兄弟'、sign='xxx'、token 这三个参数最为重要(token值是固定的;sign是数字签名,经过加密,其加密方式通过百度翻译开发文档寻找发现);同时还需要将待翻译的源语言代码和目标语言代码两个参数传入,这里分别为zh、en
1)首先寻找token值:(通过全局搜索,可以发现token值是固定的)
2)寻找v2transapi资源:首先观察可以发现先是通过langdetect的post请求生成v2transapi的post请求,因此需要关注这个资源下面是否存在sign的加密方法,通过寻找top整个资源找到v2transapi资源所在文件:index_xxx.js
3)寻找sign加密:在index_xxx.js中通过不断打断点寻找sign加密函数;先找到疑似负责加密的函数(可以通过不断的断点去尝试),再通过断点校验确认
点击上述js资源链接跳转,通过断点调试可以找到需要初始定义的i值;i值为固定值(但可能会更新,属于反爬手段):
4)js代码:sign加密所需的函数
/**
 * Compute the `sign` form field for the text `r`.
 *
 * Steps, as implemented below:
 *   1. If `r` is longer than 30 characters (a surrogate pair counts as one
 *      character), it is shortened to its first 10, middle 10 and last 10
 *      characters.
 *   2. The (possibly shortened) text is encoded to UTF-8 bytes.
 *   3. The bytes are folded into an accumulator seeded with the integer part
 *      of the seed `i`, calling n() after every byte.
 *   4. The accumulator is mixed once more, XOR-ed with the fractional part of
 *      the seed, made non-negative and reduced modulo 1e6.
 *
 * Requires the sibling function n(r, o) to be defined in the same script.
 *
 * @param {string} r  Text to sign.
 * @returns {string}  "<p>.<p ^ m>", where m is the integer part of the seed.
 */
function e(r) {
    // Seed value, hard-coded here; `u` below ends up equal to it.
    var i = '320305.131321201';
    // Surrogate pairs (characters outside the BMP). The escapes need their
    // backslashes: without them the class contains the range "0-u" and
    // matches ordinary ASCII text.
    var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === o) {
        var t = r.length;
        t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
    } else {
        // Rebuild the text as a list of characters in which every surrogate
        // pair is a single element, so truncation never splits a pair.
        // The original wrapped split("") in a helper `a(...)` that is not
        // defined in this script (presumably an array-copy helper); split()
        // already returns an array, so it is passed to push.apply directly.
        for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++)
            "" !== e[C] && f.push.apply(f, e[C].split("")),
            C !== h - 1 && f.push(o[C]);
        var g = f.length;
        g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
    }
    var u = void 0
      , l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107); // l === "gtk"
    // `i` is a non-null string here, so the window[l] fallback is never evaluated.
    u = null !== i ? i : (i = window[l] || "") || "";
    // m / s: integer and fractional parts of the seed; S: UTF-8 bytes of r.
    for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
        var A = r.charCodeAt(v);
        // 1-, 2-, 4- (surrogate pair) or 3-byte UTF-8 sequence.
        128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)),
        S[c++] = A >> 18 | 240,
        S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224,
        S[c++] = A >> 6 & 63 | 128),
        S[c++] = 63 & A | 128)
    }
    // F === "+-a^+6" and D === "+-3^+b+-f": operation strings interpreted by n().
    for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++)
        p += S[b],
        p = n(p, F);
    // Final mix with n(), XOR with the fractional seed part, force a
    // non-negative value, keep the last six decimal digits.
    return p = n(p, D),
    p ^= s,
    0 > p && (p = (2147483647 & p) + 2147483648),
    p %= 1e6,
    p.toString() + "." + (p ^ m)
}
// 辅助函数 n():供上面的 e() 调用
/**
 * Apply the operations encoded in `o` to the number `r`.
 *
 * `o` is read three characters at a time:
 *   - char 0: "+" adds the shifted value (result AND-ed with 4294967295),
 *             anything else XORs it in;
 *   - char 1: "+" shifts right (unsigned, >>>), anything else shifts left;
 *   - char 2: shift amount; characters >= "a" map to charCode - 87,
 *             others are converted with Number().
 *
 * @param {number} r  Starting value.
 * @param {string} o  Operation string, e.g. "+-a^+6".
 * @returns {number}  The value after every complete triple has been applied.
 */
function n(r, o) {
    var acc = r;
    for (var pos = 0; pos < o.length - 2; pos += 3) {
        var combineOp = o.charAt(pos);
        var shiftDir = o.charAt(pos + 1);
        var amountChar = o.charAt(pos + 2);

        var amount;
        if (amountChar >= "a") {
            amount = amountChar.charCodeAt(0) - 87;
        } else {
            amount = Number(amountChar);
        }

        var shifted;
        if ("+" === shiftDir) {
            shifted = acc >>> amount;
        } else {
            shifted = acc << amount;
        }

        if ("+" === combineOp) {
            acc = (acc + shifted) & 4294967295;
        } else {
            acc = acc ^ shifted;
        }
    }
    return acc;
}
5)python执行js代码:利用execjs模块执行
import execjs
def load_js(path):
    """Return the full text of the UTF-8 encoded file at *path*."""
    with open(path, mode='r', encoding='utf-8') as source_file:
        return source_file.read()
# Load the signing script, compile it, and print the sign of a sample query.
jscode = load_js('./jj.js')
js_context = execjs.compile(jscode)
rst = js_context.call('e', '兄弟')
print(rst)
发现计算出的sign值与页面分析得到的结果一致;同时经过试验发现,sign加密只与r(即需要翻译的源内容)有关,与语言类型无关
6)观察返回结果的页面,可以从json数据中发现,dst:'brother' 即为所需结果
2、代码实现:
import requests
import json
import execjs
class trans(object):
    """One translation request against a Baidu Translate style endpoint.

    The constructor prepares the HTTP headers and the form data (including
    the ``sign`` value computed by the JavaScript function ``e``); ``run()``
    sends the request and returns the translated text.
    """

    # Token sent in the form data.
    TOKEN = 'e74813bab0a8fd9f7fd385e807b79321'

    def __init__(self, yuan, target, url, word, js_path='./jj.js', timeout=10):
        """Build the headers and form data for one request.

        Args:
            yuan: Source language code, sent as the ``from`` field.
            target: Target language code, sent as the ``to`` field.
            url: Endpoint the form is posted to.
            word: Text to translate, sent as ``query`` and used for ``sign``.
            js_path: Path of the script that defines the signing function
                ``e``; defaults to the previously hard-coded ``./jj.js``.
            timeout: Seconds passed to ``requests.post`` so a stalled
                connection cannot block forever.
        """
        # NOTE(review): the cookie is a hard-coded browser session value; it
        # presumably goes stale over time and should not live in source
        # control. It is kept unchanged here. The literal is split in two
        # adjacent pieces only to keep it a valid (terminated) string.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
            'cookie': (
                'BIDUPSID=B3C094FD6A0FEE417C2703340B4AF2D7; PSTM=1603963404; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BAIDUID=64509CE041C10A411C12FAC982B8D254:FG=1; __yjs_duid=1_c5abf95293c6246880dbd87c0ba25d5d1621231890175; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDSFRCVID=Hx_OJeC62Cz2s3QHjIHqU6pq9mK0ML5TH6aoh4Mt40LUCjQoD1SUEG0P8x8g0Ku-S2-5ogKK3gOTH4DF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF=tb4qVCL5tI-3MDjYb4n_5bJH-UnLqhTZbgOZ0l8KtDI-qJj9jf74MRbW5fLL2-j-QItObn7mWIQHDU5k35OqebIFKl3rX4Qzban4KKJxLbLWeIJo5tKByxP4hUJiB5JMBan7_pbIXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbC_Cj5K2D6j-eUreaR3-HDrKBRbaHJOoDDvNyUQcy4LdjG5N3hLLfIo7-qc1MP5HKJ7PbtvKbt-V3-Aq54RGJCJD54TP2fbhVJOpXTohQfbQ02cPqP-jW5TahCLEtR7JOpkRbUnxy50vQRPH-Rv92DQMVU52QqcqEIQHQT3m5-5bbN3ht6IDJbKj_KKKfbo5KRopMtOhq4tehH4qbn39WDTOQJ7TtPQs8JcSMx7fbPDe5U7--47-WRvX-pbwBPbcfUnMKn05XM-pXbjMLtnK3mkjbPbOKl3fffAzhnAWQt4syP4eKMRnWnnRKfA-b4ncjRcTehoM3xI8LNj405OTbIFO0KJzJCF5hD09DjKKjjPDbfoJa46MHjn-WJOHat-_Hn7zeTQsyf4pbt-qJtrHbKFf3hn_WUTaHR76M-_BQ5_8bUbnBT5Ka-jMaU5uWII2slO4XKcHb5-kQN3TbhLO5bRiLRoEXbctDn3oyTbJXp0nj-Oly5jtMgOBBJ0yQ4b4OR5JjxonDh83bG7MJPKtfJCeVID2tKP3ejrP-trf5DCShUFsBnvdB2Q-5KL-0-FKqIOhy-6zyb_Ajl3Dq4ritgjr_xbdJJjojKbE5582XhLdMRQy0TogQmTxoUtyMInJhhvGqq-KqjLebPRiJ-b9Qg-JKpQ7tt5W8ncFbT7l5hKpbt-q0x-jLTnhVn0M5DK0HPonHjD2ejv03J; BCLID_BFESS=10263449539173583399; BDSFRCVID_BFESS=Hx_OJeC62Cz2s3QHjIHqU6pq9mK0ML5TH6aoh4Mt40LUCjQoD1SUEG0P8x8g0Ku-S2-5ogKK3gOTH4DF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; '
                'H_BDCLCKID_SF_BFESS=tb4qVCL5tI-3MDjYb4n_5bJH-UnLqhTZbgOZ0l8KtDI-qJj9jf74MRbW5fLL2-j-QItObn7mWIQHDU5k35OqebIFKl3rX4Qzban4KKJxLbLWeIJo5tKByxP4hUJiB5JMBan7_pbIXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbC_Cj5K2D6j-eUreaR3-HDrKBRbaHJOoDDvNyUQcy4LdjG5N3hLLfIo7-qc1MP5HKJ7PbtvKbt-V3-Aq54RGJCJD54TP2fbhVJOpXTohQfbQ02cPqP-jW5TahCLEtR7JOpkRbUnxy50vQRPH-Rv92DQMVU52QqcqEIQHQT3m5-5bbN3ht6IDJbKj_KKKfbo5KRopMtOhq4tehH4qbn39WDTOQJ7TtPQs8JcSMx7fbPDe5U7--47-WRvX-pbwBPbcfUnMKn05XM-pXbjMLtnK3mkjbPbOKl3fffAzhnAWQt4syP4eKMRnWnnRKfA-b4ncjRcTehoM3xI8LNj405OTbIFO0KJzJCF5hD09DjKKjjPDbfoJa46MHjn-WJOHat-_Hn7zeTQsyf4pbt-qJtrHbKFf3hn_WUTaHR76M-_BQ5_8bUbnBT5Ka-jMaU5uWII2slO4XKcHb5-kQN3TbhLO5bRiLRoEXbctDn3oyTbJXp0nj-Oly5jtMgOBBJ0yQ4b4OR5JjxonDh83bG7MJPKtfJCeVID2tKP3ejrP-trf5DCShUFsBnvdB2Q-5KL-0-FKqIOhy-6zyb_Ajl3Dq4ritgjr_xbdJJjojKbE5582XhLdMRQy0TogQmTxoUtyMInJhhvGqq-KqjLebPRiJ-b9Qg-JKpQ7tt5W8ncFbT7l5hKpbt-q0x-jLTnhVn0M5DK0HPonHjD2ejv03J; H_PS_PSSID=34835_34068_31660_34712_34599_34584_34505_34830_34579_34812_26350_34826_34736_34691_34672; delPer=0; ZD_ENTRY=baidu; BAIDUID_BFESS=64509CE041C10A411C12FAC982B8D254:FG=1; PSINO=6; __yjs_st=2_OWNhM2Y3YWUyMjM4Y2YxOWZmNWE4MGE1NzYzZjhiNDkxOGMwN2QzOGFkOGU1MTJiM2E1YjE0MmEzY2QyM2IwNDJiNmI4ODRiNzAwOTk5MDAxMTU3MDYwNWFmNzRkMmU3ODExZmVhOTM0ZTE0NDgxYmQxZGFiOTFjNGY0NGZlNjBiOWQ4YTY1ZjAyMWZlMDlmOTVlMDllM2Y2ZjU4ODM3MjJjMDBjYjk1YjJjYjJhZDJkNjBkZjIxNzE2YTcxNGE5OTQwZDM3OTRiOGFkYWFkMjI5ZTFlZmJhNDFmN2E2NDM4NjMxMzYxMDkwOTQwYTRjNWRkYjFhOTVjYjA3YTljMF83X2Y3YTJlYjY5; ab_sr=1.0.1_MDJjYzhlNWYyZmIwOGNlZDAwOTQwOGExZWZiNGI4OWM4MGFiZDk4N2M2NTlkMTA1OGRiZDdhMzY1NWQ5YzZhN2ZmYzA0Y2ZkN2ViYzcwMmY5NDhjYjQwYjg4ZDcyMmQyODg2NGE0MTE2MmRiZDc1MDRjNzBiNzRiNzJkNzczYWNjYTY2ZDYwY2I1NGIyZmQxMzNjMGFlZTAyOWI2ZjFkYw==; BA_HECTOR=0g010l058ga42180au1gmgtgf0r; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1634231163,1634231236,1634233204,1634237535; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1634237535'
            ),
        }
        # Compute the sign by running the JavaScript function e(word).
        with open(js_path, 'r', encoding='utf-8') as f:
            jscode = f.read()
        sign = execjs.compile(jscode).call('e', word)
        self.data = {
            'from': yuan,
            'to': target,
            'query': word,
            'token': self.TOKEN,
            'sign': sign,
        }
        self.url = url
        self.timeout = timeout

    def get_data(self):
        """POST the prepared form and return the raw response body (bytes)."""
        response = requests.post(
            self.url,
            headers=self.headers,
            data=self.data,
            timeout=self.timeout,
        )
        return response.content

    def parse_data(self, data):
        """Extract the first translation from a JSON response body.

        Args:
            data: JSON document (``str`` or ``bytes``) as returned by
                ``get_data``.

        Returns:
            The value at ``trans_result -> data -> [0] -> dst``.

        Raises:
            KeyError / IndexError: if the response lacks that path.
        """
        dict_data = json.loads(data)
        return dict_data['trans_result']['data'][0]['dst']

    def run(self):
        """Send the request and return the translated text."""
        response = self.get_data()
        fanyi = self.parse_data(response)
        return fanyi
# Language names accepted from the user, mapped to the language codes sent
# to the translation endpoint.
_LANGUAGE_CODES = {
    '汉语': 'zh',
    '中文': 'zh',
    '中文简体': 'zh',
    '英语': 'en',
    '英文': 'en',
    '韩语': 'kor',
    '韩文': 'kor',
    '日文': 'jp',
    '日语': 'jp',
    '白话': 'yue',
    '粤语': 'yue',
    '广东话': 'yue',
    '法语': 'fra',
}


def tongyong(language):
    """Map a language name to its language code.

    Args:
        language: Language name typed by the user, e.g. ``'中文'``.

    Returns:
        The matching code (``'zh'``, ``'en'``, ``'kor'``, ``'jp'``, ``'yue'``
        or ``'fra'``), or ``False`` when the name is not recognised.
    """
    # Exact lookup only. The previous ``language in ('法语')`` tested against
    # a plain string (no trailing comma, so not a tuple), which made it a
    # substring check: '', '法' and '语' were all accepted as French.
    return _LANGUAGE_CODES.get(language, False)
# Validate a language name entered by the user.
def check(language):
    """Return the language code for *language*, re-prompting until valid.

    While ``tongyong`` does not return a truthy code, the user is asked for
    another name on standard input.
    """
    code = tongyong(language)
    while not code:
        code = tongyong(input("请重新输入:"))
    return code
if __name__ == '__main__':
    # Endpoint the translation form is posted to.
    url = 'https://fanyi.baidu.com/v2transapi'
    # Ask for each language and validate it immediately, so an invalid
    # source language is re-prompted before the target language is requested.
    yuan = check(input("请输入需要翻译语言:"))
    target = check(input("请输入目标语言:"))
    word = input('请输入需要翻译内容:')
    # Build the request, run it, and show the translated text.
    tran = trans(yuan, target, url, word)
    a = tran.run()
    print("翻译结果为: " + a)
结果(互动式)
3、UI界面实现(暂未实现)
4、参考链接:
百度翻译开放平台
Python反反爬系列(五)---小白学JS逆向(一)----百度翻译的sign值_郑德帅-CSDN博客



