Python爬虫高级案例,JS逆向,某手机反馈专区
前言
前段时间看到有人js逆向了某手机的反馈专区,我也第一时间学习了一下,学完后一直想着凭借自己的能力,看能不能单独完成一次,拿下js逆向真正第一血,所以就有了今天的受害者,某蓝厂手机圈子的逆向。
网站分析
既然选定了目标,那就开始抓包,分析网页。
这是抓包后的情况,通过对多个包进行比较发现,实际变化的参数只有lastId、nonce、timestamp、pageNum。具体分析了一下,lastId:前一页最后一个发言用户的tid;pageNum:当前页码;timestamp:13位时间戳;nonce:不知道是什么,但是看他长了一副加密的脸。至此,目标就很清晰了,重点针对nonce。
js逆向分析
通过全局搜索和断点定位,最终将目标锁定在这一行代码上。
e.params.nonce = Object(u["md5"])(t + "" + parseInt(1e7 * Math.random(), 10) + 1, 32)
通过观察发现,这一行代码的最终输出结果正是我们今天的目标。分析一下代码:
“t”:13位的时间戳
“+ "" +”:为将时间戳转为字符串
“1e7” :10000000,固定值
“Math.random()”:随机数
“parseInt”:取整
“t + "" +parseInt(1e7 * Math.random(), 10)+ 1”,这里的意思就很明显了:由于 t + "" 已经是字符串,后面的“+”都是字符串拼接,而不是数值相加,即 时间戳 + 取整后的随机数 + 字符“1”。最终的结果是一个最长21位的数字字符串(随机数不足7位时会相应变短)。
继续分析Object(u["md5"])和32发现,这里是调用了u的[md5]方法,将前面的21位字符串和32作为参数,传给了MD5,网页源代码如下:
e.md5 = function(e, t) {
function n(e, t) {
return e << t | e >>> 32 - t
}
function i(e, t) {
var n, i, a, r, o;
return a = 2147483648 & e,
r = 2147483648 & t,
n = 1073741824 & e,
i = 1073741824 & t,
o = (1073741823 & e) + (1073741823 & t),
n & i ? 2147483648 ^ o ^ a ^ r : n | i ? 1073741824 & o ? 3221225472 ^ o ^ a ^ r : 1073741824 ^ o ^ a ^ r : o ^ a ^ r
}
…………//此处省略
目前为止,思路已经很清晰了,这里我们可以通过Python实现MD5加密,也可以抠源代码改写。为了保证百分百不出错,这里我选择了抠代码。
js代码改写
首先是源代码:
/**
 * Digest routine quoted from the site's bundle, assigned to `e.md5`.
 * The initial state words and per-step structure below match the MD5 layout.
 *
 * @param {string} e - Input text; each character is read with charCodeAt and
 *   packed as a single byte, so the input is presumably ASCII — TODO confirm.
 * @param {number} t - Output selector: when loosely equal to 32 the full
 *   32-hex-character digest is returned, otherwise only the 16 hex characters
 *   built from the two middle state words.
 * @returns {string} Lower-case hexadecimal digest.
 */
e.md5 = function(e, t) {
// Rotate e left by t bits (32-bit).
function n(e, t) {
return e << t | e >>> 32 - t
}
// Add e and t as 32-bit words: the low 30 bits are summed with `+`, then
// bits 30 and 31 are patched in with XOR according to the carry cases.
function i(e, t) {
var n, i, a, r, o;
return a = 2147483648 & e,
r = 2147483648 & t,
n = 1073741824 & e,
i = 1073741824 & t,
o = (1073741823 & e) + (1073741823 & t),
n & i ? 2147483648 ^ o ^ a ^ r : n | i ? 1073741824 & o ? 3221225472 ^ o ^ a ^ r : 1073741824 ^ o ^ a ^ r : o ^ a ^ r
}
// Step helper using (x & y) | (~x & z) on arguments 2-4: adds that value, the
// message word o and the constant l to e, rotates left by s, then adds t.
function a(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return e & t | ~e & n
}(t, a, r), o), l)),
i(n(e, s), t)
}
// Same step shape as above, with the mixing function (x & z) | (y & ~z).
function r(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return e & n | t & ~n
}(t, a, r), o), l)),
i(n(e, s), t)
}
// Same step shape, with the mixing function x ^ y ^ z.
function o(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return e ^ t ^ n
}(t, a, r), o), l)),
i(n(e, s), t)
}
// Same step shape, with the mixing function y ^ (x | ~z).
function s(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return t ^ (e | ~n)
}(t, a, r), o), l)),
i(n(e, s), t)
}
// Render a 32-bit word as 8 hex characters, lowest byte first, each byte
// zero-padded to two digits.
function l(e) {
var t, n = "", i = "";
for (t = 0; t <= 3; t++)
n += (i = "0" + (e >>> 8 * t & 255).toString(16)).substr(i.length - 2, 2);
return n
}
var c, u, p, m, d, h, f, v, y, g = e, b = Array();
// The for-initialiser first converts the input into an array of 32-bit words:
// char codes are packed four per word (lowest byte first), a 128 (0x80) marker
// is placed after the last character, and the last two words receive the
// input length in bits (n << 3 and n >>> 29). The word count is a multiple
// of 16.
for (b = function(e) {
for (var t, n = e.length, i = n + 8, a = 16 * ((i - i % 64) / 64 + 1), r = Array(a - 1), o = 0, s = 0; s < n; )
o = s % 4 * 8,
r[t = (s - s % 4) / 4] = r[t] | e.charCodeAt(s) << o,
s++;
return t = (s - s % 4) / 4,
o = s % 4 * 8,
r[t] = r[t] | 128 << o,
r[a - 2] = n << 3,
r[a - 1] = n >>> 29,
r
}(g),
// Initial state words (0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476).
h = 1732584193,
f = 4023233417,
v = 2562383102,
y = 271733878,
// Process the word array in 16-word chunks.
c = 0; c < b.length; c += 16)
// Save the state before this chunk so it can be added back afterwards.
u = h,
p = f,
m = v,
d = y,
// One pass over the chunk starting at b[c]: 16 calls each to a, r, o and s,
// nested so that h, y, v and f are updated in turn.
f = s(f = s(f = s(f = s(f = o(f = o(f = o(f = o(f = r(f = r(f = r(f = r(f = a(f = a(f = a(f = a(f, v = a(v, y = a(y, h = a(h, f, v, y, b[c + 0], 7, 3614090360), f, v, b[c + 1], 12, 3905402710), h, f, b[c + 2], 17, 606105819), y, h, b[c + 3], 22, 3250441966), v = a(v, y = a(y, h = a(h, f, v, y, b[c + 4], 7, 4118548399), f, v, b[c + 5], 12, 1200080426), h, f, b[c + 6], 17, 2821735955), y, h, b[c + 7], 22, 4249261313), v = a(v, y = a(y, h = a(h, f, v, y, b[c + 8], 7, 1770035416), f, v, b[c + 9], 12, 2336552879), h, f, b[c + 10], 17, 4294925233), y, h, b[c + 11], 22, 2304563134), v = a(v, y = a(y, h = a(h, f, v, y, b[c + 12], 7, 1804603682), f, v, b[c + 13], 12, 4254626195), h, f, b[c + 14], 17, 2792965006), y, h, b[c + 15], 22, 1236535329), v = r(v, y = r(y, h = r(h, f, v, y, b[c + 1], 5, 4129170786), f, v, b[c + 6], 9, 3225465664), h, f, b[c + 11], 14, 643717713), y, h, b[c + 0], 20, 3921069994), v = r(v, y = r(y, h = r(h, f, v, y, b[c + 5], 5, 3593408605), f, v, b[c + 10], 9, 38016083), h, f, b[c + 15], 14, 3634488961), y, h, b[c + 4], 20, 3889429448), v = r(v, y = r(y, h = r(h, f, v, y, b[c + 9], 5, 568446438), f, v, b[c + 14], 9, 3275163606), h, f, b[c + 3], 14, 4107603335), y, h, b[c + 8], 20, 1163531501), v = r(v, y = r(y, h = r(h, f, v, y, b[c + 13], 5, 2850285829), f, v, b[c + 2], 9, 4243563512), h, f, b[c + 7], 14, 1735328473), y, h, b[c + 12], 20, 2368359562), v = o(v, y = o(y, h = o(h, f, v, y, b[c + 5], 4, 4294588738), f, v, b[c + 8], 11, 2272392833), h, f, b[c + 11], 16, 1839030562), y, h, b[c + 14], 23, 4259657740), v = o(v, y = o(y, h = o(h, f, v, y, b[c + 1], 4, 2763975236), f, v, b[c + 4], 11, 1272893353), h, f, b[c + 7], 16, 4139469664), y, h, b[c + 10], 23, 3200236656), v = o(v, y = o(y, h = o(h, f, v, y, b[c + 13], 4, 681279174), f, v, b[c + 0], 11, 3936430074), h, f, b[c + 3], 16, 3572445317), y, h, b[c + 6], 23, 76029189), v = o(v, y = o(y, h = o(h, f, v, y, b[c + 9], 4, 3654602809), f, v, b[c + 12], 11, 3873151461), h, f, b[c + 15], 16, 
530742520), y, h, b[c + 2], 23, 3299628645), v = s(v, y = s(y, h = s(h, f, v, y, b[c + 0], 6, 4096336452), f, v, b[c + 7], 10, 1126891415), h, f, b[c + 14], 15, 2878612391), y, h, b[c + 5], 21, 4237533241), v = s(v, y = s(y, h = s(h, f, v, y, b[c + 12], 6, 1700485571), f, v, b[c + 3], 10, 2399980690), h, f, b[c + 10], 15, 4293915773), y, h, b[c + 1], 21, 2240044497), v = s(v, y = s(y, h = s(h, f, v, y, b[c + 8], 6, 1873313359), f, v, b[c + 15], 10, 4264355552), h, f, b[c + 6], 15, 2734768916), y, h, b[c + 13], 21, 1309151649), v = s(v, y = s(y, h = s(h, f, v, y, b[c + 4], 6, 4149444226), f, v, b[c + 11], 10, 3174756917), h, f, b[c + 2], 15, 718787259), y, h, b[c + 9], 21, 3951481745),
// Add the saved pre-chunk state back into the working state.
h = i(h, u),
f = i(f, p),
v = i(v, m),
y = i(y, d);
// t == 32 (loose comparison, so "32" also matches) selects all four words;
// any other t returns only the two middle words (16 hex characters).
return 32 == t ? l(h) + l(f) + l(v) + l(y) : l(f) + l(v)
}
其次是改写后的代码,这里遵循的是改的越少越好的原则:
/**
 * Standalone copy of the site's digest routine, extracted so it can be run
 * outside the page. The initial state words and per-step structure below
 * match the MD5 layout.
 *
 * @param {string} e - Input text; each character is read with charCodeAt and
 *   packed as a single byte, so the input is presumably ASCII — TODO confirm.
 * @param {number|string} t - Output selector: when loosely equal to 32 the
 *   full 32-hex-character digest is returned, otherwise only the 16 hex
 *   characters built from the two middle state words.
 * @returns {string} Lower-case hexadecimal digest.
 */
function MD5 (e, t) {
// Rotate e left by t bits (32-bit).
function n(e, t) {
return e << t | e >>> 32 - t
}
// Add e and t as 32-bit words: the low 30 bits are summed with `+`, then
// bits 30 and 31 are patched in with XOR according to the carry cases.
function i(e, t) {
var n, i, a, r, o;
return a = 2147483648 & e,
r = 2147483648 & t,
n = 1073741824 & e,
i = 1073741824 & t,
o = (1073741823 & e) + (1073741823 & t),
n & i ? 2147483648 ^ o ^ a ^ r : n | i ? 1073741824 & o ? 3221225472 ^ o ^ a ^ r : 1073741824 ^ o ^ a ^ r : o ^ a ^ r
}
// Step helper using (x & y) | (~x & z) on arguments 2-4: adds that value, the
// message word o and the constant l to e, rotates left by s, then adds t.
function a(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return e & t | ~e & n
}(t, a, r), o), l)),
i(n(e, s), t)
}
// Same step shape as above, with the mixing function (x & z) | (y & ~z).
function r(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return e & n | t & ~n
}(t, a, r), o), l)),
i(n(e, s), t)
}
// Same step shape, with the mixing function x ^ y ^ z.
function o(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return e ^ t ^ n
}(t, a, r), o), l)),
i(n(e, s), t)
}
// Same step shape, with the mixing function y ^ (x | ~z).
function s(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return t ^ (e | ~n)
}(t, a, r), o), l)),
i(n(e, s), t)
}
// Render a 32-bit word as 8 hex characters, lowest byte first, each byte
// zero-padded to two digits.
function l(e) {
var t, n = "",
i = "";
for (t = 0; t <= 3; t++)
n += (i = "0" + (e >>> 8 * t & 255).toString(16)).substr(i.length - 2, 2);
return n
}
var c, u, p, m, d, h, f, v, y, g = e,
b = Array();
// The for-initialiser first converts the input into an array of 32-bit words:
// char codes are packed four per word (lowest byte first), a 128 (0x80) marker
// is placed after the last character, and the last two words receive the
// input length in bits (n << 3 and n >>> 29). The word count is a multiple
// of 16.
for (b = function(e) {
for (var t, n = e.length, i = n + 8, a = 16 * ((i - i % 64) / 64 + 1), r = Array(a - 1), o = 0, s = 0; s < n;)
o = s % 4 * 8,
r[t = (s - s % 4) / 4] = r[t] | e.charCodeAt(s) << o,
s++;
return t = (s - s % 4) / 4,
o = s % 4 * 8,
r[t] = r[t] | 128 << o,
r[a - 2] = n << 3,
r[a - 1] = n >>> 29,
r
}(g),
// Initial state words (0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476).
h = 1732584193,
f = 4023233417,
v = 2562383102,
y = 271733878,
// Process the word array in 16-word chunks.
c = 0; c < b.length; c += 16)
// Save the state before this chunk so it can be added back afterwards.
u = h,
p = f,
m = v,
d = y,
// One pass over the chunk starting at b[c]: 16 calls each to a, r, o and s,
// nested so that h, y, v and f are updated in turn.
f = s(f = s(f = s(f = s(f = o(f = o(f = o(f = o(f = r(f = r(f = r(f = r(f = a(f = a(f = a(f = a(f, v = a(v, y = a(y, h = a(h, f, v, y, b[c + 0], 7, 3614090360), f, v, b[c + 1], 12, 3905402710), h, f, b[c + 2], 17, 606105819), y, h, b[c + 3], 22, 3250441966), v = a(v, y = a(y, h = a(h, f, v, y, b[c + 4], 7, 4118548399), f, v, b[c + 5], 12, 1200080426), h, f, b[c + 6], 17, 2821735955), y, h, b[c + 7], 22, 4249261313), v = a(v, y = a(y, h = a(h, f, v, y, b[c + 8], 7, 1770035416), f, v, b[c + 9], 12, 2336552879), h, f, b[c + 10], 17, 4294925233), y, h, b[c + 11], 22, 2304563134), v = a(v, y = a(y, h = a(h, f, v, y, b[c + 12], 7, 1804603682), f, v, b[c + 13], 12, 4254626195), h, f, b[c + 14], 17, 2792965006), y, h, b[c + 15], 22, 1236535329), v = r(v, y = r(y, h = r(h, f, v, y, b[c + 1], 5, 4129170786), f, v, b[c + 6], 9, 3225465664), h, f, b[c + 11], 14, 643717713), y, h, b[c + 0], 20, 3921069994), v = r(v, y = r(y, h = r(h, f, v, y, b[c + 5], 5, 3593408605), f, v, b[c + 10], 9, 38016083), h, f, b[c + 15], 14, 3634488961), y, h, b[c + 4], 20, 3889429448), v = r(v, y = r(y, h = r(h, f, v, y, b[c + 9], 5, 568446438), f, v, b[c + 14], 9, 3275163606), h, f, b[c + 3], 14, 4107603335), y, h, b[c + 8], 20, 1163531501), v = r(v, y = r(y, h = r(h, f, v, y, b[c + 13], 5, 2850285829), f, v, b[c + 2], 9, 4243563512), h, f, b[c + 7], 14, 1735328473), y, h, b[c + 12], 20, 2368359562), v = o(v, y = o(y, h = o(h, f, v, y, b[c + 5], 4, 4294588738), f, v, b[c + 8], 11, 2272392833), h, f, b[c + 11], 16, 1839030562), y, h, b[c + 14], 23, 4259657740), v = o(v, y = o(y, h = o(h, f, v, y, b[c + 1], 4, 2763975236), f, v, b[c + 4], 11, 1272893353), h, f, b[c + 7], 16, 4139469664), y, h, b[c + 10], 23, 3200236656), v = o(v, y = o(y, h = o(h, f, v, y, b[c + 13], 4, 681279174), f, v, b[c + 0], 11, 3936430074), h, f, b[c + 3], 16, 3572445317), y, h, b[c + 6], 23, 76029189), v = o(v, y = o(y, h = o(h, f, v, y, b[c + 9], 4, 3654602809), f, v, b[c + 12], 11, 3873151461), h, f, b[c + 15], 16, 
530742520), y, h, b[c + 2], 23, 3299628645), v = s(v, y = s(y, h = s(h, f, v, y, b[c + 0], 6, 4096336452), f, v, b[c + 7], 10, 1126891415), h, f, b[c + 14], 15, 2878612391), y, h, b[c + 5], 21, 4237533241), v = s(v, y = s(y, h = s(h, f, v, y, b[c + 12], 6, 1700485571), f, v, b[c + 3], 10, 2399980690), h, f, b[c + 10], 15, 4293915773), y, h, b[c + 1], 21, 2240044497), v = s(v, y = s(y, h = s(h, f, v, y, b[c + 8], 6, 1873313359), f, v, b[c + 15], 10, 4264355552), h, f, b[c + 6], 15, 2734768916), y, h, b[c + 13], 21, 1309151649), v = s(v, y = s(y, h = s(h, f, v, y, b[c + 4], 6, 4149444226), f, v, b[c + 11], 10, 3174756917), h, f, b[c + 2], 15, 718787259), y, h, b[c + 9], 21, 3951481745),
// Add the saved pre-chunk state back into the working state.
h = i(h, u),
f = i(f, p),
v = i(v, m),
y = i(y, d);
// t == 32 (loose comparison, so "32" also matches) selects all four words;
// any other t returns only the two middle words (16 hex characters).
return 32 == t ? l(h) + l(f) + l(v) + l(y) : l(f) + l(v)
}
经过测试,代码能完美实现我想要的功能,
然后将代码保存为.js文件。
Python代码编写
常规操作
import requests
import random
import execjs
import json
import pandas as pd
import time
# Endpoint that returns the thread list of a forum section.
url = 'https://bbs.vivo.com.cn/api/community/forum/threads'

# Request headers replayed from a captured browser request. Long values are
# split across lines with implicit string concatenation; the resulting strings
# are unchanged.
headers = {
    'accept': 'application/json, text/plain, */*',
    'content-type': 'application/json;charset=UTF-8',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/95.0.4621.0 Safari/537.36'
    ),
    'sec-ch-ua': '"Chromium";v="21", " Not;A Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'origin': 'https://bbs.vivo.com.cn',
    'referer': 'https://bbs.vivo.com.cn/newbbs/forum/9',
    # Session cookie copied from the capture; it will presumably expire and
    # need refreshing.
    'cookie': (
        'cookieId=e1c6727a-9b29-1c13-a417-1b74440b9d521639290997482; '
        'KL9d_2132_saltkey=pU2Rr4AV; '
        'KL9d_2132_lastvisit=1639287439; '
        'Hm_lvt_9ef7debb81babe8b94af7f2c274869fd=1639291140,1639713347; '
        'Hm_lvt_a7471116b9007c038d41873ab9121a9e=1639291040,1639713440; '
        'sessionId=b6c66b37-b88e-f74d-fa6b-b7e526d5e5f7'
    ),
}
这里虽然导入了好多包,但其实都是根据使用需要一个个导入的。
Python生成js需要的参数
def get_timestamp():
    """Return the current Unix time in whole milliseconds (13 digits today)."""
    millis = time.time() * 1000
    return int(millis)
def get_str_():
    """Build the plain-text string that is hashed into the ``nonce``.

    Mirrors the site's ``t + "" + parseInt(1e7 * Math.random(), 10) + 1``:
    the millisecond timestamp, a random integer in [0, 10_000_000) and a
    literal ``'1'`` are concatenated as text.

    Returns:
        str: A digit string of at most 21 characters (shorter when the
        random part has fewer than 7 digits).
    """
    # Truncate directly. The previous str(...)[:10] -> float -> int round trip
    # mis-parsed values that str() renders in scientific notation
    # (e.g. "1.2345678901e-05"[:10] became 1.23456789 -> 1 instead of 0).
    num = int(random.random() * 10000000)
    return str(get_timestamp()) + str(num) + '1'
这里分别生成时间戳和21位拼接字符串
导入js文件,获取最重要的参数nonce
def get_cxt():
    """Read ``1.js`` from the working directory and compile it with execjs.

    Returns:
        The compiled execjs context for the script.
    """
    with open("1.js") as js_file:
        js_source = js_file.read()
    return execjs.compile(js_source)
def get_nonce():
    """Compute the ``nonce`` request parameter via the extracted JS routine.

    Returns:
        str: The 32-character hex digest of ``get_str_()``.
    """
    # The extracted script defines `function MD5 (e, t)`; JavaScript names are
    # case-sensitive, so calling 'md5' fails because no such function exists.
    # The selector is passed as the number 32, matching the site's own call.
    return get_cxt().call('MD5', get_str_(), 32)
获取data 万事俱备,下一步生成data,这里我选择了第一页作为测试。
def get_data(page_num='1', last_id=''):
    """Build the JSON payload for one page of the thread list.

    With the default arguments this is the original first-page payload.

    Args:
        page_num: Page number to request; converted to ``str`` for the payload.
        last_id: ``tid`` of the last thread on the previous page; empty for
            the first page.

    Returns:
        dict: Payload to be JSON-encoded and POSTed to the API.
    """
    # NOTE(review): get_nonce() hashes its own get_timestamp() reading, which
    # may differ by a few milliseconds from the 'timestamp' value below.
    # Confirm the server does not cross-check the two.
    return {
        'forumId': "9",
        'imgSpecs': ["t577x324", "t577x4096"],
        'lastId': last_id,
        'nonce': get_nonce(),
        'order': '1',
        'pageNum': str(page_num),
        'pageSize': '10',
        'timestamp': get_timestamp(),
        'topicId': ""
    }
发起请求,拿到数据
def main():
    """Request the first page of threads and extract the fields of interest.

    Returns:
        list[dict]: One dict per thread with the keys ``bbsname``, ``name``,
        ``summary`` and ``tid``.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        RuntimeError: If the JSON body does not contain ``data.list`` (for
            example when the API rejects the request parameters).
    """
    # The API expects a JSON-encoded body, hence json.dumps rather than
    # form-encoding. The timeout keeps an unresponsive server from hanging
    # the script forever.
    response = requests.post(
        url,
        headers=headers,
        data=json.dumps(get_data()),
        timeout=10,
    )
    response.raise_for_status()
    payload = json.loads(response.text)
    try:
        threads = payload['data']['list']
    except (KeyError, TypeError) as exc:
        # Show the server's own error envelope instead of a bare KeyError.
        raise RuntimeError(f'unexpected API response: {payload!r}') from exc
    return [
        {
            'bbsname': thread['author']['bbsName'],
            'name': thread['forum']['name'],
            'summary': thread['summary'],
            'tid': thread['tid'],
        }
        for thread in threads
    ]
if __name__ == '__main__':
    # Tabulate the scraped threads, print them, then save them to Excel.
    threads_frame = pd.DataFrame(main())
    print(threads_frame)
    threads_frame.to_excel('手机圈子0.xlsx')
全部代码展示
import requests
import random
import execjs
import json
import pandas as pd
import time
# Endpoint that returns the thread list of a forum section.
url = 'https://bbs.vivo.com.cn/api/community/forum/threads'

# Request headers replayed from a captured browser request. Long values are
# split across lines with implicit string concatenation; the resulting strings
# are unchanged.
headers = {
    'accept': 'application/json, text/plain, */*',
    'content-type': 'application/json;charset=UTF-8',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/95.0.4621.0 Safari/537.36'
    ),
    'sec-ch-ua': '"Chromium";v="21", " Not;A Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'origin': 'https://bbs.vivo.com.cn',
    'referer': 'https://bbs.vivo.com.cn/newbbs/forum/9',
    # Session cookie copied from the capture; it will presumably expire and
    # need refreshing.
    'cookie': (
        'cookieId=e1c6727a-9b29-1c13-a417-1b74440b9d521639290997482; '
        'KL9d_2132_saltkey=pU2Rr4AV; '
        'KL9d_2132_lastvisit=1639287439; '
        'Hm_lvt_9ef7debb81babe8b94af7f2c274869fd=1639291140,1639713347; '
        'Hm_lvt_a7471116b9007c038d41873ab9121a9e=1639291040,1639713440; '
        'sessionId=b6c66b37-b88e-f74d-fa6b-b7e526d5e5f7'
    ),
}
def get_timestamp():
    """Return the current Unix time in whole milliseconds (13 digits today)."""
    millis = time.time() * 1000
    return int(millis)
def get_str_():
    """Build the plain-text string that is hashed into the ``nonce``.

    Mirrors the site's ``t + "" + parseInt(1e7 * Math.random(), 10) + 1``:
    the millisecond timestamp, a random integer in [0, 10_000_000) and a
    literal ``'1'`` are concatenated as text.

    Returns:
        str: A digit string of at most 21 characters (shorter when the
        random part has fewer than 7 digits).
    """
    # Truncate directly. The previous str(...)[:10] -> float -> int round trip
    # mis-parsed values that str() renders in scientific notation
    # (e.g. "1.2345678901e-05"[:10] became 1.23456789 -> 1 instead of 0).
    num = int(random.random() * 10000000)
    return str(get_timestamp()) + str(num) + '1'
def get_cxt():
    """Read ``1.js`` from the working directory and compile it with execjs.

    Returns:
        The compiled execjs context for the script.
    """
    with open("1.js") as js_file:
        js_source = js_file.read()
    return execjs.compile(js_source)
def get_nonce():
    """Compute the ``nonce`` request parameter via the extracted JS routine.

    Returns:
        str: The 32-character hex digest of ``get_str_()``.
    """
    # The extracted script defines `function MD5 (e, t)`; JavaScript names are
    # case-sensitive, so calling 'md5' fails because no such function exists.
    # The selector is passed as the number 32, matching the site's own call.
    return get_cxt().call('MD5', get_str_(), 32)
def get_data(page_num='1', last_id=''):
    """Build the JSON payload for one page of the thread list.

    With the default arguments this is the original first-page payload.

    Args:
        page_num: Page number to request; converted to ``str`` for the payload.
        last_id: ``tid`` of the last thread on the previous page; empty for
            the first page.

    Returns:
        dict: Payload to be JSON-encoded and POSTed to the API.
    """
    # NOTE(review): get_nonce() hashes its own get_timestamp() reading, which
    # may differ by a few milliseconds from the 'timestamp' value below.
    # Confirm the server does not cross-check the two.
    return {
        'forumId': "9",
        'imgSpecs': ["t577x324", "t577x4096"],
        'lastId': last_id,
        'nonce': get_nonce(),
        'order': '1',
        'pageNum': str(page_num),
        'pageSize': '10',
        'timestamp': get_timestamp(),
        'topicId': ""
    }
def main():
    """Request the first page of threads and extract the fields of interest.

    Returns:
        list[dict]: One dict per thread with the keys ``bbsname``, ``name``,
        ``summary`` and ``tid``.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        RuntimeError: If the JSON body does not contain ``data.list`` (for
            example when the API rejects the request parameters).
    """
    # The API expects a JSON-encoded body, hence json.dumps rather than
    # form-encoding. The timeout keeps an unresponsive server from hanging
    # the script forever.
    response = requests.post(
        url,
        headers=headers,
        data=json.dumps(get_data()),
        timeout=10,
    )
    response.raise_for_status()
    payload = json.loads(response.text)
    try:
        threads = payload['data']['list']
    except (KeyError, TypeError) as exc:
        # Show the server's own error envelope instead of a bare KeyError.
        raise RuntimeError(f'unexpected API response: {payload!r}') from exc
    return [
        {
            'bbsname': thread['author']['bbsName'],
            'name': thread['forum']['name'],
            'summary': thread['summary'],
            'tid': thread['tid'],
        }
        for thread in threads
    ]
if __name__ == '__main__':
    # Tabulate the scraped threads, print them, then save them to Excel.
    threads_frame = pd.DataFrame(main())
    print(threads_frame)
    threads_frame.to_excel('手机圈子0.xlsx')
成果展示
写在最后
1 这是我自己真正意义上第一次单独完成的js逆向,过程也很曲折,至此成功,发个文章,纪念一下,心里不由得WK一声。 2 特别感谢馒头哥的帮助,所有代码完成后,只要请求就报“客户端参数错误”,被这个问题困扰了很久,后来还是馒头哥发现,data的格式不对,需要用json.dumps()将数据编码,在这里再一次感谢。 3 至此虽然js逆向的工作已经结束了,但是这个网站的data里面还有一个lastid,要获取前一页最后一位用户的tid,但也不是什么难事,后续有时间再写个循环。