Python爬取酷狗MP3音频的步骤

时间:2021-05-23

分析问题

音频url

点入某个音乐的播放界面,通过F12-Network,分析数据,可以看到有一个index.php?..返回数据中有一个play_url,打开后正是我们需要的音频。

查看该url的headers,其params参数如下,通过反复不同的几次尝试,得知r、callback、dfid、mid、platid这几项不变,而通过初步的requests尝试,发现最后一项'_'可有可无,改变的只有hash和album_id两项。

r: play/getdata
callback: jQuery1910861615852090795_1612578519454
hash: EF0DA656831F08B1FD2CB855BC38ED2C
dfid: 0Q0Clh1IcZaG3ey1J70RaTiL
mid: b6cf66837b18642cc269390b066649dc
platid: 4
album_id: 41669581
_: 1612578519455

搜索url

得知改变的只有两项后,那就容易多了,在搜索歌曲界面Network,发现song?...该url返回值中有hash和album_id存在,我们之后只用搜索结果第一项(一般要搜的歌曲排名第一)。

同样分析其params参数,改变的只有keyword、signature、clienttime、mid、uuid。后三者可以比较容易看出其为毫秒级时间戳(13位),keyword也挺容易明白,signature哪里找呢?通过全局搜索signature,发现有一个js文件中含有该关键词。

callback: callback123
keyword: 花海
page: 1
pagesize: 30
bitrate: 0
isfuzzy: 0
tag: em
inputtype: 0
platform: WebFilter
userid: -1
clientver: 2000
iscorrection: 1
privilege_filter: 0
srcappid: 2919
clienttime: 1612579100435
mid: 1612579100435
uuid: 1612579100435
dfid: -
signature: 472F60133C23184CAFC5005350C90229

JS

找到的js代码如下

/*
 * Minified script found through the global search for "signature". It has two parts.
 *
 * 1. faultylabs.MD5(a)
 *    Returns the MD5 digest of `a` as an upper-case hexadecimal string.
 *    `a` may be a string (characters with a code above 127 are expanded into bytes
 *    through encodeURIComponent), an array of strings or numbers, an ArrayBuffer or
 *    a typed array. Any other type triggers alert("MD5 type mismatch, ...").
 *
 * 2. An anonymous function whose inner function b(get, post, options) is exported as
 *    getInterFacePublic (module.exports, AMD define, or window). It signs a request:
 *      - copies the `get` parameters into h and fills in the ones that are missing:
 *        srcappid "2919", clientver "20000", clienttime = mid = uuid =
 *        (new Date).getTime(), dfid "-";
 *      - sorts the keys of h and builds the array o of "key=value" strings;
 *      - appends the post body when one is given (JSON.stringify for postType "json",
 *        otherwise "k=v" pairs joined with "&"; a non-object body is appended as is);
 *      - puts the fixed key p at both ends of o and sets
 *        h.signature = faultylabs.MD5(o.join(""));
 *      - passes h to the callback.
 *    When the user agent or external.superCall indicates the Kugou client and useH5 is
 *    not set, signing is first delegated to KgMobileCall.callCmd({cmd: 764, ...}); the
 *    JavaScript path above is used if that call reports no status.
 */
"undefined" == typeof faultylabs && (faultylabs = {}),faultylabs.MD5 = function(a) { function b(a) { var b = (a >>> 0).toString(16); return "00000000".substr(0, 8 - b.length) + b } function c(a) { for (var b = [], c = 0; c < a.length; c++) b = b.concat(k(a[c])); return b } function d(a) { for (var b = [], c = 0; 8 > c; c++) b.push(255 & a), a >>>= 8; return b } function e(a, b) { return a << b & 4294967295 | a >>> 32 - b } function f(a, b, c) { return a & b | ~a & c } function g(a, b, c) { return c & a | ~c & b } function h(a, b, c) { return a ^ b ^ c } function i(a, b, c) { return b ^ (a | ~c) } function j(a, b) { return a[b + 3] << 24 | a[b + 2] << 16 | a[b + 1] << 8 | a[b] } function k(a) { for (var b = [], c = 0; c < a.length; c++) if (a.charCodeAt(c) <= 127) b.push(a.charCodeAt(c)); else for (var d = encodeURIComponent(a.charAt(c)).substr(1).split("%"), e = 0; e < d.length; e++) b.push(parseInt(d[e], 16)); return b } function l() { for (var a = "", c = 0, d = 0, e = 3; e >= 0; e--) d = arguments[e], c = 255 & d, d >>>= 8, c <<= 8, c |= 255 & d, d >>>= 8, c <<= 8, c |= 255 & d, d >>>= 8, c <<= 8, c |= d, a += b(c); return a } function m(a) { for (var b = new Array(a.length), c = 0; c < a.length; c++) b[c] = a[c]; return b } function n(a, b) { return 4294967295 & a + b } function o() { function a(a, b, c, d) { var f = v; v = u, u = t, t = n(t, e(n(s, n(a, n(b, c))), d)), s = f } var b = p.length; p.push(128); var c = p.length % 64; if (c > 56) { for (var k = 0; 64 - c > k; k++) p.push(0); c = p.length % 64 } for (k = 0; 56 - c > k; k++) p.push(0); p = p.concat(d(8 * b)); var m = 1732584193 , o = 4023233417 , q = 2562383102 , r = 271733878 , s = 0 , t = 0 , u = 0 , v = 0; for (k = 0; k < p.length / 64; k++) { s = m, t = o, u = q, v = r; var w = 64 * k; a(f(t, u, v), 3614090360, j(p, w), 7), a(f(t, u, v), 3905402710, j(p, w + 4), 12), a(f(t, u, v), 606105819, j(p, w + 8), 17), a(f(t, u, v), 3250441966, j(p, w + 12), 22), a(f(t, u, v), 4118548399, j(p, w + 16), 7), 
a(f(t, u, v), 1200080426, j(p, w + 20), 12), a(f(t, u, v), 2821735955, j(p, w + 24), 17), a(f(t, u, v), 4249261313, j(p, w + 28), 22), a(f(t, u, v), 1770035416, j(p, w + 32), 7), a(f(t, u, v), 2336552879, j(p, w + 36), 12), a(f(t, u, v), 4294925233, j(p, w + 40), 17), a(f(t, u, v), 2304563134, j(p, w + 44), 22), a(f(t, u, v), 1804603682, j(p, w + 48), 7), a(f(t, u, v), 4254626195, j(p, w + 52), 12), a(f(t, u, v), 2792965006, j(p, w + 56), 17), a(f(t, u, v), 1236535329, j(p, w + 60), 22), a(g(t, u, v), 4129170786, j(p, w + 4), 5), a(g(t, u, v), 3225465664, j(p, w + 24), 9), a(g(t, u, v), 643717713, j(p, w + 44), 14), a(g(t, u, v), 3921069994, j(p, w), 20), a(g(t, u, v), 3593408605, j(p, w + 20), 5), a(g(t, u, v), 38016083, j(p, w + 40), 9), a(g(t, u, v), 3634488961, j(p, w + 60), 14), a(g(t, u, v), 3889429448, j(p, w + 16), 20), a(g(t, u, v), 568446438, j(p, w + 36), 5), a(g(t, u, v), 3275163606, j(p, w + 56), 9), a(g(t, u, v), 4107603335, j(p, w + 12), 14), a(g(t, u, v), 1163531501, j(p, w + 32), 20), a(g(t, u, v), 2850285829, j(p, w + 52), 5), a(g(t, u, v), 4243563512, j(p, w + 8), 9), a(g(t, u, v), 1735328473, j(p, w + 28), 14), a(g(t, u, v), 2368359562, j(p, w + 48), 20), a(h(t, u, v), 4294588738, j(p, w + 20), 4), a(h(t, u, v), 2272392833, j(p, w + 32), 11), a(h(t, u, v), 1839030562, j(p, w + 44), 16), a(h(t, u, v), 4259657740, j(p, w + 56), 23), a(h(t, u, v), 2763975236, j(p, w + 4), 4), a(h(t, u, v), 1272893353, j(p, w + 16), 11), a(h(t, u, v), 4139469664, j(p, w + 28), 16), a(h(t, u, v), 3200236656, j(p, w + 40), 23), a(h(t, u, v), 681279174, j(p, w + 52), 4), a(h(t, u, v), 3936430074, j(p, w), 11), a(h(t, u, v), 3572445317, j(p, w + 12), 16), a(h(t, u, v), 76029189, j(p, w + 24), 23), a(h(t, u, v), 3654602809, j(p, w + 36), 4), a(h(t, u, v), 3873151461, j(p, w + 48), 11), a(h(t, u, v), 530742520, j(p, w + 60), 16), a(h(t, u, v), 3299628645, j(p, w + 8), 23), a(i(t, u, v), 4096336452, j(p, w), 6), a(i(t, u, v), 1126891415, j(p, w + 28), 10), a(i(t, u, v), 
2878612391, j(p, w + 56), 15), a(i(t, u, v), 4237533241, j(p, w + 20), 21), a(i(t, u, v), 1700485571, j(p, w + 48), 6), a(i(t, u, v), 2399980690, j(p, w + 12), 10), a(i(t, u, v), 4293915773, j(p, w + 40), 15), a(i(t, u, v), 2240044497, j(p, w + 4), 21), a(i(t, u, v), 1873313359, j(p, w + 32), 6), a(i(t, u, v), 4264355552, j(p, w + 60), 10), a(i(t, u, v), 2734768916, j(p, w + 24), 15), a(i(t, u, v), 1309151649, j(p, w + 52), 21), a(i(t, u, v), 4149444226, j(p, w + 16), 6), a(i(t, u, v), 3174756917, j(p, w + 44), 10), a(i(t, u, v), 718787259, j(p, w + 8), 15), a(i(t, u, v), 3951481745, j(p, w + 36), 21), m = n(m, s), o = n(o, t), q = n(q, u), r = n(r, v) } return l(r, q, o, m).toUpperCase() } var p = null , q = null; return "string" == typeof a ? p = k(a) : a.constructor == Array ? 0 === a.length ? p = a : "string" == typeof a[0] ? p = c(a) : "number" == typeof a[0] ? p = a : q = typeof a[0] : "undefined" != typeof ArrayBuffer ? a instanceof ArrayBuffer ? p = m(new Uint8Array(a)) : a instanceof Uint8Array || a instanceof Int8Array ? p = m(a) : a instanceof Uint32Array || a instanceof Int32Array || a instanceof Uint16Array || a instanceof Int16Array || a instanceof Float32Array || a instanceof Float64Array ? p = m(new Uint8Array(a.buffer)) : q = typeof a : q = typeof a, q && alert("MD5 type mismatch, cannot process " + q), o()},function() { function a(a) { if (window.KgMobileCall) a && a(); else { var b = document.createElement("script"); b.src = "https://m3ws.kugou.com/static/js/common/mobilecall_3.0.js", b.onload = function() { this.readyState && "loaded" != this.readyState && "complete" != this.readyState || a && a() } , document.body.appendChild(b) } } function b(b, c, d) { b = b || {}, c = c || "", d = d || {}; var e, f = !1, g = "json"; "function" == typeof d ? e = d : (e = d.callback, f = d.useH5 || !1, g = d.postType || "json"); var h = {}; for (var i in b) !h[i] && (h[i] = b[i]); var j = function() { var a = navigator.userAgent.match(/KGBrowser/gi) ? 
!0 : !1 , b = navigator.userAgent.match(/kugouandroid/gi) ? !0 : !1 , c = "undefined" == typeof external ? !1 : "undefined" == typeof external.superCall ? !1 : !0; return c || b || a ? !0 : !1 }() , k = (new Date).getTime() , l = [] , m = {} , n = [] , o = [] , p = "NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt" , q = { appid: function(a) { return a() }, srcappid: function(a) { return a("2919") }, clientver: function(a) { return a("20000") }, "clienttime,mid,uuid,dfid": function(a) { return a({ clienttime: k, mid: k, uuid: k, dfid: "-" }) } } , r = function() { for (var a in q) l.push(a); !function(a) { function b(a) { if (a < l.length) q[l[a]](function(c) { if (c) if ("[object Object]" == Object.prototype.toString.call(c)) for (var d in c) m[d] = c[d]; else m[l[a]] = c; b(a + 1) }); else { for (var d in m) !h[d] && (h[d] = m[d]); for (var d in h) n.push(d); if (n.sort(), n.forEach(function(a) { o.push(a + "=" + h[a]) }), c) if ("[object Object]" == Object.prototype.toString.call(c)) if ("json" == g) o.push(JSON.stringify(c)); else { var f = []; for (var d in c) f.push(d + "=" + c[d]); o.push(f.join("&")) } else o.push(c); o.unshift(p), o.push(p), h.signature = faultylabs.MD5(o.join("")), e && e(h) } } b(a) }(0) }; if (c && ("[object Object]" != Object.prototype.toString.call(c) ? j = !1 : "urlencoded" == g && (j = !1)), j && !f) { var s = !1; a(function() { KgMobileCall.callCmd({ cmd: 764, jsonStr: JSON.stringify({ get: h, post: c }), callback: function(a) { if (s) return !1; if (s = !0, a && a.status) { delete a.status; for (var b in a) !h[b] && (h[b] = a[b]); return e && e(h) } j = !1, r() } }) }) } else j = !1, r() } "undefined" != typeof module && module.exports ? module.exports = b : "function" == typeof define && define.amd ? define(function() { return b }) : window.getInterFacePublic = b}();

在274行发现h.signature = faultylabs.MD5(o.join("")),初步理解为:signature是把数组o的各个元素拼接成一个字符串后,再对该字符串计算MD5得到的。在这一行加上断点,并将o加入watch。

0: "NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt"
1: "bitrate=0"
2: "callback=callback123"
3: "clienttime=1612580098162"
4: "clientver=2000"
5: "dfid=-"
6: "inputtype=0"
7: "iscorrection=1"
8: "isfuzzy=0"
9: "keyword=花海"
10: "mid=1612580098162"
11: "page=1"
12: "pagesize=30"
13: "platform=WebFilter"
14: "privilege_filter=0"
15: "srcappid=2919"
16: "tag=em"
17: "userid=-1"
18: "uuid=1612580098162"
19: "NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt"
length: 20

在watch里不难发现o为一个长度为20的数组,之后我们按之前理解将字符串拼接。

NVPh5oo715z5DIWAeQlhMDsWXXQV4hwtbitrate=0callback=callback123clienttime=1612580098162clientver=2000dfid=-inputtype=0iscorrection=1isfuzzy=0keyword=花海mid=1612580098162page=1pagesize=30platform=WebFilterprivilege_filter=0srcappid=2919tag=emuserid=-1uuid=1612580098162NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt

接下来要算出signature。严格来说这里不是"解密",而是对上面拼接好的字符串计算MD5摘要(MD5是哈希算法,不是加密)。照着js重写一遍算法,这谁会?反正我不会 0.0,那也有办法,用python直接调用js文件。274行调用的是faultylabs.MD5,往前找看看有没有相关函数,果真有一个,将其保存为kugou.js

/*
 * faultylabs.MD5(a) -- content to be saved as kugou.js.
 *
 * Returns the MD5 digest of `a` as an upper-case hexadecimal string.
 * `a` may be a string (characters with a code above 127 are expanded into bytes through
 * encodeURIComponent), an array of strings or numbers, an ArrayBuffer or a typed array.
 * Any other type triggers alert("MD5 type mismatch, ...").
 */
"undefined" == typeof faultylabs && (faultylabs = {}),faultylabs.MD5 = function(a) { function b(a) { var b = (a >>> 0).toString(16); return "00000000".substr(0, 8 - b.length) + b } function c(a) { for (var b = [], c = 0; c < a.length; c++) b = b.concat(k(a[c])); return b } function d(a) { for (var b = [], c = 0; 8 > c; c++) b.push(255 & a), a >>>= 8; return b } function e(a, b) { return a << b & 4294967295 | a >>> 32 - b } function f(a, b, c) { return a & b | ~a & c } function g(a, b, c) { return c & a | ~c & b } function h(a, b, c) { return a ^ b ^ c } function i(a, b, c) { return b ^ (a | ~c) } function j(a, b) { return a[b + 3] << 24 | a[b + 2] << 16 | a[b + 1] << 8 | a[b] } function k(a) { for (var b = [], c = 0; c < a.length; c++) if (a.charCodeAt(c) <= 127) b.push(a.charCodeAt(c)); else for (var d = encodeURIComponent(a.charAt(c)).substr(1).split("%"), e = 0; e < d.length; e++) b.push(parseInt(d[e], 16)); return b } function l() { for (var a = "", c = 0, d = 0, e = 3; e >= 0; e--) d = arguments[e], c = 255 & d, d >>>= 8, c <<= 8, c |= 255 & d, d >>>= 8, c <<= 8, c |= 255 & d, d >>>= 8, c <<= 8, c |= d, a += b(c); return a } function m(a) { for (var b = new Array(a.length), c = 0; c < a.length; c++) b[c] = a[c]; return b } function n(a, b) { return 4294967295 & a + b } function o() { function a(a, b, c, d) { var f = v; v = u, u = t, t = n(t, e(n(s, n(a, n(b, c))), d)), s = f } var b = p.length; p.push(128); var c = p.length % 64; if (c > 56) { for (var k = 0; 64 - c > k; k++) p.push(0); c = p.length % 64 } for (k = 0; 56 - c > k; k++) p.push(0); p = p.concat(d(8 * b)); var m = 1732584193 , o = 4023233417 , q = 2562383102 , r = 271733878 , s = 0 , t = 0 , u = 0 , v = 0; for (k = 0; k < p.length / 64; k++) { s = m, t = o, u = q, v = r; var w = 64 * k; a(f(t, u, v), 3614090360, j(p, w), 7), a(f(t, u, v), 3905402710, j(p, w + 4), 12), a(f(t, u, v), 606105819, j(p, w + 8), 17), a(f(t, u, v), 3250441966, j(p, w + 12), 22), a(f(t, u, v), 4118548399, j(p, w + 16), 7), 
a(f(t, u, v), 1200080426, j(p, w + 20), 12), a(f(t, u, v), 2821735955, j(p, w + 24), 17), a(f(t, u, v), 4249261313, j(p, w + 28), 22), a(f(t, u, v), 1770035416, j(p, w + 32), 7), a(f(t, u, v), 2336552879, j(p, w + 36), 12), a(f(t, u, v), 4294925233, j(p, w + 40), 17), a(f(t, u, v), 2304563134, j(p, w + 44), 22), a(f(t, u, v), 1804603682, j(p, w + 48), 7), a(f(t, u, v), 4254626195, j(p, w + 52), 12), a(f(t, u, v), 2792965006, j(p, w + 56), 17), a(f(t, u, v), 1236535329, j(p, w + 60), 22), a(g(t, u, v), 4129170786, j(p, w + 4), 5), a(g(t, u, v), 3225465664, j(p, w + 24), 9), a(g(t, u, v), 643717713, j(p, w + 44), 14), a(g(t, u, v), 3921069994, j(p, w), 20), a(g(t, u, v), 3593408605, j(p, w + 20), 5), a(g(t, u, v), 38016083, j(p, w + 40), 9), a(g(t, u, v), 3634488961, j(p, w + 60), 14), a(g(t, u, v), 3889429448, j(p, w + 16), 20), a(g(t, u, v), 568446438, j(p, w + 36), 5), a(g(t, u, v), 3275163606, j(p, w + 56), 9), a(g(t, u, v), 4107603335, j(p, w + 12), 14), a(g(t, u, v), 1163531501, j(p, w + 32), 20), a(g(t, u, v), 2850285829, j(p, w + 52), 5), a(g(t, u, v), 4243563512, j(p, w + 8), 9), a(g(t, u, v), 1735328473, j(p, w + 28), 14), a(g(t, u, v), 2368359562, j(p, w + 48), 20), a(h(t, u, v), 4294588738, j(p, w + 20), 4), a(h(t, u, v), 2272392833, j(p, w + 32), 11), a(h(t, u, v), 1839030562, j(p, w + 44), 16), a(h(t, u, v), 4259657740, j(p, w + 56), 23), a(h(t, u, v), 2763975236, j(p, w + 4), 4), a(h(t, u, v), 1272893353, j(p, w + 16), 11), a(h(t, u, v), 4139469664, j(p, w + 28), 16), a(h(t, u, v), 3200236656, j(p, w + 40), 23), a(h(t, u, v), 681279174, j(p, w + 52), 4), a(h(t, u, v), 3936430074, j(p, w), 11), a(h(t, u, v), 3572445317, j(p, w + 12), 16), a(h(t, u, v), 76029189, j(p, w + 24), 23), a(h(t, u, v), 3654602809, j(p, w + 36), 4), a(h(t, u, v), 3873151461, j(p, w + 48), 11), a(h(t, u, v), 530742520, j(p, w + 60), 16), a(h(t, u, v), 3299628645, j(p, w + 8), 23), a(i(t, u, v), 4096336452, j(p, w), 6), a(i(t, u, v), 1126891415, j(p, w + 28), 10), a(i(t, u, v), 
2878612391, j(p, w + 56), 15), a(i(t, u, v), 4237533241, j(p, w + 20), 21), a(i(t, u, v), 1700485571, j(p, w + 48), 6), a(i(t, u, v), 2399980690, j(p, w + 12), 10), a(i(t, u, v), 4293915773, j(p, w + 40), 15), a(i(t, u, v), 2240044497, j(p, w + 4), 21), a(i(t, u, v), 1873313359, j(p, w + 32), 6), a(i(t, u, v), 4264355552, j(p, w + 60), 10), a(i(t, u, v), 2734768916, j(p, w + 24), 15), a(i(t, u, v), 1309151649, j(p, w + 52), 21), a(i(t, u, v), 4149444226, j(p, w + 16), 6), a(i(t, u, v), 3174756917, j(p, w + 44), 10), a(i(t, u, v), 718787259, j(p, w + 8), 15), a(i(t, u, v), 3951481745, j(p, w + 36), 21), m = n(m, s), o = n(o, t), q = n(q, u), r = n(r, v) } return l(r, q, o, m).toUpperCase() } var p = null , q = null; return "string" == typeof a ? p = k(a) : a.constructor == Array ? 0 === a.length ? p = a : "string" == typeof a[0] ? p = c(a) : "number" == typeof a[0] ? p = a : q = typeof a[0] : "undefined" != typeof ArrayBuffer ? a instanceof ArrayBuffer ? p = m(new Uint8Array(a)) : a instanceof Uint8Array || a instanceof Int8Array ? p = m(a) : a instanceof Uint32Array || a instanceof Int32Array || a instanceof Uint16Array || a instanceof Int16Array || a instanceof Float32Array || a instanceof Float64Array ? p = m(new Uint8Array(a.buffer)) : q = typeof a : q = typeof a, q && alert("MD5 type mismatch, cannot process " + q), o()}

之后用python的PyExecJS库调用。注意:安装时的包名是PyExecJS(pip install PyExecJS),而在代码里导入时的名字是execjs(import execjs)。

代码实现

"""
date: 2021/02/05

Search Kugou Music for a song and download the first hit as an MP3 file.
For paid tracks only the preview clip can be fetched for now.
"""
import json
import re
import time
from urllib.parse import quote

import execjs
import requests

# Browser-like headers sent with the search and play-data requests.
HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36',
    'referer': 'https://www.kugou.com/',
}

# NOTE(review): the article only shows this endpoint as "index.php?..";
# confirm the host/path in the browser's Network panel.
PLAY_DATA_URL = 'https://wwwapi.kugou.com/yy/index.php'


def get_signature(text):
    """
    Compute the ``signature`` query parameter.

    :param text: the formatted string to sign (site key + sorted ``key=value`` pairs + site key)
    :return: the value returned by ``faultylabs.MD5`` in kugou.js (an upper-case hex digest)
    :raises RuntimeError: if kugou.js is empty
    """
    # kugou.js must sit in the current working directory.
    with open("kugou.js", "r", encoding='utf-8') as f:
        js_str = f.read()
    if not js_str:
        # Fail loudly instead of silently returning None as the signature.
        raise RuntimeError("kugou.js is empty")
    return execjs.compile(js_str).call('faultylabs.MD5', text)


def get_url(keyword):
    """
    Build the signed search url.

    :param keyword: search term, e.g. a song title
    :return: the complete search url
    """
    search = "https://complexsearch.kugou.com/v2/search/song?callback=callback123&keyword={keyword}&page=1&pagesize=30&bitrate=0&isfuzzy=0&tag=em&inputtype=0&platform=WebFilter&userid=-1&clientver=2000&iscorrection=1&privilege_filter=0&srcappid=2919&clienttime={time}&mid={time}&uuid={time}&dfid=-&signature={signature}"
    key_code = "NVPh5oo715z5DIWAeQlhMDsWXXQV4hwtbitrate=0callback=callback123clienttime={time}clientver=2000dfid=-inputtype=0iscorrection=1isfuzzy=0keyword={keyword}mid={time}page=1pagesize=30platform=WebFilterprivilege_filter=0srcappid=2919tag=emuserid=-1uuid={time}NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt"
    # 13-digit millisecond timestamp, shared by clienttime, mid and uuid.
    millis = str(round(time.time() * 1000))
    # The signature is computed over the raw keyword ...
    signature = get_signature(key_code.format(time=millis, keyword=keyword))
    # ... while the url needs it percent-encoded so that '&', '#', spaces
    # and similar characters cannot break the query string.
    return search.format(keyword=quote(keyword, safe=''), time=millis, signature=signature)


def _parse_jsonp(text):
    """Strip the ``callback(...)`` wrapper of a JSONP response and decode the JSON inside."""
    start = text.index('(') + 1
    end = text.rindex(')')
    return json.loads(text[start:end])


def get_data(url):
    """
    Run the search and pick the first result.

    :param url: signed search url built by ``get_url``
    :return: tuple ``(hash, album_id)`` of the first search result
    :raises IndexError: if the search returned no songs
    """
    res = requests.get(url, headers=HEADERS, timeout=10)
    res.raise_for_status()
    json_data = _parse_jsonp(res.text)
    # NOTE(review): the article does not show the response layout; the
    # 'lists' / 'FileHash' / 'AlbumID' names must be confirmed against a
    # real response.
    first_song = json_data['data']['lists'][0]
    return first_song['FileHash'], first_song['AlbumID']


def get_mp3(hash_value, album_id):
    """
    Look up the audio url of one song and save it to disk.

    :param hash_value: ``hash`` of the song, taken from the search result
    :param album_id: ``album_id`` of the song, taken from the search result
    :return: none
    """
    # Only hash and album_id vary between songs; the other values are the
    # constants captured from the browser, and the trailing '_' parameter
    # is optional and therefore omitted.
    params = {
        'r': 'play/getdata',
        'callback': 'jQuery191019800824574510756_1612519333214',
        'hash': hash_value,
        'dfid': '0Q0Clh1IcZaG3ey1J70RaTiL',
        'mid': 'b6cf66837b18642cc269390b066649dc',
        'platid': 4,
        'album_id': album_id,
    }
    res = requests.get(PLAY_DATA_URL, params=params, headers=HEADERS, timeout=10)
    res.raise_for_status()
    json_data = _parse_jsonp(res.text)
    audio_name = json_data['data']['audio_name']
    play_url = json_data['data']['play_url']
    save_mp3(audio_name, play_url)


def save_mp3(audio_name, play_url):
    """
    Save the MP3 file.

    :param audio_name: name used for the file (without extension)
    :param play_url: url of the audio
    :return: none
    :raises ValueError: if ``play_url`` is empty
    """
    if not play_url:
        raise ValueError("empty play_url")
    res = requests.get(play_url, timeout=30)
    res.raise_for_status()
    # Characters that are not allowed in file names on common file systems.
    file_name = re.sub(r'[\\/:*?"<>|]', '_', audio_name) + '.mp3'
    # 'wb' rather than 'ab': downloading the same song twice must replace
    # the file, not append a second copy of the audio to it.
    with open(file_name, mode='wb') as f:
        f.write(res.content)


if __name__ == '__main__':
    keyword = input('请输入要搜索的歌曲名称:')
    try:
        hash_value, album_id = get_data(get_url(keyword))
        get_mp3(hash_value, album_id)
    except requests.RequestException as e:
        # Network problems are not the user's fault; report them as such.
        print('网络请求失败:', e)
    except (KeyError, IndexError, TypeError, ValueError):
        # No usable search result or an unexpected response layout.
        print('请输入正确歌曲名称。')

以上就是Python爬取酷狗MP3音频的步骤的详细内容,更多关于Python爬取酷狗MP3音频的资料请关注其它相关文章!

声明:本页内容来源网络,仅供用户参考;我单位不保证亦不表示资料全面及准确无误,也不保证亦不表示这些资料为最新信息,如因任何原因,本网内容或者用户因倚赖本网内容造成任何损失或损害,我单位将不会负任何法律责任。如涉及版权问题,请提交至online#300.cn邮箱联系删除。

相关文章