网站开发 网页设计北京师范大学出版社,湖北正规网站建设检修,wordpress上传svg,个人养老金制度将推 注意：本次爬取的音乐仅有1分钟试听，仅作学习爬虫的原理，完整音乐需要自行下载客户端。 一、 初步分析
登录酷狗音乐后随机选取一首歌，在请求里发现一段mp3文件，复制网址，确实是我们需要的url。 复制音频的… 注意：本次爬取的音乐仅有1分钟试听，仅作学习爬虫的原理，完整音乐需要自行下载客户端。 一、 初步分析
登录酷狗音乐后随机选取一首歌，在请求里发现一段mp3文件，复制网址，确实是我们需要的url。 复制音频的名字搜索，找到发起请求的网址，发现是在songinfo里。 查看参数和请求头，刷新一次，查看是否有哪些参数是变化的。可以发现图中两次请求的这些参数都不同，接下来就寻找这些参数的生成方式。参数clienttime为时间戳，那么就只需要找到signature的生成方式就可以了。 二、分析参数signature
1. 分析过程
搜索参数signature，并在可能生成的位置打上断点，然后刷新网页。 网页断在了此处，可以看见参数signature跟函数d与数组s有关。 补充：如果s的长度不为13，需要放行一下，点击这个按钮。 查看函数d的定义， 发现函数内部没有wordsToBytes函数和函数i的相关定义，那么打下断点查看具体函数的位置。 获得wordsToBytes函数的具体定义。 获得i函数的具体定义。 i函数里没有r.stringToBytes(t)的相关定义，继续打下断点， 找到r.stringToBytes(t)的相关定义。 接着查看s的内容：在console里查看s的内容，发现s的值跟之前请求的参数类似，而且下标为0和下标为12的值跟u的值相同。 往上查找u的定义，发现u的值是固定的。 2. 代码实现
那么就开始实现生成signature的代码，以下为JavaScript代码：
function bytesToWords(t) {for (var n [], r 0, e 0; r t.length; r,e 8)n[e 5] | t[r] 24 - e % 32;return n
}function rotl(t, n) {return t n | t 32 - n
}function endian(t) {if (t.constructor Number)return 16711935 rotl(t, 8) | 4278255360 rotl(t, 24);for (var n 0; n t.length; n)t[n] endian(t[n]);return t
}function i(t, c) {var l {utf8: {stringToBytes: function (t) {return l.bin.stringToBytes(unescape(encodeURIComponent(t)))},bytesToString: function (t) {return decodeURIComponent(escape(l.bin.bytesToString(t)))}},bin: {stringToBytes: function (t) {for (var n [], r 0; r t.length; r)n.push(255 t.charCodeAt(r));return n},bytesToString: function (t) {for (var n [], r 0; r t.length; r)n.push(String.fromCharCode(t[r]));return n.join()}}};i._ff function (t, n, r, e, o, i, c) {var s t (n r | ~n e) (o 0) c;return (s i | s 32 - i) n},i._gg function (t, n, r, e, o, i, c) {var s t (n e | r ~e) (o 0) c;return (s i | s 32 - i) n},i._hh function (t, n, r, e, o, i, c) {var s t (n ^ r ^ e) (o 0) c;return (s i | s 32 - i) n},i._ii function (t, n, r, e, o, i, c) {var s t (r ^ (n | ~e)) (o 0) c;return (s i | s 32 - i) n};t.constructor String ? t c binary c.encoding ? o.stringToBytes(t) : l.utf8.stringToBytes(t) : e(t) ? t Array.prototype.slice.call(t, 0) : Array.isArray(t) || (t t.toString());for (var s bytesToWords(t), a 8 * t.length, l 1732584193, u -271733879, f -1732584194, d 271733878, g 0; g s.length; g)s[g] 16711935 (s[g] 8 | s[g] 24) | 4278255360 (s[g] 24 | s[g] 8);s[a 5] | 128 a % 32,s[14 (a 64 9 4)] a;for (var b i._ff, p i._gg, h i._hh, m i._ii, g 0; g s.length; g 16) {var y l, j u, S f, v d;u m(u m(u m(u m(u h(u h(u h(u h(u p(u p(u p(u p(u b(u b(u b(u b(u, f b(f, d b(d, l b(l, u, f, d, s[g 0], 7, -680876936), u, f, s[g 1], 12, -389564586), l, u, s[g 2], 17, 606105819), d, l, s[g 3], 22, -1044525330), f b(f, d b(d, l b(l, u, f, d, s[g 4], 7, -176418897), u, f, s[g 5], 12, 1200080426), l, u, s[g 6], 17, -1473231341), d, l, s[g 7], 22, -45705983), f b(f, d b(d, l b(l, u, f, d, s[g 8], 7, 1770035416), u, f, s[g 9], 12, -1958414417), l, u, s[g 10], 17, -42063), d, l, s[g 11], 22, -1990404162), f b(f, d b(d, l b(l, u, f, d, s[g 12], 7, 1804603682), u, f, s[g 13], 12, -40341101), l, u, s[g 14], 17, -1502002290), d, l, s[g 15], 22, 1236535329), f p(f, d p(d, l p(l, u, f, d, s[g 1], 
5, -165796510), u, f, s[g 6], 9, -1069501632), l, u, s[g 11], 14, 643717713), d, l, s[g 0], 20, -373897302), f p(f, d p(d, l p(l, u, f, d, s[g 5], 5, -701558691), u, f, s[g 10], 9, 38016083), l, u, s[g 15], 14, -660478335), d, l, s[g 4], 20, -405537848), f p(f, d p(d, l p(l, u, f, d, s[g 9], 5, 568446438), u, f, s[g 14], 9, -1019803690), l, u, s[g 3], 14, -187363961), d, l, s[g 8], 20, 1163531501), f p(f, d p(d, l p(l, u, f, d, s[g 13], 5, -1444681467), u, f, s[g 2], 9, -51403784), l, u, s[g 7], 14, 1735328473), d, l, s[g 12], 20, -1926607734), f h(f, d h(d, l h(l, u, f, d, s[g 5], 4, -378558), u, f, s[g 8], 11, -2022574463), l, u, s[g 11], 16, 1839030562), d, l, s[g 14], 23, -35309556), f h(f, d h(d, l h(l, u, f, d, s[g 1], 4, -1530992060), u, f, s[g 4], 11, 1272893353), l, u, s[g 7], 16, -155497632), d, l, s[g 10], 23, -1094730640), f h(f, d h(d, l h(l, u, f, d, s[g 13], 4, 681279174), u, f, s[g 0], 11, -358537222), l, u, s[g 3], 16, -722521979), d, l, s[g 6], 23, 76029189), f h(f, d h(d, l h(l, u, f, d, s[g 9], 4, -640364487), u, f, s[g 12], 11, -421815835), l, u, s[g 15], 16, 530742520), d, l, s[g 2], 23, -995338651), f m(f, d m(d, l m(l, u, f, d, s[g 0], 6, -198630844), u, f, s[g 7], 10, 1126891415), l, u, s[g 14], 15, -1416354905), d, l, s[g 5], 21, -57434055), f m(f, d m(d, l m(l, u, f, d, s[g 12], 6, 1700485571), u, f, s[g 3], 10, -1894986606), l, u, s[g 10], 15, -1051523), d, l, s[g 1], 21, -2054922799), f m(f, d m(d, l m(l, u, f, d, s[g 8], 6, 1873313359), u, f, s[g 15], 10, -30611744), l, u, s[g 6], 15, -1560198380), d, l, s[g 13], 21, 1309151649), f m(f, d m(d, l m(l, u, f, d, s[g 4], 6, -145523070), u, f, s[g 11], 10, -1120210379), l, u, s[g 2], 15, 718787259), d, l, s[g 9], 21, -343485551),l l y 0,u u j 0,f f S 0,d d v 0}return endian([l, u, f, d])
}function wordsToBytes(t) {for (var n [], r 0; r 32 * t.length; r 8)n.push(t[r 5] 24 - r % 32 255);return n
}function bytesToHex(t) {for (var n [], r 0; r t.length; r)n.push((t[r] 4).toString(16)),n.push((15 t[r]).toString(16));return n.join()
}function d(t, r) {if (void 0 t || null t)throw new Error(Illegal argument t);var e wordsToBytes(i(t, r));return r r.asBytes ? e : r r.asString ? o.bytesToString(e) : bytesToHex(e)
}function getsianature() {var s [NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt,appid1014,clienttime1741411989613,clientver20000,dfid3Mm61k0WDxvm033Epz2worRG,encode_album_audio_idj410q60,mid70789bebe63fb74c52e4a911853f5450,platid4,srcappid2919,tokencbfe2e174e4b97fd6aca35682cdba3d2b431c4ed95e2dbd1779e37a7975b672c,userid2307902397,uuid70789bebe63fb74c52e4a911853f5450,NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt];return d(s.join())
}console.log(getsianature());经过一系列调试，可以发现生成的结果与浏览器生成的值一样，那么生成signature的代码就没问题了。 将一些会变的值改为变量：可以看到参数s的值里只有clienttime的值是会变的，因此修改上述代码中的getsianature函数，将参数s的值放在python代码中。getsianature函数改完如下图所示：
function getsianature(s) {return d(s.join())
}三、获取多首歌
1. 分析过程
点击不同的歌，可以发现每首歌的参数encode_album_audio_id都不同，因此需要获取encode_album_audio_id。 搜索歌名，找到歌曲的id。 接着查看请求参数，同样有一个signature参数，刷新多次网页，发现signature参数会变化，那么重复之前分析signature的步骤。 打下断点，查看中断的位置，参数s有所变化。 打印s查看s的内容，除此之外没有变化，那么就沿用先前的代码。 2. 代码实现
完整代码如下。注意：这里调用了JavaScript代码，需要安装PyExecJS模块（pip install PyExecJS -i https://pypi.tuna.tsinghua.edu.cn/simple）。
本代码中JavaScript文件名为kugou.js，JavaScript代码在参数signature的分析中有写到，以下为python代码：
import json
import re
import time
import requests
import execjsclass kugou_music:def __init__(self):self.headers {User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36,Referer: https://www.kugou.com/,}def get_signature(self, s):with open(kugou.js, r, encodingutf-8) as f:js f.read()ctx execjs.compile(js)signature ctx.call(getsianature, s)return signaturedef get_one_song_url(self, audio_id):timestamp str(int(time.time() * 1000))s [NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt,appid1014,fclienttime{timestamp},clientver20000,dfid3Mm61k0WDxvm033Epz2worRG,fencode_album_audio_id{audio_id},mid70789bebe63fb74c52e4a911853f5450,platid4,srcappid2919,tokencbfe2e174e4b97fd6aca35682cdba3d2b431c4ed95e2dbd1779e37a7975b672c,userid2307902397,uuid70789bebe63fb74c52e4a911853f5450,NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt]signature self.get_signature(s)params {srcappid: 2919,clientver: 20000,clienttime: timestamp,mid: 70789bebe63fb74c52e4a911853f5450,uuid: 70789bebe63fb74c52e4a911853f5450,dfid: 3Mm61k0WDxvm033Epz2worRG,appid: 1014,platid: 4,encode_album_audio_id: audio_id,token: cbfe2e174e4b97fd6aca35682cdba3d2b431c4ed95e2dbd1779e37a7975b672c,userid: 2307902397,signature: signature}one_song_url https://wwwapi.kugou.com/play/songinforesponse requests.get(one_song_url, headersself.headers, paramsparams)song_url response.json()[data][play_url]return song_urldef get_signal_music(self, audio_id, audio_name):song_url self.get_one_song_url(audio_id)response requests.get(song_url, headersself.headers)with open(f{audio_name}.mp3, wb) as f:f.write(response.content)print(f{audio_name}.mp3下载完成)def get_song_id(self,keyword):timestamp str(int(time.time() * 1000))s 
[NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt,appid1014,bitrate0,callbackcallback123,fclienttime{timestamp},clientver1000,dfid3Mm61k0WDxvm033Epz2worRG,filter10,inputtype0,iscorrection1,isfuzzy0,fkeyword{keyword},mid70789bebe63fb74c52e4a911853f5450,page1,pagesize30,platformWebFilter,privilege_filter0,srcappid2919,tokencbfe2e174e4b97fd6aca35682cdba3d2b431c4ed95e2dbd1779e37a7975b672c,userid2307902397,uuid70789bebe63fb74c52e4a911853f5450,NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt]signature self.get_signature(s)params {callback: callback123,srcappid: 2919,clientver: 1000,clienttime: timestamp,mid: 70789bebe63fb74c52e4a911853f5450,uuid: 70789bebe63fb74c52e4a911853f5450,dfid: 3Mm61k0WDxvm033Epz2worRG,keyword: keyword,page: 1,pagesize: 30,bitrate: 0,isfuzzy: 0,inputtype: 0,platform: WebFilter,userid: 2307902397,iscorrection: 1,privilege_filter: 0,filter: 10,token: cbfe2e174e4b97fd6aca35682cdba3d2b431c4ed95e2dbd1779e37a7975b672c,appid: 1014,signature: signature}song_id_url https://complexsearchretry.kugou.com/v2/search/songresponse requests.get(song_id_url, headersself.headers, paramsparams, verifyFalse)temp re.findall(rcallback123(.*), response.text)[0][1:-1]temp json.loads(temp)song temp[data][lists]return songdef get_all_song(self,keyword):song self.get_song_id(keyword)for i in song:song_id i.get(EMixSongID)song_name i.get(FileName)# print(song_name, song_id)try:self.get_signal_music(song_id, song_name)except Exception as e:print(f{song_name}下载失败:, e)if __name__ __main__:kugou kugou_music()# kugou.get_signal_music(j410q60)keyword周杰伦kugou.get_all_song(keyword)