常州网站制作推广,wordpress 跑马灯插件,常州模板建站哪家好,线上推广有哪些平台效果好
分享一个我自己写的 Python B站视频爬虫，写的比较粗糙。
当然，网上有一堆B站视频获取的工具，也不差我这个粗糙的 Python 脚本，就是分享出来大家一起讨论学习，如果大家有什么好的想法和功能，我们可以一起聊聊。
这里分享一个我自己用的B站视…
分享一个我自己写的 Python B站视频爬虫，写的比较粗糙。
当然，网上有一堆B站视频获取的工具，也不差我这个粗糙的 Python 脚本，就是分享出来大家一起讨论学习，如果大家有什么好的想法和功能，我们可以一起聊聊。
这里分享一个我自己用的B站视频下载工具 BBDown，很好用，作者也一直在更新。
必要工具：ffmpeg。建议还是放在你的 Python 项目目录下（我不知道为什么配置的环境变量没有生效）。
如果想爬取高清视频，就把自己的 cookie 加到 api_headers。这里进度条加载有点问题：视频太小时进度条可能加载不完全；另外视频合成也有点问题，有时视频合成不了。
代码如下：
import argparse
import os
import re
import subprocess
import sys
import time
from contextlib import closing
from urllib import parse

import requests
from lxml import etree
from tqdm import tqdm


class BiliBili:
    """Download Bilibili videos as separate DASH video/audio streams and merge them.

    The video and audio tracks are saved as ``<title>.mp4`` and ``<title>.mp3``
    inside the target directory and then muxed by FFmpeg into
    ``<title>_2.mp4``; the two intermediate files are removed after a
    successful merge.
    """

    # Characters stripped from titles before they are used as file names.
    _FORBIDDEN_CHARS = '´★☆❤◦\\/:*?"<>|'
    _STRIP_TABLE = str.maketrans('', '', _FORBIDDEN_CHARS)
    # "BV" followed by a digit and nine more characters.
    _BV_PATTERN = re.compile(r'BV\d.{9}')
    # Bytes requested per read while streaming a download.
    _CHUNK_SIZE = 64 * 1024

    def __init__(self, dirname, cookie=None):
        """Create a downloader that saves into *dirname*.

        Parameters:
            dirname: directory the downloaded and merged files are written to.
            cookie: optional value for the ``cookie`` header sent with the
                play-URL API request; omitted from the request when None.
        """
        self.search_headers = {
            'authority': 'search.bilibili.com',
            'Accept': '*/*',
            'Referer': 'https://www.bilibili.com/',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
        }
        self.video_headers = {
            'authority': 'www.bilibili.com',
            'Referer': 'https://www.bilibili.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        }
        self.api_headers = {
            'authority': 'api.bilibili.com',
            'Accept': '*/*',
            'Referer': 'https://www.bilibili.com/',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        }
        if cookie:
            self.api_headers['cookie'] = cookie
        self.sess = requests.Session()
        self.dir = dirname

    @classmethod
    def _sanitize_title(cls, title):
        """Return *title* with every character in ``_FORBIDDEN_CHARS`` removed."""
        return title.translate(cls._STRIP_TABLE)

    def downloader(self, data_url, title):
        """Stream *data_url* into ``self.dir/title`` while showing a progress bar.

        Parameters:
            data_url: URL of the stream to download.
            title: file name (including extension) to save under ``self.dir``.

        Returns:
            True when the response was HTTP 200 and the number of bytes
            written matches ``content-length`` (or the header was absent);
            False otherwise.
        """
        os.makedirs(self.dir, exist_ok=True)
        target = os.path.join(self.dir, title)
        complete = False
        with closing(self.sess.get(data_url, headers=self.video_headers,
                                   stream=True)) as response:
            # Check the status before touching headers: an error response
            # may not carry content-length at all.
            if response.status_code == 200:
                content_size = int(response.headers.get('content-length', 0))
                sys.stdout.write(' [开始下载]\n')
                sys.stdout.write(' [文件大小]: %0.2f MB\n' % (content_size / 1000 / 1000))
                size = 0
                with tqdm(total=content_size or None, desc=' [下载进度]',
                          leave=False, ncols=100, unit='B',
                          unit_scale=True) as pbar:
                    with open(target, 'wb') as file:
                        # Always stream, whatever the file size, so the bar
                        # advances by the exact number of bytes written.
                        for data in response.iter_content(chunk_size=self._CHUNK_SIZE):
                            file.write(data)
                            size += len(data)
                            pbar.update(len(data))
                complete = content_size == 0 or size == content_size
                if complete:
                    sys.stdout.write(' [下载完成]\n')
                else:
                    sys.stdout.write('~~~链接异常~~~\n')
                sys.stdout.flush()
            else:
                print('~~~链接异常~~~\r')
        # Pause between requests, as the original script did.
        time.sleep(1)
        return complete

    def search_video(self, keyword, page=1, limit=3):
        """Fetch one search-result page and return its first *limit* videos.

        Parameters:
            keyword: search keyword.
            page: 1-based result page number.
            limit: maximum number of results taken from the page.

        Returns:
            A list of ``[title, href]`` pairs; titles have the characters in
            ``_FORBIDDEN_CHARS`` removed.
        """
        url = (
            'https://search.bilibili.com/all'
            f'?keyword={parse.quote(keyword)}&page={page}&o=30'
        )
        req = self.sess.get(url=url, headers=self.search_headers)
        html = etree.fromstring(req.text, etree.HTMLParser())
        if html is None:
            # Nothing parseable came back (e.g. an empty body).
            print([])
            return []
        card = '//div[@class="bili-video-card__info--right"]/a'
        bvs = html.xpath(card + '/@href')[:limit]
        titles = html.xpath(card + '/h3/@title')[:limit]
        videos = [
            [self._sanitize_title(title), href]
            for title, href in zip(titles, bvs)
        ]
        # Show the titles and URLs found on this page.
        print(videos)
        return videos

    def get_download_url(self, arcurl):
        """Resolve a video URL or BV id into its stream URLs.

        Parameters:
            arcurl: any string containing the BV id (page URL or bare id).

        Returns:
            ``[accept_description, video_data, audio_data, title]`` where
            ``video_data`` and ``audio_data`` are one-element lists holding
            the ``baseUrl`` of the first video / audio stream.

        Exits the process with status 1 (after printing a message) when the
        BV id cannot be found or the API responses lack the expected fields.
        """
        match = self._BV_PATTERN.search(arcurl)
        if match is None:
            print('视频BV号解析失败,请检查输入的bv号是否正确')
            sys.exit(1)
        bv = match.group(0)

        # Step 1: the view API maps the BV id to aid/cid and the title.
        view_url = f'https://api.bilibili.com/x/web-interface/view?bvid={bv}'
        view_json = self.sess.get(url=view_url, headers=self.video_headers).json()
        view_data = view_json.get('data') or {}
        if not all(key in view_data for key in ('aid', 'cid', 'title')):
            print('视频解析失败')
            sys.exit(1)
        avid = view_data['aid']
        cid = view_data['cid']
        title = view_data['title']

        # Step 2: the play-URL API returns the stream lists.
        play_url = (
            'https://api.bilibili.com/x/player/wbi/playurl'
            f'?avid={avid}&cid={cid}&fnval=4048'
        )
        play_json = self.sess.get(url=play_url, headers=self.api_headers).json()
        play_data = play_json.get('data') or {}
        dash = play_data.get('dash') or {}
        video_streams = dash.get('video') or []
        audio_streams = dash.get('audio') or []
        if not video_streams or not audio_streams:
            print('视频解析失败')
            sys.exit(1)

        accept_description = play_data.get('accept_description')
        video_data = [video_streams[0]['baseUrl']]
        audio_data = [audio_streams[0]['baseUrl']]
        return [accept_description, video_data, audio_data, title]

    def merge_data(self, dir, video_name):
        """Mux ``<video_name>.mp4`` and ``<video_name>.mp3`` into ``<video_name>_2.mp4``.

        Parameters:
            dir: directory containing the two source files.
            video_name: base file name without extension.

        Returns:
            True when the merged file already exists or FFmpeg succeeded
            (the source files are then deleted); False when FFmpeg reported
            an error (the source files are kept).

        Raises:
            FileNotFoundError: when the ``ffmpeg`` executable cannot be found.
        """
        video_path = os.path.join(dir, f'{video_name}.mp4')
        audio_path = os.path.join(dir, f'{video_name}.mp3')
        merged_path = os.path.join(dir, f'{video_name}_2.mp4')

        if os.path.exists(merged_path):
            print(' 合成视频已存在')
            return True

        print('视频合成开始:', video_name)
        # An argument list (no shell) keeps titles with spaces or shell
        # metacharacters from being interpreted, and works on any OS.
        cmd = [
            'ffmpeg', '-y',
            '-i', video_path,
            '-i', audio_path,
            '-c:v', 'copy',
            '-c:a', 'aac',
            '-strict', 'experimental',
            '-map', '0:0',
            '-map', '1:0',
            merged_path,
        ]
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if result.returncode != 0:
            # Surface the tail of FFmpeg's log instead of discarding it.
            log_tail = result.stderr.decode('utf-8', errors='replace')[-500:]
            print('视频合成失败:', video_name)
            print(log_tail)
            return False

        # Remove the intermediates only after a successful merge.
        for leftover in (video_path, audio_path):
            if os.path.exists(leftover):
                os.remove(leftover)
        print('视频合成结束:', video_name)
        return True

    def _download_and_merge(self, title, video_url, audio_url, prefix=''):
        """Download both streams for *title* and merge them; return True on success."""
        fname = title + '.mp4'
        print('%s视频[ %s ]下载中:' % (prefix, fname))
        if not self.downloader(video_url, fname):
            return False
        print('视频下载完成!')

        fname = title + '.mp3'
        print('%s音频[ %s ]下载中:' % (prefix, fname))
        if not self.downloader(audio_url, fname):
            return False
        print('音频下载完成!')

        try:
            return self.merge_data(self.dir, title)
        except FileNotFoundError:
            # Raised by subprocess when the ffmpeg executable is missing.
            print('请安装FFmpeg,并配置环境变量 http://ffmpeg.org/')
            return False

    def search_downloader(self, keyword, page=1):
        """Download the videos listed on the first *page* search-result pages.

        Parameters:
            keyword: search keyword.
            page: number of result pages to walk, starting from page 1.
        """
        os.makedirs(self.dir, exist_ok=True)
        for page_no in range(1, page + 1):
            for title, arcurl in self.search_video(keyword, page_no):
                # Skip videos whose merged output is already present.
                if os.path.exists(os.path.join(self.dir, f'{title}_2.mp4')):
                    continue
                # One lookup yields both stream URLs.
                video_info = self.get_download_url(arcurl)
                videos_data = video_info[1]
                audio_data = video_info[2]
                if not videos_data[0] or not audio_data[0]:
                    print('第[ %d ]页:%s视频或音频解析失败,跳过下载:' % (page_no, title))
                    continue
                self._download_and_merge(
                    title, videos_data[0], audio_data[0],
                    prefix='第[ %d ]页:' % page_no,
                )

    def a_video_download(self, bv):
        """Download and merge a single video.

        Parameters:
            bv: BV id, or any URL containing it.
        """
        video_info = self.get_download_url(bv)
        # The API title may contain characters that are invalid in file names.
        title = self._sanitize_title(video_info[3])
        self._download_and_merge(title, video_info[1][0], video_info[2][0])


def main(argv=None):
    """Parse command-line arguments and run the requested download."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--dir', required=True, help='必要下载路径')
    parser.add_argument('-bv', '--bvid', required=False, help='下载指定bv视频')
    parser.add_argument('-s', '--search', required=False, action='store_true', help='批量下载搜索页视频')
    parser.add_argument('-k', '--keyword', required=False, help='搜索关键词内容')
    parser.add_argument('-p', '--pages', required=False, help='需要下载页码数', type=int)
    parser.add_argument('-c', '--cookie', required=False, help='B站登录cookie(用于获取高清视频)')

    if argv is None:
        argv = sys.argv[1:]
    # Running with no arguments shows the help text.
    args = parser.parse_args(argv or ['--help'])

    # Validate the option combination before any network work starts.
    if args.bvid and (args.search or args.keyword or args.pages):
        parser.error('下载单个视频请只输入BV号')
    if args.search and (not args.keyword or args.pages is None or args.pages < 1):
        parser.error('请输入搜索关键词和页码')

    bili = BiliBili(args.dir, cookie=args.cookie)
    if args.search:
        bili.search_downloader(args.keyword, args.pages)
    elif args.bvid:
        bili.a_video_download(args.bvid)
    else:
        parser.print_help()


if __name__ == '__main__':
    main()