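# Before running, fill in the empty "user-agent" and "cookie" values in the headers dicts with your own.
# If anything is unclear, leave a comment below.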
import requests
import re
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import time
# BV1EP4y1j7kV
def get_cid(bv):
    """Return the cid of every entry in the page list of video ``bv``.

    Queries the ``x/player/pagelist`` endpoint and collects the ``cid``
    field of each item under the response's ``data`` key.

    Args:
        bv: The video's BV id, e.g. ``"BV1EP4y1j7kV"``.

    Returns:
        A list with one cid per item in the response's ``data`` list, in
        response order.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        KeyError: If the response JSON has no ``data`` key or an item has
            no ``cid`` key.
    """
    # The user-agent is an empty placeholder; fill in your own before running.
    headers = {
        "user-agent": ""
    }
    # A timeout keeps a stalled connection from blocking the script forever.
    resp = requests.get(
        "https://api.bilibili.com/x/player/pagelist?bvid={}&jsonp=jsonp".format(bv),
        headers=headers,
        timeout=10,
    )
    payload = resp.json()
    return [page['cid'] for page in payload['data']]
def get_days(bv, cid, month="2021-11"):
    """Return the ``data`` field of the danmaku history index for one month.

    Queries the ``x/v2/dm/history/index`` endpoint for ``cid`` and ``month``.
    The returned value is later used by ``get_data`` as the list of ``date``
    query values.

    Args:
        bv: The video's BV id; only used to build the ``referer`` header.
        cid: The cid to query (sent as the ``oid`` query parameter).
        month: Month to query, formatted ``YYYY-MM``. Defaults to
            ``"2021-11"``, the value that was previously hard-coded.

    Returns:
        Whatever the response JSON holds under ``data``.
        NOTE(review): presumably a list of date strings, possibly ``None``
        when there is no history for the month -- confirm against the API.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        KeyError: If the response JSON has no ``data`` key.
    """
    # user-agent and cookie are empty placeholders; fill in your own values.
    headers = {
        "user-agent": "",
        "referer": "https://www.bilibili.com/video/{}".format(bv),
        "cookie": "",
        "origin": "https://www.bilibili.com"
    }
    # Related endpoint noted by the original author:
    # https://api.bilibili.com/x/v1/dm/list.so?oid=438198616
    # oid and cid are the same thing.
    response = requests.get(
        url="https://api.bilibili.com/x/v2/dm/history/index?type=1&oid={}&month={}".format(cid, month),
        headers=headers,
        timeout=10,
    )
    json_data = response.json()
    days = json_data["data"]
    return days
def get_data(bv, days, cids):
    """Fetch the danmaku history for every (cid, day) pair and save its Chinese text.

    For each cid in ``cids`` and each day in ``days`` this requests the
    ``x/v2/dm/web/history/seg.so`` endpoint, extracts every run of Chinese
    characters from the response text and hands the runs to ``save_data``.

    Args:
        bv: The video's BV id; used for the ``referer`` header and passed to
            ``save_data`` to pick the output file.
        days: Iterable of values for the ``date`` query parameter.
        cids: Iterable of cids (sent as the ``oid`` query parameter).

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
    """
    # user-agent and cookie are empty placeholders; fill in your own values.
    headers = {
        'user-agent': '',
        "referer": "https://www.bilibili.com/video/{}".format(bv),
        "cookie": "",
        "origin": "https://www.bilibili.com"
    }
    # One or more consecutive characters in U+4E00..U+9FA5. The backslashes of
    # the \u escapes are required: without them the class matches the ASCII
    # range '0'-'u' instead of Chinese text. Lazy ".*?" wrappers around the
    # run are unnecessary because findall already scans for every match.
    chinese_run = re.compile(r"[\u4E00-\u9FA5]+")
    for cid in cids:
        for day in days:
            html = requests.get(
                url="https://api.bilibili.com/x/v2/dm/web/history/seg.so?type=1&oid={}&date={}".format(cid, day),
                headers=headers,
                timeout=10,
            ).text
            result = chinese_run.findall(html)
            # Pause one second between requests.
            time.sleep(1)
            print('{}完成{}的爬取'.format(cid, day))
            save_data(result, bv)
def save_data(result, bv):
    """Append every string in ``result`` as its own line to ``<bv>&B站弹幕.txt``.

    The file is opened in append mode with UTF-8 encoding, so repeated calls
    accumulate lines in the same file.

    Args:
        result: Sequence of strings to write, one per line.
        bv: The video's BV id; used as the file-name prefix.
    """
    if not result:
        # Nothing to write: leave the file untouched (and uncreated).
        return
    # Open the file once for the whole batch rather than once per item.
    with open('{}&B站弹幕.txt'.format(bv), 'a', encoding='utf-8') as f:
        # Terminate each entry with a real newline ('\n', not the letter 'n').
        f.writelines(item + '\n' for item in result)
def fenci(bv):
    """Segment the saved danmaku text with jieba and rank words by frequency.

    Reads ``<bv>&B站弹幕.txt``, tokenizes it with ``jieba.lcut``, drops every
    token listed (one per line) in ``meaningless.txt`` as well as the single
    space token, counts the remaining tokens and sorts them by descending
    count. The ranking is printed and appended to ``<bv>-list_sorted.txt``.

    Args:
        bv: The video's BV id; used as the prefix of the input and output
            file names.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count.

    Raises:
        FileNotFoundError: If the danmaku file or ``meaningless.txt`` is
            missing.
    """
    # Function-scope import so this block does not depend on the file header.
    from collections import Counter

    with open('{}&B站弹幕.txt'.format(bv), 'r', encoding='utf-8') as file:
        word = file.read()
    # jieba.add_word can be called here to register specific words before
    # segmenting.
    with open('meaningless.txt', 'r', encoding='UTF-8') as meaningless_file:
        # One stop word per line: split on the newline character, not on 'n'.
        meaningless_set = set(meaningless_file.read().split('\n'))
    meaningless_set.add(' ')
    word_list = jieba.lcut(word)
    # Count word frequencies in a single pass instead of calling
    # list.count() once per distinct word (which is quadratic).
    word_counts = Counter(w for w in word_list if w not in meaningless_set)
    # Sort by frequency, highest first.
    word_list_sorted = word_counts.most_common()
    print(word_list_sorted)
    with open('{}-list_sorted.txt'.format(bv), 'a', encoding='utf-8') as f:
        # NOTE(review): the most frequent entry is skipped on purpose by the
        # original code -- presumably it is the newline token; confirm.
        for entry in word_list_sorted[1:]:
            f.write(str(entry))
            f.write('\n')
    return word_list_sorted
if __name__ == '__main__':
    # The BV id must be a normal string literal with straight quotes;
    # typographic quotes are a syntax error.
    bv = 'BV1EP4y1j7kV'
    cids = get_cid(bv)
    print(cids)
    # The available days are looked up for the first cid only and then
    # reused for every cid in get_data.
    days = get_days(bv, cids[0])
    print(days)
    get_data(bv, days, cids)
    fenci(bv)



