一、要爬取的页面
二、代码
#coding=gbk
导入模块
```python
import io
import sys
import pandas as pd
import requests
from lxml import etree
# Page to scrape.
url = "https://www.bilibili.com/v/popular/rank/guochan"
# Headers sent with the request (a desktop browser User-Agent string).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/95.0.4638.69 Safari/537.36 Edg/95.0.1020.53 "
}
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10
# Destination workbook for the scraped table.
OUTPUT_PATH = "D:/安装包/哔哩哔哩动画.xlsx"


def clean_texts(texts):
    """Return *texts* with surrounding whitespace removed from every item.

    The text nodes pulled out of the page are padded with newlines and
    spaces; only that padding is removed, the content itself is untouched.

    Args:
        texts: iterable of strings.

    Returns:
        A new list of stripped strings, in the same order.
    """
    return [text.strip() for text in texts]


def build_table(names, episodes, scores, plays, comments, likes):
    """Assemble the scraped columns into a DataFrame whose index starts at 1.

    Args:
        names: values for the "动漫名称" column.
        episodes: values for the "更新到多少集" column.
        scores: values for the "综合评分" column.
        plays: values for the "播放量" column.
        comments: values for the "评论" column.
        likes: values for the "点赞" column.

    Returns:
        pandas.DataFrame with the six columns above, indexed 1..N.

    Raises:
        ValueError: raised by pandas when the columns differ in length.
    """
    table = pd.DataFrame({
        "动漫名称": names,
        "更新到多少集": episodes,
        "综合评分": scores,
        "播放量": plays,
        "评论": comments,
        "点赞": likes,
    })
    # Shift the default 0-based index so the first row is numbered 1.
    table.index = table.index + 1
    return table


def main():
    """Download the page, extract the columns and save them to OUTPUT_PATH.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        SystemExit: if the expected list element is not found in the page.
    """
    # Re-wrap stdout with GB18030 (original note: works around a GBK problem).
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()

    tree = etree.HTML(response.text)
    # Locate the <ul> element(s) that hold the entries.
    rank_lists = tree.xpath("/html/body/div[3]/div/div[2]/div[2]/ul")
    if not rank_lists:
        sys.exit("No list found at the expected XPath; the page layout may have changed.")

    # Every matched list is written to the same path, so a later list
    # replaces the file produced by an earlier one.
    for rank_list in rank_lists:
        # Title
        names = rank_list.xpath("./li/div[2]/div[2]/a/text()")
        # Latest episode
        episodes = rank_list.xpath("./li/div[2]/div[2]/div[1]/text()")
        # Play count
        plays = clean_texts(rank_list.xpath("./li/div[2]/div[2]/div[2]/span[1]/text()"))
        # Comment count
        comments = clean_texts(rank_list.xpath("./li/div[2]/div[2]/div[2]/span[2]/text()"))
        # Like count
        likes = clean_texts(rank_list.xpath("./li/div[2]/div[2]/div[2]/span[3]/text()"))
        # Overall score
        scores = rank_list.xpath("./li/div[2]/div[2]/div[3]/div/text()")

        table = build_table(names, episodes, scores, plays, comments, likes)
        # Save to Excel.
        table.to_excel(OUTPUT_PATH)


if __name__ == "__main__":
    main()



