数据如下
<tbody><tr><td><span><span class="c-index c-index-hot1 c-gap-icon-right-small">1</span>张婷婷</span></td><td class="opr-toplist-right">92<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr><tr><td><span><span class="c-index c-index-hot1 c-gap-icon-right-small">2</span>王华</span></td><td class="opr-toplist-right">91<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr><tr><td><span><span class="c-index c-index-hot1 c-gap-icon-right-small">3</span>张岚</span></td><td class="opr-toplist-right">90<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr><tr><td><span><span class="c-index c-gap-icon-right-small">4</span>孙鸿峰</span></td><td class="opr-toplist-right">90<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr><tr><td><span><span class="c-index c-gap-icon-right-small">5</span>周海栋</span></td><td class="opr-toplist-right">89<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr><tr><td><span><span class="c-index c-gap-icon-right-small">6</span>武静</span></td><td class="opr-toplist-right">88<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr></tbody>
我一共用了两种方法解开这道题:一种将数据存入字典,另一种将数据存入元组。第一种方法麻烦一点,但可以用于通过正则表达式爬取网络中的信息。点个赞吧!代码如下:
import re
import requests
# Extract every student's rank number, name and grade from the table markup.
# The markup lives in ``html`` (not ``str``) so the built-in ``str`` type is
# not shadowed.
html = """<tbody>
<tr><td><span><span class="c-index c-index-hot1 c-gap-icon-right-small">1</span>张婷婷</span></td><td class="opr-toplist-right">92<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr>
<tr><td><span><span class="c-index c-index-hot1 c-gap-icon-right-small">2</span>王华</span></td><td class="opr-toplist-right">91<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr>
<tr><td><span><span class="c-index c-index-hot1 c-gap-icon-right-small">3</span>张岚</span></td><td class="opr-toplist-right">90<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr>
<tr><td><span><span class="c-index c-gap-icon-right-small">4</span>孙鸿峰</span></td><td class="opr-toplist-right">90<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr>
<tr><td><span><span class="c-index c-gap-icon-right-small">5</span>周海栋</span></td><td class="opr-toplist-right">89<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr>
<tr><td><span><span class="c-index c-gap-icon-right-small">6</span>武静</span></td><td class="opr-toplist-right">88<i class="opr-toplist-st c-icon c-icon-down"></i></td></tr>
</tbody>"""

# Method 1: narrow the search step by step with regular expressions and store
# each row as a small dict inside one big list.
# The patterns are compiled once, outside the loop; ``\s*`` tolerates optional
# whitespace around the captured values.
row_pattern = re.compile(r"<tr>(.*?)</tr>", re.S)
# ``(\d+)`` accepts multi-digit ranks. A greedy ``(.*)`` here would
# over-capture and return e.g. '1</span>张婷婷'.
num_pattern = re.compile(r'c-gap-icon-right-small">\s*(\d+)\s*</span>')
# The name sits between the inner and the outer closing </span>.
name_pattern = re.compile(r"</span>\s*(.*?)\s*</span>", re.S)
grade_pattern = re.compile(
    r'<td class="opr-toplist-right">\s*(.*?)\s*'
    r'<i class="opr-toplist-st c-icon c-icon-down"',
    re.S,
)

student = []
tr_list = row_pattern.findall(html)  # pull out the content of every <tr> first
for tr in tr_list:
    num_match = num_pattern.search(tr)
    name_match = name_pattern.search(tr)
    grade_match = grade_pattern.search(tr)
    # Skip rows that lack one of the fields instead of crashing on
    # ``None.group``.
    if num_match is None or name_match is None or grade_match is None:
        continue
    # All values are kept as the strings found in the markup.
    item = {
        "num": num_match.group(1),
        "name": name_match.group(1),
        "grade": grade_match.group(1),
    }
    print(item.values())
    student.append(item)
print(student)

# Method 2: a single regular expression whose three groups come back from
# ``findall`` as (num, name, grade) tuples.
record_pattern = re.compile(
    r'c-gap-icon-right-small">\s*(\d+)\s*</span>\s*(\w+)\s*</span>\s*</td>\s*'
    r'<td class="opr-toplist-right">\s*(\d+)\s*'
    r'<i class="opr-toplist-st c-icon c-icon-down"'
)
slt = record_pattern.findall(html)
print(slt)
点个赞吧!



