def __init__(self):
self.url https://tv.ci/sb/ke7nhZe3c1-.html?wd %E5%A5%A5%E7%89%B9%E6%9B%BC submit
self.headers {
user-agent : Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36 }
def get_all_html(self):
    """Fetch the main search-result page and return its decoded HTML text.

    Retries forever: on any request failure it sleeps 3 seconds and tries
    again, printing the attempt number each time.

    Returns:
        str: the decoded response body of ``self.url``.
    """
    count = 1
    while True:
        try:
            print(f"总url第{count}次请求")
            count += 1
            response = requests.get(self.url, headers=self.headers, timeout=5)
        except Exception:
            # Network error / timeout: back off briefly, then retry.
            # NOTE(review): this loops forever on a permanently-down site;
            # consider a max-retry cap.
            time.sleep(3)
        else:
            text = response.content.decode()
            return text
def get_all_url(self, text):
    """Parse the search-result HTML and map each title to its detail-page URL.

    Args:
        text: HTML of the search-result page (from ``get_all_html``).

    Returns:
        dict: ``{title: absolute detail-page URL}``. The hrefs in the page
        are site-relative, so they are prefixed with ``https://tv.ci/``.
    """
    html = etree.HTML(text)
    # The source's "a/ href" was a garbled attribute selector; restored to "a/@href".
    href = html.xpath("/html/body/div[1]/div/div/div[1]/div/div/div[2]/ul/li[*]/div[2]/h4/a/@href")
    name = html.xpath("/html/body/div[1]/div/div/div[1]/div/div/div[2]/ul/li[*]/div[2]/h4/a/text()")
    dict_text = dict(zip(name, [f"https://tv.ci/{i}" for i in href]))
    print("所有url已爬取")
    return dict_text
def get_html(self, name, url):
    """Fetch one detail page and return its decoded HTML text.

    Mirrors ``get_all_html`` but for a single title's page: retries forever
    on failure with a 3-second back-off, printing the attempt number.

    Args:
        name: title of the item (used only in the progress message).
        url: absolute URL of the detail page.

    Returns:
        str: the decoded response body.
    """
    count = 1
    while True:
        try:
            print(f"{name}第{count}次请求")
            count += 1
            response = requests.get(url, headers=self.headers, timeout=5)
        except Exception:
            # Network error / timeout: back off briefly, then retry.
            time.sleep(3)
        else:
            text = response.content.decode()
            return text
def get_url(self, url, text):
    """Parse a detail page and map each episode name to its playback URL.

    Args:
        url: the detail-page URL (currently unused in the body; kept for
            interface compatibility with callers).
        text: HTML of the detail page (from ``get_html``).

    Returns:
        dict: ``{episode name: absolute playback URL}``; relative hrefs are
        prefixed with ``https://tv.ci/``.
    """
    html = etree.HTML(text)
    # The source's "a/ href" was a garbled attribute selector; restored to "a/@href".
    href = html.xpath("/html/body/div[1]/div/div[1]/div[3]/div/div[2]/div[1]/ul/li[*]/a/@href")
    name = html.xpath("/html/body/div[1]/div/div[1]/div[3]/div/div[2]/div[1]/ul/li[*]/a/text()")
    dict_text = dict(zip(name, [f"https://tv.ci/{i}" for i in href]))
    return dict_text
# def get_data(self, name, url):
# count 1
# while True:
# try:
# print(f {name}第{count}次请求 )
# count 1
# response requests.get(url, headers self.headers, timeout 5)
# except Exception:
# time.sleep(3)
# else:
# text response.content.decode()
# return text
# def save_data(self, name, url):
# text f {name}n
# with open( ./00文件夹/url/奥特曼.txt , w , encoding utf-8 ) as f:
# f.write(name, url)
def run(self):
    """Crawl every title's episode URLs and save them all to one text file.

    Flow: fetch the search page, extract ``{title: detail URL}``, then for
    each title fetch its detail page and collect ``{episode: play URL}``.
    The accumulated text (title line followed by its dict repr) is written
    once, after the loop, to ``./00文件夹/url/奥特曼.txt``.
    """
    url_text = ""
    # 获取主页html — fetch the search-result page.
    all_html = self.get_all_html()
    # 获取所有奥特曼url — map each title to its detail-page URL.
    dict_all_url = self.get_all_url(all_html)
    for name, url in dict_all_url.items():
        html = self.get_html(name, url)
        dict_url = self.get_url(url, html)
        url_text += name + "\n" + str(dict_url) + "\n"
        print(f"{name}已爬取")
    # Write everything in one pass so a mid-crawl failure doesn't truncate
    # previously-written data.
    with open("./00文件夹/url/奥特曼.txt", "w", encoding="utf-8") as f:
        f.write(url_text)