import requests
from bs4 import BeautifulSoup
import re
import io
import sys
URL = "https://*******************"  # URL intentionally not provided

# The listing is crawled page by page as URL + page number, pages 1 through 4.
FIRST_PAGE = 1
LAST_PAGE = 4
# At most this many posts are handled per listing page.
POSTS_PER_PAGE = 10
# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 10
# Placeholder printed when no code could be found for a post.
NO_CODE = "#"
# Exact length a paragraph must have to be accepted as the code.
CODE_LENGTH = 40

# NOTE(review): the regex literals in the copy of this script that was
# reviewed had been corrupted (capture groups and HTML tags replaced by a
# placeholder image URL), which left the file unparseable. The patterns below
# are reconstructed from the surviving fragments (`href="` ... `" rel=`,
# `tag">` ... `<`, `src="` ... `"`) -- confirm them against the target markup.
_HREF_RE = re.compile(r'href="(.*?)" rel=')
_TAG_RE = re.compile(r'tag">(.*?)<')
_SRC_RE = re.compile(r'src="(.*?)"')
_PARAGRAPH_RE = re.compile(r"<p>(.*?)</p>")


def extract_post_fields(post_html):
    """Pull the detail link, tag names and image URL out of one post's HTML.

    Args:
        post_html: HTML source of a single post element, as text.

    Returns:
        A ``(link, tags, image_url)`` tuple: ``link`` is the first
        ``href`` that is followed by a ``rel`` attribute (``None`` when there
        is none), ``tags`` is the list of tag texts, and ``image_url`` is
        every ``src`` value concatenated (``""`` when there is no image).
    """
    links = _HREF_RE.findall(post_html)
    link = links[0] if links else None
    tags = _TAG_RE.findall(post_html)
    image_url = "".join(_SRC_RE.findall(post_html))
    return link, tags, image_url


def find_code(paragraphs):
    """Return the first paragraph that looks like the code, else ``NO_CODE``.

    A paragraph qualifies when it is exactly ``CODE_LENGTH`` characters long
    and its second character is a lowercase letter or a digit.
    """
    for text in paragraphs:
        if len(text) == CODE_LENGTH and (text[1].islower() or text[1].isdigit()):
            return text
    return NO_CODE


def fetch_code(detail_url):
    """Download a post's detail page and look for the code inside it.

    Returns:
        ``None`` when the page does not answer with HTTP 200, ``NO_CODE``
        when it loads but holds no qualifying paragraph, otherwise the code.
    """
    response = requests.get(url=detail_url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        return None
    soup = BeautifulSoup(response.text, "lxml")
    bodies = soup.select(".entry-content")
    if not bodies:
        # No content container on the page: nothing to search.
        return NO_CODE
    return find_code(_PARAGRAPH_RE.findall(str(bodies[0])))


def save_image(name, image_url):
    """Download ``image_url`` and write it to disk under the post's name."""
    response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        # Do not store an error page as if it were the image.
        print("网络异常")
        return
    try:
        # NOTE(review): the path is kept exactly as found; path separators
        # may have been lost from it (e.g. a directory under F:) -- verify.
        with open(r"F:liuli%s.jpg" % name, "wb") as image_file:
            image_file.write(response.content)
    except OSError:
        # Covers a missing directory (FileNotFoundError) as well as titles
        # containing characters the file system rejects.
        print("名字不合法")
    else:
        print("成功")


def process_post(title_el, post_el):
    """Print one post's code, tags, image URL and title, then save its image.

    Args:
        title_el: the post's ``.entry-title`` element.
        post_el: the post's ``.post`` element.
    """
    name = title_el.string
    # Serialise the element once instead of once per pattern.
    link, tags, image_url = extract_post_fields(str(post_el))

    code = fetch_code(link) if link else NO_CODE
    print("网络异常" if code is None else code)

    print(tags)
    print(image_url)
    print(name)
    if not name or not image_url:
        # Nothing to download or no usable file name; just echo the URL.
        print(image_url)
    else:
        save_image(name, image_url)


def main():
    """Crawl every listing page and process the posts found on each."""
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        response = requests.get(url=URL + str(page), timeout=REQUEST_TIMEOUT)
        print(response.status_code)
        response.encoding = "utf-8"
        if response.status_code != 200:
            continue
        soup = BeautifulSoup(response.text, "lxml")
        posts = soup.select(".post")
        titles = soup.select(".entry-title")
        # zip() stops at the shorter list, so a page with fewer than
        # POSTS_PER_PAGE posts no longer raises IndexError.
        for title_el, post_el in zip(titles[:POSTS_PER_PAGE], posts[:POSTS_PER_PAGE]):
            process_post(title_el, post_el)


if __name__ == "__main__":
    main()



