A small practice exercise: crawling a page, parsing the data, and storing it in MySQL
2. Results
3. Target page: https://hangzhou.anjuke.com/sale/p15/?from=navigation
4. Source code
import requests
from bs4 import BeautifulSoup
import mysql.connector


class ZufangSpider():
    # Shared MySQL connection and cursor for the spider
    my_db = mysql.connector.connect(host='localhost', user='root', passwd='root', database='mytestdb',
                                    auth_plugin='mysql_native_password')
    mycursor = my_db.cursor()

    def __init__(self):
        self.url = 'https://hangzhou.anjuke.com/sale/p15/?from=navigation'
        self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'}

    def send_requests(self, url):
        # Fetch the listing page; only return the response when the request succeeds
        resp = requests.get(url, headers=self.headers)
        if resp.status_code == 200:
            return resp

    def parse_html(self, resp):
        lst = []
        html = resp.text
        bs = BeautifulSoup(html, 'lxml')
        section = bs.find('section', class_='list')
        div_list = section.find_all('div', class_='property-content')
        for item in div_list:
            # Listing title
            titles = item.find('h3', class_='property-content-title-name').text
            content = item.find('div', class_='property-content-info')
            p_list = content.find_all('p')
            # Layout, area, floor and build year; the fixed-offset slices drop the
            # whitespace and newlines the page wraps around each value
            houseinfo = p_list[0].text
            area = p_list[1].text
            areas = area[28:34]
            floor = p_list[3].text
            floors = floor[28:37]
            year = p_list[4].text
            years = year[28:36]
            # Community name and address
            name = item.find('p', class_='property-content-info-comm-name').text
            address = item.find('p', class_='property-content-info-comm-address').text
            # Total price (in 万), average price per square metre, and the listing agent
            price = item.find('span', class_='property-price-total-num').text
            prices = price + '万'
            price_average = item.find('p', class_='property-price-average').text
            sale_man = item.find('span', class_='property-extra-text').text
            lst.append((titles, houseinfo, areas, floors, years, name, address, prices, price_average, sale_man))
        self.save(lst)

    def save(self, lst):
        # Batch-insert every parsed listing into the tb_hangzhou table
        sql = ('insert into tb_hangzhou (titles,houseinfo,areas,floors,years,nname,address,prices,price_average,sale_man) '
               'values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')
        self.mycursor.executemany(sql, lst)
        self.my_db.commit()
        print('Insert finished')

    def start(self):
        url = self.url
        resp = self.send_requests(url)
        self.parse_html(resp)


if __name__ == '__main__':
    zufang = ZufangSpider()
    zufang.start()
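
The save() method assumes a tb_hangzhou table already exists in the mytestdb database with columns matching its INSERT statement. The article does not show the schema, so the following is only a minimal sketch of how that table could be created: the column names mirror the INSERT in save(), while the types and sizes are assumptions.

import mysql.connector

# One-off setup script: run once before starting the spider.
# Credentials match the ones used in ZufangSpider; column types are guesses.
db = mysql.connector.connect(host='localhost', user='root', passwd='root',
                             database='mytestdb', auth_plugin='mysql_native_password')
cursor = db.cursor()
cursor.execute("""
    CREATE TABLE IF NOT EXISTS tb_hangzhou (
        id INT AUTO_INCREMENT PRIMARY KEY,
        titles VARCHAR(255),
        houseinfo VARCHAR(255),
        areas VARCHAR(64),
        floors VARCHAR(64),
        years VARCHAR(64),
        nname VARCHAR(255),
        address VARCHAR(255),
        prices VARCHAR(64),
        price_average VARCHAR(64),
        sale_man VARCHAR(64)
    )
""")
db.close()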



