目录:步骤 / 代码 / 遇到的问题及解决办法
遇到的问题:1. 缺少模块;2. 报错(module 'urllib' has no attribute 'request');3. 报错(RemoteDisconnected)
步骤
1. 创建"Web服务"类型的应用,获取 key
2. 从网上找到 Python 爬虫代码
3. 运行、调试,直至能得到结果
注意:代码不是自己写的,是从网上找的
from urllib.parse import quote
import urllib.request
import urllib
import pandas as pd
import xlwt
import json
class getpoi:
    """Crawl AMap (Gaode) POI records for one city and export them to .xls files.

    Instantiating the class runs the whole job: it reads the POI type-code
    table, queries the text-search endpoint page by page for every
    (type code, area) combination, groups the results by the first two
    characters of the type code, and writes one workbook per group.
    """

    # Output folder. File names are appended directly, so keep the trailing slash.
    output_path = "C:/out/"
    # Input POI classification code table (selects which POI types are crawled).
    path_class = "C:/out/amap_poicode.xlsx"
    # AMap API key.
    amap_web_key = 'key'
    # Search endpoint; does not need to be changed.
    poi_search_url = "https://restapi.amap.com/v3/place/text?"
    # Target city (must match the name in the city code table).
    cityname = '重庆市'
    # Sub-districts of the target city (must match the names in the city code table).
    areas = ['重庆市市辖区', '万州区', '涪陵区', '渝中区', '大渡口区',
             '江北区', '沙坪坝区', '九龙坡区', '南岸区', '北碚区']
    # Class-level default kept for backward compatibility; __init__ replaces it
    # with a per-instance dict so separate crawls do not share results.
    totalcontent = {}

    def __init__(self):
        """Crawl every (type code, area) pair, then write one workbook per group."""
        self.totalcontent = {}
        for type_class in self.getclass():
            classtype = self._type_code(type_class['type_num'])
            # Codes ending in "0000" are never queried. The check does not
            # depend on the area or page, so it is done once per type code.
            if classtype[-4:] == "0000":
                continue
            for area in self.areas:
                self._crawl_area(type_class, classtype, area)
        for content, records in self.totalcontent.items():
            self.writeexcel(records, content)

    @staticmethod
    def _type_code(type_num):
        """Return the type code as text, left-padded with zeros to six characters."""
        return str(type_num).zfill(6)

    @staticmethod
    def _parse_response(raw):
        """Decode one JSON response and fail loudly if the API reported an error.

        Raises:
            RuntimeError: if the payload carries a ``status`` other than "1".
                Without this check an error payload surfaces later as an
                unexplained ``KeyError`` on ``count``.
        """
        poidata = json.loads(raw)
        if str(poidata.get('status', '1')) != '1':
            raise RuntimeError(
                "AMap API request failed: info=" + str(poidata.get('info'))
                + " infocode=" + str(poidata.get('infocode'))
            )
        return poidata

    def _crawl_area(self, type_class, classtype, area):
        """Fetch all result pages for one type code in one area into ``totalcontent``."""
        group = classtype[0:2]
        page = 1
        while True:
            poidata = self._parse_response(self.get_poi(classtype, area, page))
            # Stop on a zero count, and also on an empty page so that a
            # response with a non-zero count but no records cannot loop forever.
            if str(poidata.get('count', '0')) == "0" or not poidata.get('pois'):
                break
            print(f"area:{area} type:{classtype} page:第{page}页 count:{poidata['count']}poilist:")
            for pois in self.hand(poidata):
                pois['bigclass'] = type_class['bigclass']
                pois['midclass'] = type_class['midclass']
                pois['smallclass'] = type_class['smallclass']
                self.totalcontent.setdefault(group, []).append(pois)
            page += 1

    def writeexcel(self, data, classname):
        """Write one group of POI records to ``<output_path><cityname>_<bigclass>.xls``.

        Args:
            data: list of POI dicts as produced by ``hand`` plus the
                ``bigclass``/``midclass``/``smallclass`` keys.
            classname: sheet name (the two-character type-code prefix).

        Does nothing when ``data`` is empty, because the file name is taken
        from the first record.
        """
        if not data:
            return
        book = xlwt.Workbook(encoding='utf-8', style_compression=0)
        sheet = book.add_sheet(classname, cell_overwrite_ok=True)
        # First row: column titles.
        titles = ('x', 'y', 'count', 'name', 'adname', 'smallclass', 'typecode', 'midclass')
        for col, title in enumerate(titles):
            sheet.write(0, col, title)
        # NOTE(review): the .xls format caps a sheet at 65536 rows; very large
        # groups may need splitting - confirm against expected result sizes.
        for row, poi in enumerate(data, start=1):
            values = (
                poi['lng'], poi['lat'], 1, poi['name'], poi['adname'],
                poi['smallclass'], poi['classname'], poi['midclass'],
            )
            for col, value in enumerate(values):
                sheet.write(row, col, value)
        book.save(self.output_path + self.cityname + '_' + data[0]['bigclass'] + '.xls')

    def hand(self, poidate):
        """Convert the ``pois`` list of one response into flat record dicts.

        Each record has ``lng``, ``lat`` (floats parsed from the
        comma-separated ``location``), ``name``, ``adname`` and ``classname``
        (the POI's ``typecode``). POIs whose ``location`` cannot be parsed as
        "lng,lat" are skipped instead of aborting the whole crawl.
        """
        poilist = []
        for poi in poidate.get('pois') or []:
            parts = str(poi['location']).split(",")
            try:
                lng, lat = float(parts[0]), float(parts[1])
            except (ValueError, IndexError):
                continue
            poilist.append({
                'lng': lng,
                'lat': lat,
                'name': poi['name'],
                'adname': poi['adname'],
                'classname': poi['typecode'],
            })
        return poilist

    def readfile(self, readfilename, sheetname):
        """Read one sheet of an Excel workbook into a pandas DataFrame."""
        return pd.read_excel(readfilename, sheet_name=sheetname)

    def getclass(self):
        """Load the POI type table and return one dict per row.

        Reads sheet "amap_poicode" of ``path_class`` and maps the columns
        NEW_TYPE / 大类 / 中类 / 小类 to the keys ``type_num`` / ``bigclass`` /
        ``midclass`` / ``smallclass``.
        """
        readcontent = self.readfile(self.path_class, "amap_poicode")
        data = []
        for _, row in readcontent.iterrows():
            data.append({
                'type_num': row['NEW_TYPE'],
                'bigclass': row['大类'],
                'midclass': row['中类'],
                'smallclass': row['小类'],
            })
        return data

    def get_poi(self, keywords, city, page, timeout=30):
        """Request one page of search results and return the raw JSON text.

        Args:
            keywords: search keyword (the crawler passes the type code here).
            city: city or district name the search is limited to.
            page: 1-based page number; 25 records are requested per page.
            timeout: seconds to wait for the server before giving up, so a
                stalled connection cannot hang the crawl indefinitely.
        """
        poiurl = (
            self.poi_search_url
            + "key=" + self.amap_web_key
            + "&extensions=all&keywords=" + quote(keywords)
            + "&city=" + quote(city)
            + "&citylimit=true"
            + "&offset=25"
            + "&page=" + str(page)
            + "&output=json"
        )
        # A browser User-Agent is sent because the default one gets the
        # connection closed by the server (see the troubleshooting notes).
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"}
        req = urllib.request.Request(url=poiurl, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as f:
            return f.read().decode('utf8')
# Script entry point: constructing getpoi runs the whole crawl and export.
if __name__ == "__main__":
    gp = getpoi()
遇到的问题及解决办法
1.缺少模块
解决办法: 在cmd模式下,执行命令
pip install XXXX
安装需要的模块
2. 报错
报错:module 'urllib' has no attribute 'request'
解决办法:改一下导入语句,显式导入子模块
import urllib.request
3. 报错
报错:http.client.RemoteDisconnected: Remote end closed connection without response
原因:服务器根据请求头中的 User-Agent 判断出是爬虫并拒绝了请求
解决办法:修改 headers,加上浏览器的 User-Agent
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"}
req = urllib.request.Request(url=poiurl, headers=headers)



