import requests
import json
import time
import pandas as pd
from multiprocessing import Pool
import pymysql
def get_data():
    """Open a MySQL connection and return a ``(cursor, connection)`` pair.

    All credentials below are redacted placeholders -- fill in real values
    before running.  The caller is responsible for committing and for
    closing both handles when done.
    """
    host = '********'      # MySQL server address (redacted)
    # NOTE(review): the original port was redacted to the invalid token
    # ``330***`` (a syntax error); 3306 is the MySQL default -- confirm.
    port = 3306
    user = '****'          # redacted
    password = '******'    # redacted
    database = '**'        # redacted
    conn = pymysql.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        database=database,
        charset="utf8",
    )
    cur = conn.cursor()
    return cur, conn
def get_id():
    """Read contract numbers (合同号) from the local summary spreadsheet.

    Skips the first data row of the 合同号 column, coerces every value to
    ``int`` and returns them as a list.
    """
    # NOTE(review): the original path had its backslashes stripped
    # (``C:UserslbshipDesktop...``); restored as a raw-string Windows
    # path -- confirm the actual file location.
    df = pd.read_excel(r'C:\Users\lbship\Desktop\小米汇总(1).xlsx')
    listid = [int(x) for x in df.合同号[1:]]
    return listid
# Shared request headers for the Xiaomi CS backend.
# NOTE: the cookie carries a live session (serviceToken / JWT) that
# expires roughly daily -- refresh it before each crawl, otherwise every
# request is rejected.
headers={
'authority':'cs.pt.xiaomi.com',
'cookie':'uLocale=zh_CN; _aegis_pp=eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJuYmYiOjE2MzI3MDYxMDAsImlhdCI6MTYzMjcwNjEwMCwiaXNzIjoiTUktSU5GT1NFQyIsImF1ZCI6ImNzLnB0LnhpYW9taS5jb20iLCJjIjowLCJleHAiOjE2MzI5Njg5MDAsImlkIjoiZGQ2M2Q0YWNkNGUyZTFjODhmYzE5YWZlYWM2Nzc1NDMiLCJ0eXAiOiJwYXNzcG9ydCIsInN1YiI6IjI1ODMyNzY1MDMifQ.ZVQ7UMYt2NppIPRXxaa0IfGdFmScO20w_6GfmAFNHaUGp3GgQj3GSmoaRi0d7muqOpQdFREA0o4kXP7Tk49AFA; JSESSIonID=aaaK8AHWc5UFVdMhZSjVx; route=613dbcb48df018c80180367e4d5f2120; cUserId=cQL_eXMj_fqZhhKQDDuXO507-EQ; serviceToken=AJ2fzAEMmU7fnrCLTUircTZLPGSz6UrbQoh06wZIgBboBOBRvDWWEYc+sfDt8wJj0xTYY9fyveErgjAYHvnKsTQTcyiK92cP5Za0GPgxTSlDbY5/QD03QK4FEvu8NK9dF31gytED7Ei5ElizPQlWOpexzDNhyVwx6M/7cFnWTPX/gy1nNfK1MgzJiUIzw5unb6ioActGEGs8n8CtZmkont++EkeJnI8nP8BGz93O/Fiv9vY049QsmzcK2Vhu+en9hgQbeNqzixikNLJihgLZ+08fP/6Az20UawDvTAc7krM=; userId=2583276503; mifiadmincspub_slh=Cs3cyKReSq83+lzMwc6gZ7CYAJQ=; mifiadmincspub_ph=FV56S95Qm30C5HaEtiaxOA==',
'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36',
'x-referer':'https://cs.pt.xiaomi.com/web/'
}
def get_url(hth):
    """Look up the CS case for contract id *hth* and fetch its repayments.

    Queries the case-list endpoint filtered by ``xiaomiId``, takes the
    first matching case and hands its ``id``/``mifiId`` to
    :func:`get_amount`.  Any failure (network error, expired cookie, bad
    JSON, no matching case) is printed and swallowed so the worker pool
    keeps running.
    """
    url = ('https://cs.pt.xiaomi.com/cs/case/list?draw=2&start=0&length=10'
           '&search%5Bvalue%5D=&search%5Bregex%5D=false&caseCode=&orgnId=&assignee='
           '&xiaomiId=' + str(hth) +
           '&flag=&age=&status=&updateTimeFrom=1420041600000&updateTimeTo=1632758400000'
           '&createTimeFrom=1420041600000&createTimeTo=1632758400000&queueType=&idNum='
           '&mobile=&name=&amountFrom=&amountTo=&daysFrom=&daysTo=&caseType=&channel='
           '&caseCategory=&_=1632819272975')
    print('正在爬取用户ID{}'.format(hth))
    try:
        # The request itself goes inside the try: the original crashed the
        # whole worker on any network error.  A timeout keeps a hung
        # connection from stalling the pool forever.
        r = requests.get(url, headers=headers, timeout=30)
        data1 = r.json()
        case = data1['data'][0]          # IndexError here -> no case for this id
        get_amount(hth, case['id'], case['mifiId'])
    except (requests.RequestException, ValueError, KeyError, IndexError) as e:
        # Narrowed from a bare ``except Exception`` -- still best-effort,
        # but no longer hides programming errors like NameError.
        print(e)
def get_amount(hth, ids, mid):
    """Fetch repayment detail rows for mifi id *mid* and store them.

    Inserts one row per repayment record into the ``get_amount`` table as
    (contract id *hth*, case id *ids*, mifi id *mid*, accountTime, amount).
    Commits once for the whole batch and always closes the connection.
    """
    newurl = ('https://cs.pt.xiaomi.com/repayment/getRepaymentDetailList?draw=1&start=0'
              '&length=100&search%5Bvalue%5D=&search%5Bregex%5D=false&mifiId='
              + str(mid) + '&_=1632557536569')
    r1 = requests.get(newurl, headers=headers, timeout=30)
    data2 = r1.json()
    cur, conn = get_data()
    try:
        # Parameterized query: the original interpolated values with
        # str.format, which is SQL-injectable and breaks on quotes.
        sql = "insert into get_amount values(%s,%s,%s,%s,%s)"
        # recordsTotal can exceed the page size (length=100); iterate only
        # the rows actually returned.
        total = int(data2['recordsTotal'])
        for row in data2['data'][:total]:
            cur.execute(sql, (hth, ids, mid, row['accountTime'], row['amount']))
        conn.commit()  # single commit for the whole batch, not per row
    finally:
        # The original leaked the connection on every call.
        cur.close()
        conn.close()
if __name__ == '__main__':
    # Crawl every contract id with 3 worker processes.  Pool.map takes the
    # function object itself (no parentheses) plus the iterable of args.
    listid = get_id()
    pool = Pool(processes=3)
    try:
        pool.map(get_url, listid)
    finally:
        # Ensure workers are reaped even if map() raises.
        pool.close()
        pool.join()
# 结果:
# 注: cookies 和 URL 会过期, 所以每天爬前都要更新
# (Note: the cookie and the URL's timestamp parameters expire, so refresh
# them before each day's crawl.)



