栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

用Python爬取系统的回款列表到SQL里面做分析

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

用Python爬取系统的回款列表到SQL里面做分析

import requests
import json
import time
import pandas as pd
from multiprocessing import Pool
import pymysql

def get_data():
    """Open a MySQL connection and return a cursor together with it.

    Connection settings are read from environment variables so that no
    credentials have to be written into the source file:

    * ``MYSQL_HOST``     - server host name (default ``'localhost'``)
    * ``MYSQL_PORT``     - server port (default ``3306``)
    * ``MYSQL_USER``     - user name (required)
    * ``MYSQL_PASSWORD`` - password (required)
    * ``MYSQL_DATABASE`` - schema to use (required)

    Returns:
        tuple: ``(cur, conn)`` - a cursor and the connection it belongs to.
        The caller owns both objects and must close them when done.

    Raises:
        KeyError: a required environment variable is not set.
        ValueError: ``MYSQL_PORT`` is not an integer.
    """
    # Function-scope import keeps this block self-contained; the file's
    # top-level import list does not include ``os``.
    import os

    conn = pymysql.connect(
        host=os.environ.get('MYSQL_HOST', 'localhost'),
        port=int(os.environ.get('MYSQL_PORT', '3306')),
        user=os.environ['MYSQL_USER'],
        password=os.environ['MYSQL_PASSWORD'],
        database=os.environ['MYSQL_DATABASE'],
        charset="utf8",
    )
    cur = conn.cursor()
    return cur, conn



def get_id(path=r'C:\Users\lbship\Desktop\小米汇总(1).xlsx'):
    """Load the contract numbers to crawl from an Excel workbook.

    Args:
        path: Location of the workbook. Defaults to the original script's
            desktop file; the path separators lost in the published copy
            are restored here.

    Returns:
        list[int]: The values of the ``合同号`` column converted to ``int``.
        The first data row is skipped and empty cells are ignored.
    """
    df = pd.read_excel(path)
    # The first data row is skipped exactly as the original script did.
    # NOTE(review): presumably that row is a sub-header/summary - confirm.
    # Empty cells are dropped because int(NaN) would raise ValueError.
    contract_numbers = df['合同号'].iloc[1:].dropna()
    return [int(x) for x in contract_numbers]
# HTTP headers sent with every request to cs.pt.xiaomi.com; both get_url and
# get_amount pass this dict to requests.get.
# NOTE: the 'cookie' value is a captured login session. According to the
# note at the end of this article, cookies expire, so it has to be replaced
# with a fresh one before each day's crawl.
headers={
    'authority':'cs.pt.xiaomi.com',
    'cookie':'uLocale=zh_CN; _aegis_pp=eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJuYmYiOjE2MzI3MDYxMDAsImlhdCI6MTYzMjcwNjEwMCwiaXNzIjoiTUktSU5GT1NFQyIsImF1ZCI6ImNzLnB0LnhpYW9taS5jb20iLCJjIjowLCJleHAiOjE2MzI5Njg5MDAsImlkIjoiZGQ2M2Q0YWNkNGUyZTFjODhmYzE5YWZlYWM2Nzc1NDMiLCJ0eXAiOiJwYXNzcG9ydCIsInN1YiI6IjI1ODMyNzY1MDMifQ.ZVQ7UMYt2NppIPRXxaa0IfGdFmScO20w_6GfmAFNHaUGp3GgQj3GSmoaRi0d7muqOpQdFREA0o4kXP7Tk49AFA; JSESSIonID=aaaK8AHWc5UFVdMhZSjVx; route=613dbcb48df018c80180367e4d5f2120; cUserId=cQL_eXMj_fqZhhKQDDuXO507-EQ; serviceToken=AJ2fzAEMmU7fnrCLTUircTZLPGSz6UrbQoh06wZIgBboBOBRvDWWEYc+sfDt8wJj0xTYY9fyveErgjAYHvnKsTQTcyiK92cP5Za0GPgxTSlDbY5/QD03QK4FEvu8NK9dF31gytED7Ei5ElizPQlWOpexzDNhyVwx6M/7cFnWTPX/gy1nNfK1MgzJiUIzw5unb6ioActGEGs8n8CtZmkont++EkeJnI8nP8BGz93O/Fiv9vY049QsmzcK2Vhu+en9hgQbeNqzixikNLJihgLZ+08fP/6Az20UawDvTAc7krM=; userId=2583276503; mifiadmincspub_slh=Cs3cyKReSq83+lzMwc6gZ7CYAJQ=; mifiadmincspub_ph=FV56S95Qm30C5HaEtiaxOA==',
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36',
    'x-referer':'https://cs.pt.xiaomi.com/web/'
}

def get_url(hth):
    """Look up the case for one ID and store its repayment records.

    Queries the case-list endpoint with ``hth`` as the ``xiaomiId`` filter,
    takes ``id`` and ``mifiId`` from the first returned case and passes them
    to ``get_amount``.

    This function is used as a ``Pool.map`` worker. An exception escaping
    from a worker aborts the whole map, so every failure for a single ID
    (network error, non-JSON reply, no matching case, database error) is
    reported here and swallowed; the remaining IDs are still processed.

    Args:
        hth: The ID to search for (a contract number from the workbook).

    Returns:
        None.
    """
    url='https://cs.pt.xiaomi.com/cs/case/list?draw=2&start=0&length=10&search%5Bvalue%5D=&search%5Bregex%5D=false&caseCode=&orgnId=&assignee=&xiaomiId='+str(hth)+'&flag=&age=&status=&updateTimeFrom=1420041600000&updateTimeTo=1632758400000&createTimeFrom=1420041600000&createTimeTo=1632758400000&queueType=&idNum=&mobile=&name=&amountFrom=&amountTo=&daysFrom=&daysTo=&caseType=&channel=&caseCategory=&_=1632819272975'
    print('正在爬取用户ID{}'.format(hth))
    try:
        # A timeout keeps one stalled connection from blocking the worker
        # forever; the request is inside the try so that a network error or
        # an HTML login page (expired cookie) only fails this one ID.
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        data1 = json.loads(r.text)
        cases = data1['data']
        if not cases:
            print('用户ID{}没有查询到案件'.format(hth))
            return
        first_case = cases[0]
        get_amount(hth, first_case['id'], first_case['mifiId'])
    except Exception as e:
        # Deliberately broad: this is the worker's top-level boundary.
        print('用户ID{}爬取失败: {}'.format(hth, e))

def get_amount(hth,ids,mid):
    """Fetch the repayment detail list for one account and save it to MySQL.

    Requests up to 100 repayment records for ``mid`` and inserts one row per
    record into the ``get_amount`` table as
    ``(hth, ids, mid, accountTime, amount)``.

    Args:
        hth: The ID the case was looked up with.
        ids: The case ``id`` returned by the case-list endpoint.
        mid: The case ``mifiId``; used as the ``mifiId`` query parameter.

    Returns:
        None.

    Raises:
        Exception: Request, JSON-decoding and database errors propagate to
            the caller.
    """
    newurl='https://cs.pt.xiaomi.com/repayment/getRepaymentDetailList?draw=1&start=0&length=100&search%5Bvalue%5D=&search%5Bregex%5D=false&mifiId='+str(mid)+'&_=1632557536569'
    r1 = requests.get(newurl, headers=headers, timeout=30)
    data2 = json.loads(r1.text)

    # Iterate over the rows actually returned. 'recordsTotal' can be larger
    # than the page size requested (length=100), in which case indexing by
    # it would run past the end of 'data'.
    rows = [
        (hth, ids, mid, record['accountTime'], record['amount'])
        for record in data2['data']
    ]
    if not rows:
        return  # nothing to store; avoid opening a connection for no reason

    cur, conn = get_data()
    try:
        # Parameterized query: the driver escapes the values, so quotes in
        # the data cannot break or inject into the statement, and a missing
        # value is stored as NULL rather than the text 'None'.
        cur.executemany(
            "insert into get_amount values(%s,%s,%s,%s,%s)",
            rows,
        )
        conn.commit()  # one commit for the whole batch
    finally:
        # Each call opens its own connection, so it must be released here.
        cur.close()
        conn.close()


if __name__ == '__main__':
    # Load the IDs before starting any worker process, so a failure while
    # reading the workbook does not leave an idle pool behind.
    listid = get_id()
    # The context manager guarantees the workers are torn down even if
    # map() raises.
    with Pool(processes=3) as pool:
        # Crawl with multiple processes. Note that get_url is passed as a
        # function object (no parentheses), not called here.
        pool.map(get_url, listid)
        pool.close()
        pool.join()

 结果:

注:Cookie 和 URL 会过期,所以每天爬取之前都要先更新。

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/275321.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号