栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

2021-10-02

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

2021-10-02

Python爬虫获取腾讯(tx)收集到的关于疫情的数据

提前说明:以下代码是在B站(bilibili)看到相关视频之后跟着敲的:
https://www.bilibili.com/video/BV177411j7qJ?spm_id_from=333.999.0.0

import json
import time
import urllib.request as rq
# from bs4 import BeautifulSoup


# Tencent news endpoints the data is fetched from. get_history() reads the
# current-day detail data from url_today and the historical data from url_last.
url_today = "https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5"    # current-day data
url_last = "https://view.inews.qq.com/g2/getOnsInfo?name=disease_other"    # historical data

def gethtml(url, timeout=10):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address to request.
        timeout: Seconds to wait on blocking network operations before the
            request is abandoned. Without a timeout a stalled server would
            block the caller indefinitely.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Send a browser-like User-Agent so the request is not rejected by
    # anti-scraping checks.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
    }
    req = rq.Request(url, headers=headers)
    # The context manager closes the response (and its connection) even when
    # reading or decoding raises.
    with rq.urlopen(req, timeout=timeout) as res:
        return res.read().decode("utf-8")

def _format_date(entry, default_year="2021"):
    """Return the date of a daily *entry* as a ``YYYY-MM-DD`` string.

    The entry's ``"date"`` value only carries month and day (``MM.DD``), so a
    year has to be supplied before it can be stored as a full date. The
    entry's own ``"y"`` field is used when present (NOTE(review): confirm the
    API really sends it); otherwise *default_year* is used.
    """
    year = str(entry.get("y", default_year))
    parsed = time.strptime(year + "." + entry["date"], "%Y.%m.%d")
    return time.strftime("%Y-%m-%d", parsed)


def _parse_history(data_last):
    """Build the per-date dict of cumulative and newly added case counts."""
    history_data = {}
    # Cumulative totals for each day.
    for day in data_last["chinaDayList"]:
        history_data[_format_date(day)] = {
            "confirm": day["confirm"],    # total confirmed cases
            "suspect": day["suspect"],    # total suspected cases
            "dead": day["dead"],          # total deaths
            "heal": day["heal"],          # total recovered cases
        }

    # Newly added counts for each day, merged into the same per-date record.
    for day_add in data_last["chinaDayAddList"]:
        # setdefault keeps a date that only appears in the "add" list from
        # raising KeyError; it simply gets a record without the totals.
        history_data.setdefault(_format_date(day_add), {}).update({
            "confirm_add": day_add["confirm"],    # newly confirmed cases
            "suspect_add": day_add["suspect"],    # newly suspected cases
            "dead_add": day_add["dead"],          # new deaths
            "heal_add": day_add["heal"],          # newly recovered cases
        })
    return history_data


def _parse_details(data_today):
    """Flatten the province -> city tree into one row per city.

    Expected layout of ``areaTree`` (per the original author's notes):

        areaTree: name      country level
                  today
                  total
                  children: name      province level
                            today
                            total
                            children: name      city level
                                      today
                                      total
    """
    update_time = data_today["lastUpdateTime"]
    # Index 0 is taken to be China, as in the original script; its children
    # are the provinces.
    provinces = data_today["areaTree"][0]["children"]

    details = []
    for province in provinces:
        for city in province["children"]:
            details.append([
                update_time,
                province["name"],
                city["name"],
                city["total"]["confirm"],    # total confirmed
                city["today"]["confirm"],    # newly confirmed
                city["total"]["heal"],       # total recovered
                city["total"]["dead"],       # total deaths
            ])
    return details


def get_history(url_last, url_today):
    """Download and parse the historical and current-day epidemic data.

    Args:
        url_last: Endpoint that serves the historical data.
        url_today: Endpoint that serves the current-day detail data.

    Returns:
        A ``(history_data, details)`` tuple.

        ``history_data`` maps a ``YYYY-MM-DD`` date string to a dict with the
        keys ``confirm``, ``suspect``, ``dead``, ``heal`` and their ``*_add``
        counterparts for that day.

        ``details`` is a list of rows
        ``[update_time, province, city, confirm, confirm_add, heal, dead]``.

    Raises:
        KeyError: If a response lacks one of the expected fields.
        ValueError: If a response is not valid JSON or a date cannot be parsed.
    """
    # Each response is a JSON object whose "data" member is itself a
    # JSON-encoded string, hence the two rounds of json.loads.
    data_last = json.loads(json.loads(gethtml(url_last))["data"])
    data_today = json.loads(json.loads(gethtml(url_today))["data"])

    return _parse_history(data_last), _parse_details(data_today)


# Fetch and parse both endpoints once; the returned (history_data, details)
# tuple is not stored or printed here.
get_history(url_last, url_today)
转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/286515.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号