栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

同花顺爬取概念股数据

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

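同花顺爬取概念股数据:下面的脚本请求同花顺概念板块页面(http://q.10jqka.com.cn/gn/),解析出各概念板块的名称和板块代码,并通过 pandas 写入 MySQL 的 concept_bord 表。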

#!/usr/bin/env python
import sys
import random
import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
from sqlalchemy import create_engine
# SQLAlchemy connection URL: MySQL through the PyMySQL driver, filled in as
# user, password, host, port and database name, with a utf8 charset.
# NOTE(review): the credentials are hard-coded in source — consider loading
# them from configuration or the environment instead.
connect_info = 'mysql+pymysql://{}:{}@{}:{}/{}?charset=utf8'.format("root", "123456", "localhost", 3306, "flush")
# Module-level engine built at import time; Flush.downloader passes it to
# DataFrame.to_sql when storing the scraped rows.
engine = create_engine(connect_info)
from config import config


class Flush(object):
    """Scraper that stores the concept boards listed on a 10jqka index page.

    ``downloader`` fetches a page, reads every anchor matched by
    ``.cate_group .cate_items a`` and appends one row per anchor to the
    ``concept_bord`` MySQL table through the module-level ``engine``.
    """

    def __init__(self):
        # Page counter; only read when formatting the success message.
        self.PAGE_TRACK = 1
        # The attributes below are not referenced by any method of this
        # class — presumably reserved for paging / proxy support; verify
        # against other callers before removing them.
        self.MAX_PAGE = 5
        self.PROXY_POOL_API = "http://127.0.0.1:5555/random"
        self.PAGE_LIST = []
        self.proxy_con = 0
        self.MAX_PAGE_flag = True

    def downloader(self, url, num_retries=1):
        """Download ``url``, parse the concept boards and append them to MySQL.

        Args:
            url: Address of the concept-board index page to fetch.
            num_retries: How many extra download attempts are made when the
                HTTP request itself fails (connection error, timeout or an
                HTTP error status). Parsing and database errors are not
                retried, so rows are never appended twice.

        Returns:
            None. Progress and errors are reported with ``print``; any
            exception raised after the headers are obtained is caught and
            printed rather than propagated.
        """
        headers = config.get_headers()
        try:
            html = self._fetch_html(url, headers, num_retries)
            soup = BeautifulSoup(html, 'html.parser')
            # Echo the page title so a blocked / wrong page is easy to spot.
            print(soup.title)
            bord_list = self._extract_boards(soup)
            # Fixed: the original called ``pd.Dataframe`` (wrong case), which
            # raised AttributeError and was silently swallowed below, so no
            # row ever reached the database.
            df1 = pd.DataFrame(bord_list)
            print(bord_list)
            df1.to_sql('concept_bord', engine, if_exists='append', index=False)
            print('PAGGE is {} , URL is:{} to Mysql table successfully!'.format(self.PAGE_TRACK, url))
        except Exception as e:
            # Best-effort boundary: report the failure and return normally.
            print("异常{}, 重新下载{}".format(e, url))

    def _fetch_html(self, url, headers, num_retries):
        """Return the body of ``url`` decoded as GBK, retrying failed requests."""
        retries_left = num_retries
        while True:
            time.sleep(random.random() * 1)  # random delay of up to one second
            try:
                respons = requests.get(url, headers=headers, timeout=4)
                # Treat 4xx/5xx answers as failures instead of parsing an
                # error page as if it were the board listing.
                respons.raise_for_status()
                return str(respons.content, encoding="gbk")
            except requests.RequestException as e:
                if retries_left <= 0:
                    # Out of retries: let downloader() report the error.
                    raise
                retries_left -= 1
                print("异常{}, 重新下载{}".format(e, url))

    @staticmethod
    def _extract_boards(soup):
        """Build one ``{"name", "bord"}`` dict per concept-board anchor in ``soup``."""
        # "bord" is the second-to-last "/"-separated piece of the href, i.e.
        # the last path segment when the link ends with a trailing slash.
        # TODO: more fields still need to be added.
        return [
            {"name": group.string, "bord": group['href'].split("/")[-2]}
            for group in soup.select(".cate_group .cate_items a")
        ]

def main():
    """Scrape the 10jqka concept-board index page once.

    Any ``Exception`` raised while creating the scraper or running the
    download is printed instead of being propagated.
    """
    start_url = "http://q.10jqka.com.cn/gn/"
    try:
        Flush().downloader(start_url)
    except Exception as exc:
        print(exc)


# Script entry point: run the scraper once, then terminate the interpreter.
if __name__ == "__main__":
    try:
        main()
    finally:
        # NOTE(review): sys.exit() inside ``finally`` runs even when main()
        # is interrupted (e.g. KeyboardInterrupt), replacing that exception
        # with SystemExit and a zero exit status — confirm this quiet exit
        # is intended.
        sys.exit()

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/700828.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号