python 爬虫免费简历模板

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
from urllib.parse import urljoin, urlparse

import requests
from lxml import etree
# Goal: parse the free resume templates listed on the first three listing pages, then download and save them. Listing: https://sc.chinaz.com/jianli/free.html
# Crawl settings. The first listing page has no numeric suffix; later pages
# follow the free_2.html, free_3.html, ... pattern.
FIRST_PAGE_URL = "https://sc.chinaz.com/jianli/free.html"
LATER_PAGE_URL = "https://sc.chinaz.com/jianli/free_{}.html"
PAGE_COUNT = 3
SAVE_DIR = './jianliLibs'
REQUEST_TIMEOUT = 10  # seconds; requests applies no timeout unless one is given
HEADERS = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0"
}

# One <div> per resume card on a listing page; the card's first <a> links to
# the resume detail page.
RESUME_CARD_XPATH = '//div[@id="main"]/div/div'

# NOTE(review): the original expression here was '//div[@]/ul', which is not
# valid XPath (the attribute predicate was lost), so the script failed before
# downloading anything. Matching on a "downlist" class is an ASSUMPTION about
# the detail page markup -- confirm against the live page and adjust.
DOWNLOAD_LIST_XPATH = '//div[contains(@class, "downlist")]/ul'


def listing_page_urls(page_count=PAGE_COUNT):
    """Return the URLs of the first ``page_count`` listing pages.

    Returns an empty list when ``page_count`` is less than 1.
    """
    if page_count < 1:
        return []
    later_pages = [LATER_PAGE_URL.format(n) for n in range(2, page_count + 1)]
    return [FIRST_PAGE_URL] + later_pages


def archive_file_name(archive_url):
    """Return the last path segment of ``archive_url``.

    The query string and fragment are ignored so they never end up in the
    file name. Returns an empty string when the URL path ends with ``/``.
    """
    return urlparse(archive_url).path.rsplit('/', 1)[-1]


def fetch_tree(session, url):
    """Download ``url`` and return the parsed lxml HTML tree.

    Raises ``requests.RequestException`` on connection problems, timeouts
    and HTTP error statuses.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return etree.HTML(response.text)


def resume_page_urls(listing_tree, page_url):
    """Return absolute detail-page URLs for every resume card in the listing."""
    if listing_tree is None:
        # Defensive: nothing could be parsed from the response.
        return []
    found = []
    for card in listing_tree.xpath(RESUME_CARD_XPATH):
        hrefs = card.xpath('./a/@href')
        if not hrefs:
            # Some wrapper divs may not contain a link; skip them.
            continue
        # urljoin handles protocol-relative ("//host/..."), site-relative
        # and already-absolute hrefs alike.
        found.append(urljoin(page_url, hrefs[0]))
    return found


def download_archives(session, resume_url):
    """Save the first archive linked from each download list on a detail page.

    Files are written into ``SAVE_DIR`` under the archive's own file name.
    Raises ``requests.RequestException`` if a request fails.
    """
    detail_tree = fetch_tree(session, resume_url)
    if detail_tree is None:
        return
    for download_list in detail_tree.xpath(DOWNLOAD_LIST_XPATH):
        hrefs = download_list.xpath('./li/a/@href')
        if not hrefs:
            continue
        archive_url = urljoin(resume_url, hrefs[0])
        file_name = archive_file_name(archive_url)
        if not file_name:
            continue
        response = session.get(archive_url, timeout=REQUEST_TIMEOUT)
        # Fail loudly instead of saving an HTML error page as an archive.
        response.raise_for_status()
        with open(os.path.join(SAVE_DIR, file_name), 'wb') as fp:
            fp.write(response.content)
        print(file_name, '下载成功！！！')


def main():
    """Crawl the listing pages and download every resume archive found."""
    # Create the output directory once, up front.
    os.makedirs(SAVE_DIR, exist_ok=True)
    with requests.Session() as session:
        session.headers.update(HEADERS)
        for page_url in listing_page_urls(PAGE_COUNT):
            try:
                listing_tree = fetch_tree(session, page_url)
            except requests.RequestException as exc:
                print(page_url, '请求失败：', exc)
                continue
            detail_urls = resume_page_urls(listing_tree, page_url)
            print('爬取成功！！！', detail_urls)
            for detail_url in detail_urls:
                # One failed resume must not abort the rest of the crawl.
                try:
                    download_archives(session, detail_url)
                except requests.RequestException as exc:
                    print(detail_url, '下载失败：', exc)


if __name__ == '__main__':
    main()
python 爬虫 免费简历模板

Python相关栏目本月热门文章

python 爬虫免费简历模板