Excel：有的封装，封装了个寂寞；这里尝试自己封装一下。
from openpyxl import Workbook, load_workbook
import time
class excel():
    """Thin convenience wrapper around an openpyxl ``Workbook``.

    A fresh in-memory workbook is created on construction. Rows are added
    with :meth:`write_row`, extra sheets with :meth:`new_sheet`, and the
    result is written to disk with :meth:`save`.
    """

    def __init__(self) -> None:
        self.wb = Workbook()
        # Handle on the workbook's active (default) worksheet.
        self.wbac = self.wb.active

    def get_sheet_names(self):
        """Return the titles of all worksheets in the workbook."""
        # ``sheetnames`` is the supported replacement for the deprecated
        # ``Workbook.get_sheet_names()``.
        return self.wb.sheetnames

    def write_row(self, list, sheet_name='Sheet', with_url=None):
        """Append every row in ``list`` to the worksheet ``sheet_name``.

        Args:
            list: Sequence of rows; each row is a sequence of cell values.
                The first row (index 0) is treated as a header and is never
                rewritten. The caller's rows are not modified.
            sheet_name: Title of the target worksheet.
            with_url: Optional column index. When given, the cell at that
                index in every non-header row is replaced by a HYPERLINK
                formula whose target is the value of the *next* column and
                whose label is the cell's original value.

        Raises:
            KeyError: If the workbook has no worksheet named ``sheet_name``.
            IndexError: If ``with_url`` points at the last column of a row,
                because there is no following column to take the URL from.
        """
        sheet = self.wb[sheet_name]
        for i, row in enumerate(list):
            if i > 0 and with_url is not None and 0 <= with_url < len(row):
                # Work on a copy so the caller's data is left untouched.
                row = [*row]
                # NOTE(review): the link target is assumed to live in the
                # column right after ``with_url`` -- confirm against callers.
                row[with_url] = '=HYPERlink("{}","{}")'.format(
                    row[with_url + 1], row[with_url]
                )
            sheet.append(row)

    def new_sheet(self, name):
        """Create a new worksheet titled ``name`` in the workbook."""
        self.wb.create_sheet(name)

    def save(self, path=None):
        """Write the workbook to ``path``.

        Args:
            path: Destination passed to ``Workbook.save``. When omitted, a
                file named ``./files/Excel<epoch-milliseconds>.xlsx`` is
                used. The timestamp is taken at call time, so successive
                default saves do not overwrite each other.
        """
        if path is None:
            import os

            # The default target lives under ./files; make sure it exists.
            os.makedirs('./files', exist_ok=True)
            path = f'./files/Excel{round(time.time() * 1000)}.xlsx'
        self.wb.save(path)
封装的工具类（无用的 import 可以删掉）：
import os, time, requests, random, telnetlib, json, pypinyin
from bs4 import BeautifulSoup
# Absolute path of the directory that contains this file.
# NOTE(review): a module-level ``__dir__`` is also the hook that
# ``dir(module)`` calls (PEP 562); binding it to a string makes ``dir()`` on
# this module raise TypeError. The name is kept for compatibility -- consider
# renaming it (e.g. BASE_DIR) once callers are checked.
__dir__ = os.path.dirname(os.path.abspath(__file__))
# print(__dir__)
# Desktop Chrome User-Agent used when ``localhost`` is true.
_DEFAULT_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36"
)

# Pool of browser / crawler User-Agents picked from when ``localhost`` is false.
# NOTE(review): several entries contain odd spaces such as "( http://";
# the published crawler strings use "+" there (e.g. "(+http://..."), so "+"
# characters were probably lost -- verify before relying on them.
_UA_POOL = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",
    "Mozilla/5.0 (compatible; Baiduspider/2.0; http://www.baidu.com/search/spider.html)",
    "Mozilla/5.0 (compatible; Baiduspider-render/2.0; http://www.baidu.com/search/spider.html)",
    "Baiduspider-image ( http://www.baidu.com/search/spider.htm)",
    "Mozilla/5.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)",
    "Mozilla/5.0 (compatible; Googlebot-Image/1.0; http://www.google.com/bot.html)",
    "Sogou web spider/4.0( http://www.sogou.com/docs/help/webmasters.htm#07)",
    "Sogou News Spider/4.0( http://www.sogou.com/docs/help/webmasters.htm#07)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);",
    "Mozilla/5.0 (compatible; bingbot/2.0; http://www.bing.com/bingbot.htm)",
    "Sosospider ( http://help.soso.com/webspider.htm)",
    "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
)


def get_headers(localhost=True, refer="https://www.baidu.com", host=None):
    """Build the HTTP request headers used by the scraping helpers.

    Args:
        localhost: When true, use the fixed desktop Chrome User-Agent.
            When false, pick a random User-Agent from the built-in pool.
        refer: Value for the ``Referer`` header.
        host: Value for the ``Host`` header; stored as given, including
            ``None``.

    Returns:
        A dict with the keys ``User-Agent``, ``Referer`` and ``Host``.
    """
    ua = _DEFAULT_UA if localhost else random.choice(_UA_POOL)
    return {
        "User-Agent": ua,
        "Referer": refer,
        "Host": host,
    }
def get_html(url, ret_type="text", timeout=50, encoding="utf-8"):
    """Fetch ``url`` with a GET request and return the body in the requested form.

    Args:
        url: Address to request.
        ret_type: ``"text"`` returns the decoded body, ``"image"`` returns
            the raw bytes, ``"json"`` returns the parsed JSON document.
        timeout: Timeout passed to ``requests.get``.
        encoding: Encoding assigned to the response before ``.text`` is read.

    Returns:
        The body in the form selected by ``ret_type``, or ``None`` when
        ``ret_type`` is not one of the three supported values.
    """
    headers = get_headers()
    res = requests.get(url, headers=headers, timeout=timeout)
    # Override the detected encoding so ``res.text`` decodes as requested.
    res.encoding = encoding
    if ret_type == "text":
        return res.text
    if ret_type == "image":
        return res.content
    if ret_type == "json":
        return res.json()
    # Unknown ret_type: keep the historical behaviour of returning None.
    return None
好了，嗯嗯嗯，就是这样……尝试抓取一下自己的文章列表吧。修改这里：
# 博主首页链接
blog_url = "https://blog.csdn.net/博主名称"
# 自行查看博主首页文章的页数
pages = 2

其他：在保存为 csv 的时候，再通过 Office Excel 打开会乱码。
别急~~ 先用记事本打开，另存为时把编码改为 ANSI 就可以了。



