Csdn自动化评价功能
前言
之前一段时间,看到一些博主在文章下自动评论,当然,我是欢迎大家为我的文章进行评论的。我也思考了一下:如果由我来开发自动化评价功能,我要如何操作。
首先,我们需要思考的问题,我们先一个一个的把它们列出来。
1、获取文章,并获取到文章id
2、获取到评价的接口。
3、如何处理重复评价。
4、系统中断如何处理。
正文
一、分析获取文章id
进入csdn,找到文章列表,按F12,分析文章数据的返回接口和返回数据,并提取其中主要的数据。
然后去获取到我们所需要的接口,详细步骤这里就不说明了。
使用相同的方式,在自己的文章下面发送一条消息,就可以获取到发送评论的接口。
接着,我们来思考一下我们的流程。
二、流程设计
我们可以采取两种方式:
1、获取到文章列表,然后获取到文章id,然后查询是否评论,评论就跳过,没有评论就评论。
优点:不需要储存任何数据,减少操作。
缺点:文章重复查询,重复查询是否评论,效率不高。
2、我们先将获取到的文章列表全部储存到数据库,然后通过获取数据库未评价文章id,进行评价,评价完成之后,标记为已评价,无需查询是否已评价。
优点:效率相对1提高,不会重复查询是否评论。
缺点:需要数据库服务器,需要掌握数据库相关操作。
这里我们采用第二种方式,但我们之前的文章可能有我们评论过的,所以,我们还是需要判断一下是否有评论过,于是我们的流程变了。
三、数据库设计
我们已经设计好流程了,然后来设计数据库字段。
我们需要文章id,文章url,作者,是否评论字段,是否点赞字段,当然你还可以增加其他的一些字段。
我们开始创建表:(防止昵称有表情符号,编码不使用utf8,采用utf8mb4 )
-- One row per harvested article. `comment` / `like` are 0/1 flags recording
-- whether the bot has already commented on / liked the article.
-- Text columns use utf8mb4 so that nicknames containing emoji can be stored.
-- Note: `insert_time` is set on insert and refreshed again on every UPDATE.
CREATE TABLE `article` (
  `articleId` bigint NOT NULL COMMENT 'id',
  `articleDetailUrl` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT 'url',
  `articleTitle` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '标题',
  `nickName` varchar(30) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '昵称',
  `hotRankScore` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '热度',
  `comment` int NOT NULL DEFAULT '0' COMMENT '是否评论(0,否,1是)',
  `like` int NOT NULL DEFAULT '0' COMMENT '是否点赞(0,否,1是)',
  `insert_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`articleId`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
这样,我们就创建好了我们的数据表。
四、操作数据库
我们已经建好了数据库,现在思考一下,我们需要用到哪些操作。思考方向始终围绕着增删改查,这样你会更加清楚。
1、增加:插入文章列表数据
2、删除:这里我们用不到
3、修改:未评论更新为已评论
4、查询:查询未评论的文章id,查询该文章列表是否插入过数据库
因此我们编写方法:
# Shared connection pool used by every database helper in this file.
#
# charset must be utf8mb4, not MySQL's 3-byte "utf8": the `article` table is
# declared utf8mb4 precisely so that nicknames/titles containing emoji can be
# stored, and a "utf8" connection cannot transmit those 4-byte characters.
#
# blocking=True makes callers wait for a free connection once maxconnections
# is reached, so every helper must hand its connection back (db.close()) even
# when a query fails. ping=0 disables liveness checks on pooled connections.
#
# NOTE(review): host and credentials are hard-coded development values
# (root/root on localhost); move them to configuration before deploying.
POOL = PooledDB(
    creator=pymysql,
    maxconnections=20,
    mincached=6,
    maxcached=None,
    maxshared=5,
    blocking=True,
    maxusage=None,
    setsession=[],
    ping=0,
    host='127.0.0.1',
    port=3306,
    user='root',
    password='root',
    database='csdn_article',
    charset='utf8mb4',
)
def insert_article(articleId, articleDetailUrl, articleTitle, nickName, hotRankScore):
    """Insert one article row into the ``article`` table and commit.

    Values are passed to the driver as query parameters instead of being
    formatted into the SQL text, so quotes or other special characters in a
    title, nickname or URL cannot break (or inject into) the statement.

    Args:
        articleId: Article id (primary key of the table).
        articleDetailUrl: URL of the article page.
        articleTitle: Article title.
        nickName: Author nickname.
        hotRankScore: Hotness score as delivered by the ranking API.

    Returns:
        The value returned by ``db.commit()``, unchanged from the original
        implementation.
    """
    sql = (
        "INSERT INTO `article`(`articleId`, `articleDetailUrl`, `articleTitle`, "
        "`nickName`, `hotRankScore`) VALUES (%s, %s, %s, %s, %s);"
    )
    params = (articleId, articleDetailUrl, articleTitle, nickName, hotRankScore)

    db = POOL.connection()
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql, params)
        finally:
            cursor.close()
        return db.commit()
    finally:
        # Always hand the connection back to the pool, even if the insert failed.
        db.close()
def select_is_insert(articleId):
    """Return how many rows in ``article`` have the given article id.

    Because ``articleId`` is the primary key the result is 0 (not stored yet)
    or 1 (already stored).

    Args:
        articleId: Article id to look up; sent as a query parameter rather
            than formatted into the SQL text.

    Returns:
        The ``COUNT(*)`` value for that id.
    """
    db = POOL.connection()
    try:
        cursor = db.cursor()
        try:
            cursor.execute(
                "SELECT COUNT(*) FROM `article` WHERE `articleId` = %s;",
                (articleId,),
            )
            row = cursor.fetchone()
        finally:
            cursor.close()
    finally:
        # Always hand the connection back to the pool, even if the query failed.
        db.close()
    return row[0]
def select_is_comment():
    """Fetch up to two articles that have not been commented on yet.

    Returns:
        The rows returned by ``fetchall()``; each row is
        ``(articleId, articleDetailUrl)`` for an article whose ``comment``
        flag is still 0. Empty when nothing is pending.
    """
    db = POOL.connection()
    try:
        cursor = db.cursor()
        try:
            cursor.execute(
                "SELECT `articleId`,`articleDetailUrl` FROM `article` "
                "WHERE `comment` = '0' LIMIT 0, 2;"
            )
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always hand the connection back to the pool; with blocking=True a
        # leaked connection would eventually stall every other caller.
        db.close()
def update_article(articleId, comment=1):
    """Set the ``comment`` flag of one article and commit.

    Args:
        articleId: Id of the article row to update.
        comment: New flag value; defaults to 1 (commented).

    Returns:
        The value returned by ``db.commit()``, unchanged from the original
        implementation.
    """
    db = POOL.connection()
    try:
        cursor = db.cursor()
        try:
            # Parameters are bound by the driver, not formatted into the SQL.
            cursor.execute(
                "UPDATE `article` SET `comment` = %s WHERE `articleId` = %s;",
                (comment, articleId),
            )
        finally:
            cursor.close()
        return db.commit()
    finally:
        # Always hand the connection back to the pool, even if the update failed.
        db.close()
五、获取csdn文章数据
我们继续获取csdn文章列表(从排行榜中获取):
def qzzhrb():
    """Harvest the site-wide hot-rank list into the ``article`` table.

    Requests pages 0-3 (25 entries each) of the ``hotRank`` endpoint, waiting
    5 seconds before each request, and inserts every returned article whose id
    is not stored yet (``select_is_insert`` / ``insert_article``).
    """
    headers = {
        'Host': 'blog.csdn.net',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
    }
    for page in range(0, 4):
        time.sleep(5)  # throttle requests to the ranking API
        response = requests.get(
            "https://blog.csdn.net/phoenix/web/blog/hotRank?page=" + str(page) + "&pageSize=25",
            headers=headers,
            timeout=10,  # never let a stalled connection hang the job thread
        )
        payload = response.json()  # parse the body once
        if payload["message"] != "success":
            continue
        for item in payload["data"]:
            url = item["articleDetailUrl"]
            # The article id is the last path segment of the detail URL.
            # Ignore a query string or trailing slash, and skip anything that
            # is not a plain number (the column is a bigint).
            article_id = url.split("?", 1)[0].rstrip("/").rpartition("/")[2]
            if not article_id.isdigit():
                continue
            if select_is_insert(article_id) != 1:
                insert_article(
                    article_id,
                    url,
                    item["articleTitle"],
                    item["nickName"],
                    item["hotRankScore"],
                )
#1到2天执行一次
def lynrb():
    """Harvest the per-channel hot-rank lists into the ``article`` table.

    For pages 0-1 and every channel in the list below, requests 25 entries
    from the ``hotRank`` endpoint (``child_channel=<channel>``), waits 5
    seconds after each request, and inserts every returned article whose id
    is not stored yet (``select_is_insert`` / ``insert_article``).
    """
    headers = {
        'Host': 'blog.csdn.net',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
    }
    # "c%2Fc%2B%2B" is already percent-encoded ("c/c++").
    channels = ["python", "java", "javascript", "人工智能", "php", "c%2Fc%2B%2B", "大数据", "移动开发", "数据结构与算法", "游戏", "网络", "运维", "测试"]
    for page in range(0, 2):
        for channel in channels:
            response = requests.get(
                "https://blog.csdn.net/phoenix/web/blog/hotRank?page=" + str(page) + "&pageSize=25&child_channel=" + channel,
                headers=headers,
                timeout=10,  # never let a stalled connection hang the job thread
            )
            time.sleep(5)  # throttle requests to the ranking API
            payload = response.json()  # parse the body once
            if payload["message"] != "success":
                continue
            for item in payload["data"]:
                url = item["articleDetailUrl"]
                # The article id is the last path segment of the detail URL.
                # Ignore a query string or trailing slash, and skip anything
                # that is not a plain number (the column is a bigint).
                article_id = url.split("?", 1)[0].rstrip("/").rpartition("/")[2]
                if not article_id.isdigit():
                    continue
                if select_is_insert(article_id) != 1:
                    insert_article(
                        article_id,
                        url,
                        item["articleTitle"],
                        item["nickName"],
                        item["hotRankScore"],
                    )
# lynrb()
#每天8点一次
def xjzzb():
    """Harvest the new-author ranking into the ``article`` table.

    Requests pages 0-4 (20 entries each) of the ``newUserRank`` endpoint,
    waiting 5 seconds before each request, and inserts every returned article
    whose id is not stored yet (``select_is_insert`` / ``insert_article``).
    """
    headers = {
        'Host': 'blog.csdn.net',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
    }
    for page in range(0, 5):
        time.sleep(5)  # throttle requests to the ranking API
        response = requests.get(
            "https://blog.csdn.net/phoenix/web/blog/newUserRank?page=" + str(page) + "&pageSize=20",
            headers=headers,
            timeout=10,  # never let a stalled connection hang the job thread
        )
        payload = response.json()  # parse the body once
        if payload["message"] != "success":
            continue
        for item in payload["data"]:
            url = item["articleDetailUrl"]
            # The article id is the last path segment of the detail URL.
            # Ignore a query string or trailing slash, and skip anything that
            # is not a plain number (the column is a bigint).
            article_id = url.split("?", 1)[0].rstrip("/").rpartition("/")[2]
            if not article_id.isdigit():
                continue
            if select_is_insert(article_id) != 1:
                insert_article(
                    article_id,
                    url,
                    item["articleTitle"],
                    item["nickName"],
                    item["hotRankScore"],
                )
# xjzzb()
def recommend():
    """Harvest articles from the home-page recommendation feed.

    NOTE(review): the body of this function was lost when the article text
    was extracted (everything between this function's ``accept`` header and
    the ``Accept`` header of ``comment_page`` is missing). Only the request
    headers below survived. Restore the body from the original article
    before relying on this job; until then it fails loudly instead of
    pretending to work.
    """
    header = {  # kept so the surviving request metadata is not lost
        "path": "/api/articles?type=more&category=home&shown_offset=0",
        "accept-language": "zh-CN,zh;q=0.9",
        "referer": "https://blog.csdn.net/",
        "accept": "application/json, text/javascript, */*; q=0.01",
    }
    raise NotImplementedError(
        "recommend(): request/insert logic was lost in extraction; "
        "surviving headers: %r" % (header,)
    )


def comment_page(id, page=1, size=10):
    """Report whether the configured account already commented on an article.

    Fetches one page of the article's comment list. If a comment by the
    configured user name is on that page, the article is marked as commented
    in the database and True is returned. If the article has more comments
    than ``size``, the list is fetched once more with ``size`` set to the
    total count so that every comment is inspected.

    NOTE(review): the ``def`` line and the first header of this function were
    lost in extraction. The signature is reconstructed from the call sites
    ``comment_page(i[0])`` and ``comment_page(id, 1, count)``; the defaults
    ``page=1, size=10`` and the ``'Accept'`` header are assumptions - confirm
    against the original article.

    Args:
        id: Article id (name kept from the original body).
        page: Page number to request.
        size: Number of comments per page.

    Returns:
        True if a comment by the configured user was found, otherwise False
        (also False when the API does not answer with code 200).
    """
    header = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'sec-ch-ua-mobile': '?0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
        'X-Tingyun-Id': 'im-pGljNfnc;r=301378265',
        'Origin': 'https://blog.csdn.net',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    data = "page=%s&size=%s&commentId=" % (page, size)
    response = requests.post(
        "https://blog.csdn.net/phoenix/web/v1/comment/list/%s?page=%s&size=%s&commentId=" % (id, page, size),
        headers=header,
        data=data,
        timeout=10,  # never let a stalled connection hang the job thread
    )
    payload = response.json()  # parse the body once
    if payload["code"] != 200:
        return False

    total = payload["data"]["count"]
    for entry in payload["data"]["list"] or []:
        # This is the author's own user name; change it to your own account.
        if entry["info"]["userName"] == "qq_39046854":
            update_article(id)
            return True

    if total > size:
        # Not on this page: refetch with size == total so all comments are
        # checked, and propagate that result (the original discarded it and
        # always fell through to False, causing duplicate comments).
        return comment_page(id, 1, total)
    return False
整体逻辑:
查询评论列表数据,如果条数小于等于size,如果评论数据中,没有自己的id,则返回false,如果有自己的id则更新数据库,返回true。
如果条数大于size,如果评论数据中,没有自己的id,则进行递归,如果依然没有,返回false,如果有自己的id则更新数据库,返回true。
七、评论
OK,已经做完了准备,然后我们来评论。首先获取到评论的接口。
def comment(articleId):
    """Post a comment on an article and mark it as commented in the database.

    Waits 5 seconds, submits one of the canned comment texts to the
    ``comment/submit`` endpoint, prints the JSON reply, and calls
    ``update_article`` when the reply's ``message`` is ``"success"`` or its
    ``code`` is 400.

    NOTE(review): code 400 presumably means the article cannot take (another)
    comment from this account - confirm; it is treated as "done" exactly as
    in the original so the article is not retried forever.
    NOTE(review): no Cookie header is set here; submitting presumably needs
    an authenticated session - confirm how credentials are supplied.

    Args:
        articleId: Id of the article to comment on.
    """
    time.sleep(5)  # throttle comment submissions
    user_headers = {
        'Host': 'blog.csdn.net',
        'Connection': 'keep-alive',
        # Content-Length is intentionally not hard-coded: requests computes
        # it from the actual body.
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'sec-ch-ua-mobile': '?0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
        'X-Tingyun-Id': 'im-pGljNfnc;r=290154423',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Origin': 'https://blog.csdn.net',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    content = ["666"]  # pool of comment texts; one is picked at random
    # Passing a dict lets requests URL-encode the fields, so comment texts
    # containing spaces, '&' or non-ASCII characters are transmitted intact.
    form = {
        "commentId": "",
        "content": random.choice(content),
        "articleId": articleId,
    }
    response = requests.post(
        "https://blog.csdn.net/phoenix/web/v1/comment/submit",
        data=form,
        headers=user_headers,
        timeout=10,  # never let a stalled connection hang the job thread
    )
    result = response.json()  # parse the body once
    print(result)
    if result["message"] == "success" or result["code"] == 400:
        update_article(articleId)
def qzzhrb():
    """Harvest the site-wide hot-rank list into the ``article`` table.

    Requests pages 0-3 (25 entries each) of the ``hotRank`` endpoint, waiting
    5 seconds before each request, and inserts every returned article whose id
    is not stored yet (``select_is_insert`` / ``insert_article``).
    """
    headers = {
        'Host': 'blog.csdn.net',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
    }
    for page in range(0, 4):
        time.sleep(5)  # throttle requests to the ranking API
        response = requests.get(
            "https://blog.csdn.net/phoenix/web/blog/hotRank?page=" + str(page) + "&pageSize=25",
            headers=headers,
            timeout=10,  # never let a stalled connection hang the job thread
        )
        payload = response.json()  # parse the body once
        if payload["message"] != "success":
            continue
        for item in payload["data"]:
            url = item["articleDetailUrl"]
            # The article id is the last path segment of the detail URL.
            # Ignore a query string or trailing slash, and skip anything that
            # is not a plain number (the column is a bigint).
            article_id = url.split("?", 1)[0].rstrip("/").rpartition("/")[2]
            if not article_id.isdigit():
                continue
            if select_is_insert(article_id) != 1:
                insert_article(
                    article_id,
                    url,
                    item["articleTitle"],
                    item["nickName"],
                    item["hotRankScore"],
                )
def lynrb():
    """Harvest the per-channel hot-rank lists into the ``article`` table.

    For pages 0-1 and every channel in the list below, requests 25 entries
    from the ``hotRank`` endpoint (``child_channel=<channel>``), waits 5
    seconds after each request, and inserts every returned article whose id
    is not stored yet (``select_is_insert`` / ``insert_article``).
    """
    headers = {
        'Host': 'blog.csdn.net',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
    }
    # "c%2Fc%2B%2B" is already percent-encoded ("c/c++").
    channels = ["python", "java", "javascript", "人工智能", "php", "c%2Fc%2B%2B", "大数据", "移动开发", "数据结构与算法", "游戏", "网络", "运维", "测试"]
    for page in range(0, 2):
        for channel in channels:
            response = requests.get(
                "https://blog.csdn.net/phoenix/web/blog/hotRank?page=" + str(page) + "&pageSize=25&child_channel=" + channel,
                headers=headers,
                timeout=10,  # never let a stalled connection hang the job thread
            )
            time.sleep(5)  # throttle requests to the ranking API
            payload = response.json()  # parse the body once
            if payload["message"] != "success":
                continue
            for item in payload["data"]:
                url = item["articleDetailUrl"]
                # The article id is the last path segment of the detail URL.
                # Ignore a query string or trailing slash, and skip anything
                # that is not a plain number (the column is a bigint).
                article_id = url.split("?", 1)[0].rstrip("/").rpartition("/")[2]
                if not article_id.isdigit():
                    continue
                if select_is_insert(article_id) != 1:
                    insert_article(
                        article_id,
                        url,
                        item["articleTitle"],
                        item["nickName"],
                        item["hotRankScore"],
                    )
def xjzzb():
    """Harvest the new-author ranking into the ``article`` table.

    Requests pages 0-4 (20 entries each) of the ``newUserRank`` endpoint,
    waiting 5 seconds before each request, and inserts every returned article
    whose id is not stored yet (``select_is_insert`` / ``insert_article``).
    """
    headers = {
        'Host': 'blog.csdn.net',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
    }
    for page in range(0, 5):
        time.sleep(5)  # throttle requests to the ranking API
        response = requests.get(
            "https://blog.csdn.net/phoenix/web/blog/newUserRank?page=" + str(page) + "&pageSize=20",
            headers=headers,
            timeout=10,  # never let a stalled connection hang the job thread
        )
        payload = response.json()  # parse the body once
        if payload["message"] != "success":
            continue
        for item in payload["data"]:
            url = item["articleDetailUrl"]
            # The article id is the last path segment of the detail URL.
            # Ignore a query string or trailing slash, and skip anything that
            # is not a plain number (the column is a bigint).
            article_id = url.split("?", 1)[0].rstrip("/").rpartition("/")[2]
            if not article_id.isdigit():
                continue
            if select_is_insert(article_id) != 1:
                insert_article(
                    article_id,
                    url,
                    item["articleTitle"],
                    item["nickName"],
                    item["hotRankScore"],
                )
def recommend():
    """Harvest articles from the home-page recommendation feed.

    NOTE(review): the body of this function was lost when the article text
    was extracted (everything between this function's ``accept`` header and
    the ``Accept`` header of ``comment_page`` is missing). Only the request
    headers below survived. Restore the body from the original article
    before relying on this job; until then it fails loudly instead of
    pretending to work.
    """
    header = {  # kept so the surviving request metadata is not lost
        "path": "/api/articles?type=more&category=home&shown_offset=0",
        "accept-language": "zh-CN,zh;q=0.9",
        "referer": "https://blog.csdn.net/",
        "accept": "application/json, text/javascript, */*; q=0.01",
    }
    raise NotImplementedError(
        "recommend(): request/insert logic was lost in extraction; "
        "surviving headers: %r" % (header,)
    )


def comment_page(id, page=1, size=10):
    """Report whether the configured account already commented on an article.

    Fetches one page of the article's comment list. If a comment by the
    configured user name is on that page, the article is marked as commented
    in the database and True is returned. If the article has more comments
    than ``size``, the list is fetched once more with ``size`` set to the
    total count so that every comment is inspected.

    The session cookie is read from the ``CSDN_COOKIE`` environment variable
    instead of being embedded in the source: the original literal contained a
    live login token (``UserToken``) and was additionally broken across two
    lines. When the variable is unset, no cookie header is sent.

    NOTE(review): the ``def`` line and the first header of this function were
    lost in extraction. The signature is reconstructed from the call sites
    ``comment_page(i[0])`` and ``comment_page(id, 1, count)``; the defaults
    ``page=1, size=10`` and the ``'Accept'`` header are assumptions - confirm
    against the original article.

    Args:
        id: Article id (name kept from the original body).
        page: Page number to request.
        size: Number of comments per page.

    Returns:
        True if a comment by the configured user was found, otherwise False
        (also False when the API does not answer with code 200).
    """
    import os

    header = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'sec-ch-ua-mobile': '?0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
        'X-Tingyun-Id': 'im-pGljNfnc;r=301378265',
        'Origin': 'https://blog.csdn.net',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    cookie = os.environ.get("CSDN_COOKIE")
    if cookie:
        header['cookie'] = cookie

    data = "page=%s&size=%s&commentId=" % (page, size)
    response = requests.post(
        "https://blog.csdn.net/phoenix/web/v1/comment/list/%s?page=%s&size=%s&commentId=" % (id, page, size),
        headers=header,
        data=data,
        timeout=10,  # never let a stalled connection hang the job thread
    )
    payload = response.json()  # parse the body once
    if payload["code"] != 200:
        return False

    total = payload["data"]["count"]
    for entry in payload["data"]["list"] or []:
        # This is the author's own user name; change it to your own account.
        if entry["info"]["userName"] == "qq_39046854":
            update_article(id)
            return True

    if total > size:
        # Not on this page: refetch with size == total so all comments are
        # checked, and propagate that result (the original discarded it and
        # always fell through to False, causing duplicate comments).
        return comment_page(id, 1, total)
    return False
#每四分钟一次
def is_comment():
    """Process the pending articles: skip own ones, comment on the rest.

    For every row returned by ``select_is_comment`` (article id, detail URL):

    * if the article was written by one of the operator's own accounts, it is
      only marked as commented;
    * otherwise ``comment_page`` checks for an existing comment, and
      ``comment`` posts one when none is found.

    The author is taken from the first path segment of the detail URL
    (``https://blog.csdn.net/<user>/article/details/<id>``) and compared for
    exact equality. The original tested ``url[22:33]`` for membership in the
    concatenated string ``"qq_39046854weixin_43673589"``, which also matched
    an empty slice, unrelated users sharing an 11-character prefix, and
    substrings spanning both names.
    """
    from urllib.parse import urlparse

    own_accounts = {"qq_39046854", "weixin_43673589"}

    for article_id, detail_url in select_is_comment():
        # The URL column is nullable; treat a missing URL as "author unknown".
        path = urlparse(detail_url or "").path
        author = path.strip("/").split("/", 1)[0]

        if author in own_accounts:
            update_article(article_id)
            continue

        if not comment_page(article_id):
            comment(article_id)
@repeat(every(10).minutes, func=recommend)
@repeat(every(3).minutes, func=is_comment)
@repeat(every().day.at("10:00"), func=qzzhrb)
@repeat(every().day.at("09:00"), func=lynrb)
@repeat(every().day.at("07:00"), func=xjzzb)
def run_threaded(func):
    """Start ``func`` on a new thread and return immediately.

    Each ``@repeat`` line above registers this launcher with the scheduler
    for one job, passing that job as the ``func`` keyword argument, so a
    long-running job does not hold up the scheduler loop.

    Args:
        func: Zero-argument callable to execute on the new thread.
    """
    worker = threading.Thread(target=func)
    worker.start()
def main():
    """Run the scheduler loop forever, checking for due jobs once per second.

    Errors raised while dispatching jobs are reported on stderr and the loop
    keeps going. Only ``Exception`` is caught, so Ctrl+C / ``SystemExit``
    still stop the program (the original bare ``except: pass`` swallowed
    those too and hid every failure).
    """
    import traceback

    while True:
        try:
            run_pending()  # run every job that is currently due
        except Exception:
            traceback.print_exc()
        time.sleep(1)


if __name__ == '__main__':
    main()



