fix: 合并代码
This commit is contained in:
commit
f84cd3cdfc
4724
driver/chromedriver_win32/LICENSE.chromedriver
Normal file
4724
driver/chromedriver_win32/LICENSE.chromedriver
Normal file
File diff suppressed because it is too large
Load Diff
BIN
driver/chromedriver_win32/chromedriver.exe
Normal file
BIN
driver/chromedriver_win32/chromedriver.exe
Normal file
Binary file not shown.
134
src/plugins/zhibo8/plugins/TouTiao.py
Normal file
134
src/plugins/zhibo8/plugins/TouTiao.py
Normal file
@ -0,0 +1,134 @@
|
||||
import datetime
|
||||
import warnings
|
||||
from datetime import timedelta
|
||||
from nonebot.adapters.onebot.v11 import Bot, Event
|
||||
from nonebot.typing import T_State
|
||||
from nonebot.params import CommandArg, Command
|
||||
|
||||
from nonebot import on_command
|
||||
from nonebot.rule import to_me
|
||||
from requests_html import HTMLSession, HTML
|
||||
import json
|
||||
from selenium.webdriver import Chrome, ChromeOptions
|
||||
|
||||
|
||||
# Path to the ChromeDriver executable handed to selenium's Chrome(...).
# Fixed: the directory name was misspelled ("cromedriver_win32") and the path
# stopped at the directory instead of naming the executable inside it.
# NOTE(review): the path is relative, so it is resolved against the process's
# current working directory -- confirm the launch directory.
driver_path = '../../../../driver/chromedriver_win32/chromedriver.exe'

# Paging cursor for the Toutiao feed API; '0' requests the first page.
max_behot_time = '0'

# Per-author counters: author name -> number of posts counted so far.
weitoutiaoMap = {"太能喵": 0, "小小": 0, "大帝强": 0, "叶小欢": 0}
|
||||
|
||||
"""
|
||||
获取微头条数据信息
|
||||
"""
|
||||
def getWeiToutiaoInfo():
    """Count last week's micro-headline (weitoutiao) posts per author.

    Pages through the Toutiao user-feed API, starting at the module-level
    ``max_behot_time`` cursor. For every post published inside last week's
    window, the first author name from ``weitoutiaoMap`` found in the last
    eight characters of the post content has its counter incremented.
    Posts carrying the score-prediction marker are skipped.

    Paging stops at the first post older than the start of last week, when
    a page comes back empty, or when the response offers no further cursor.

    Side effects:
        Updates the module globals ``max_behot_time`` and ``weitoutiaoMap``.

    Raises:
        KeyError: if the response lacks ``data`` or an item lacks
            ``publish_time`` / ``content``.
    """
    global max_behot_time

    # The window is the same for every page, so compute it once.
    lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp()
    weekStart = int(lastWeekStartTime)
    weekEnd = int(lastWeekEndTime)

    session = HTMLSession()

    # Iterate instead of recursing once per page: a long feed can no longer
    # hit the interpreter's recursion limit.
    # NOTE(review): the cursor is a module global shared with getWenzhangInfo,
    # so a later call resumes from wherever the previous call stopped.
    while True:
        # NOTE(review): verify=False disables TLS certificate verification.
        weitoutiaoGet = session.get(f"https://www.toutiao.com/api/pc/list/user/feed?category=pc_profile_ugc&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII&max_behot_time={max_behot_time}&aid=24&app_name=toutiao_web", verify=False, proxies=None)
        resultJson = json.loads(weitoutiaoGet.text)

        # Remember the cursor for the next page, if the response has one.
        nextCursor = (resultJson.get('next') or {}).get('max_behot_time')
        if nextCursor is not None:
            max_behot_time = nextCursor

        dataList = resultJson['data'] or []
        for data in dataList:
            publishTime = int(data["publish_time"])
            if publishTime > weekEnd:
                # Newer than last week: not counted, keep scanning.
                continue
            if publishTime < weekStart:
                # Older than last week: done.
                # Assumes the feed is ordered newest first -- TODO confirm.
                return

            content = str(data['content'])
            # Score-prediction posts are not counted. Use `in` rather than
            # `find(...) > 0`, which misses a match at index 0.
            if "比分预测】" in content:
                continue

            # The author is looked for in the tail of the content. This must
            # be a slice: content[-8] is a single character and could never
            # contain a multi-character name.
            contentSubString = content[-8:]
            # Dict order matches the original if/elif order; first hit wins.
            for author in weitoutiaoMap:
                if author in contentSubString:
                    weitoutiaoMap[author] += 1
                    break

        # Nothing more to fetch: empty page or no cursor to continue with.
        if not dataList or nextCursor is None:
            return
|
||||
|
||||
"""
|
||||
获取文章数据信息
|
||||
"""
|
||||
def getWenzhangInfo():
    """Fetch last week's articles and load each article page in headless Chrome.

    Pages through the Toutiao user-feed API (article category), starting at
    the module-level ``max_behot_time`` cursor. For every article published
    inside last week's window that has a URL, the page is opened in a
    headless Chrome instance and its HTML source is printed.

    Paging stops at the first article older than the start of last week,
    when a page comes back empty, or when the response offers no further
    cursor. The browser is always shut down before returning.

    Side effects:
        Updates the module global ``max_behot_time``; starts and quits a
        Chrome process; prints page sources to stdout.

    Raises:
        KeyError: if the response lacks ``data`` or an item lacks
            ``publish_time`` / ``url``.
    """
    global max_behot_time

    # Silence ResourceWarning noise.
    warnings.simplefilter('ignore', ResourceWarning)

    chrome_option = ChromeOptions()
    chrome_option.headless = True
    chrome_option.add_argument('--no-sandbox')  # works around the "DevToolsActivePort file doesn't exist" error
    chrome_option.add_argument('--disable-gpu')  # Google's docs mention this flag as a bug workaround
    chrome_option.add_argument('--hide-scrollbars')  # hide scrollbars, for some special pages
    chrome_option.add_argument('--headless')

    # The window is the same for every page, so compute it once.
    lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp()
    weekStart = int(lastWeekStartTime)
    weekEnd = int(lastWeekEndTime)

    session = HTMLSession()
    browser = Chrome(executable_path=driver_path, options=chrome_option)
    try:
        # Page with a loop inside this function. The original "continued" by
        # calling getWeiToutiaoInfo(), which switched to the micro-headline
        # feed instead of fetching the next page of articles.
        while True:
            # NOTE(review): verify=False disables TLS certificate verification.
            wenzhangGet = session.get(f"https://www.toutiao.com/api/pc/list/user/feed?category=pc_profile_article&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII&max_behot_time={max_behot_time}&aid=24&app_name=toutiao_web", verify=False, proxies=None)
            resultJson = json.loads(wenzhangGet.text)

            # Remember the cursor for the next page, if the response has one.
            nextCursor = (resultJson.get('next') or {}).get('max_behot_time')
            if nextCursor is not None:
                max_behot_time = nextCursor

            dataList = resultJson['data'] or []
            for data in dataList:
                publishTime = int(data["publish_time"])
                if publishTime > weekEnd:
                    # Newer than last week: skip, keep scanning.
                    continue
                if publishTime < weekStart:
                    # Older than last week: done.
                    # Assumes the feed is ordered newest first -- TODO confirm.
                    return

                url = data["url"]
                if url:
                    # WebDriver.get() navigates and returns None; the loaded
                    # HTML is exposed through page_source.
                    browser.get(url)
                    print(browser.page_source)
                    # TODO: parse the author out of the article body and
                    # update weitoutiaoMap (not implemented yet).

            # Nothing more to fetch: empty page or no cursor to continue with.
            if not dataList or nextCursor is None:
                return
    finally:
        # Always release the browser / chromedriver process.
        browser.quit()
|
||||
|
||||
"""
|
||||
获取当前日期上一周的第一天和最后一天的时间戳
|
||||
"""
|
||||
def getLastWeekFirstDayTimeStamp(now=None):
    """Return the timestamps bounding the previous calendar week.

    The week runs from Monday 00:00:00 through Sunday 23:59:59, computed
    from ``now`` (naive datetimes are interpreted in local time by
    ``datetime.timestamp()``).

    Args:
        now: Reference ``datetime.datetime``. Defaults to
            ``datetime.datetime.now()``; pass a fixed value to get a
            reproducible result.

    Returns:
        A ``(start, end)`` tuple of float POSIX timestamps. Both bounds are
        inclusive.
    """
    if now is None:
        now = datetime.datetime.now()

    # Midnight today, then back to Monday of the previous week.
    today_midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
    last_week_start = today_midnight - timedelta(days=now.weekday() + 7)
    # Last second of last week's Sunday. The original used start + 7 days,
    # i.e. Monday 00:00:00 of the *current* week, so a post published at
    # that instant was counted as last week's and the printed "last day"
    # was off by one.
    last_week_end = last_week_start + timedelta(days=7, seconds=-1)

    print(f"上周第一天的日期是:{last_week_start},最后一天的日期是:{last_week_end}")
    print(f"上周第一天的时间戳是:{last_week_start.timestamp()},最后一天的时间戳是:{last_week_end.timestamp()}")
    return last_week_start.timestamp(), last_week_end.timestamp()
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: crawl last week's articles.
    getWenzhangInfo()
    # Debug aid: uncomment to dump the per-author counters.
    # print(weitoutiaoMap)
|
Loading…
x
Reference in New Issue
Block a user