feat: 增加微头条统计

This commit is contained in:
xiaoyan 2023-09-28 17:12:30 +08:00
parent 515503b342
commit 449a097b32
3 changed files with 202 additions and 62 deletions

View File

@ -1,19 +1,49 @@
adapter==0.1
aiocache==0.12.0 aiocache==0.12.0
aiocqhttp==1.4.3 aiocqhttp==1.4.3
aiofiles==23.1.0 aiofiles==23.1.0
anyio==3.6.2 anyio==3.6.2
appdirs==1.4.4 appdirs==1.4.4
argcomplete==2.0.0
argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
arrow==1.2.3
asttokens==2.2.1
async-generator==1.10
attrs==22.2.0
backcall==0.2.0
beautifulsoup4==4.11.2 beautifulsoup4==4.11.2
bleach==6.0.0
blinker==1.5 blinker==1.5
brotlipy==0.7.0
bs4==0.0.1 bs4==0.0.1
CacheControl==0.12.11
cashews==4.7.1
certifi==2022.12.7 certifi==2022.12.7
cffi==1.15.1
charset-normalizer==3.0.1 charset-normalizer==3.0.1
cleo==2.0.1
click==8.1.3 click==8.1.3
colorama==0.4.6 colorama==0.4.6
comm==0.1.2
crashtest==0.4.1
cryptography==40.0.2
cssselect==1.2.0 cssselect==1.2.0
debugpy==1.6.6
decorator==5.1.1
defusedxml==0.7.1
distlib==0.3.6
docopt==0.6.2 docopt==0.6.2
dulwich==0.20.50
exceptiongroup==1.1.1
executing==1.2.0
fake-useragent==1.1.1 fake-useragent==1.1.1
fastapi==0.91.0 fastapi==0.91.0
fastjsonschema==2.16.2
filelock==3.9.0
fqdn==1.5.1
gitdb==4.0.9
GitPython==3.1.27
h11==0.14.0 h11==0.14.0
h2==4.1.0 h2==4.1.0
hpack==4.0.0 hpack==4.0.0
@ -25,49 +55,146 @@ hypercorn==0.14.3
hyperframe==6.0.1 hyperframe==6.0.1
idna==3.4 idna==3.4
importlib-metadata==6.0.0 importlib-metadata==6.0.0
ipykernel==6.20.2
ipython==8.9.0
ipython-genutils==0.2.0
ipywidgets==8.0.4
irc3==1.1.8 irc3==1.1.8
isoduration==20.11.0
itsdangerous==2.1.2 itsdangerous==2.1.2
jaraco.classes==3.2.3
jedi==0.18.2
Jinja2==3.1.2 Jinja2==3.1.2
jsonpointer==2.3
jsonschema==4.17.3
jupyter==1.0.0
jupyter-console==6.4.4
jupyter-events==0.6.3
jupyter_client==8.0.1
jupyter_core==5.1.5
jupyter_server==2.1.0
jupyter_server_terminals==0.4.4
jupyterlab-pygments==0.2.2
jupyterlab-widgets==3.0.5
keyring==23.13.1
lfmh==1.1.1 lfmh==1.1.1
lockfile==0.12.2
loguru==0.6.0 loguru==0.6.0
lxml==4.9.2 lxml==4.9.2
MarkupSafe==2.1.2 MarkupSafe==2.1.2
matplotlib-inline==0.1.6
mistune==2.0.4
more-itertools==9.0.0
msgpack==1.0.4 msgpack==1.0.4
multidict==6.0.4 multidict==6.0.4
musicbrainzngs==0.7.1 musicbrainzngs==0.7.1
mysql-connector-python==8.0.28
nbclassic==0.5.1
nbclient==0.7.2
nbconvert==7.2.9
nbformat==5.7.3
nest-asyncio==1.5.6
nonebot==1.9.1
nonebot-adapter-feishu==2.0.0b6
nonebot-adapter-onebot==2.2.1 nonebot-adapter-onebot==2.2.1
nonebot2==2.0.0rc3 nonebot2==2.0.0rc3
notebook==6.5.2
notebook_shim==0.2.2
numpy==1.24.3
onebot==1.3.3 onebot==1.3.3
outcome==1.2.0
packaging==23.0
pandas==2.0.1
pandocfilters==1.5.0
parse==1.19.0 parse==1.19.0
parsimonious==0.10.0
parso==0.8.3
pexpect==4.8.0
pickleshare==0.7.5
pipx==1.1.0
pkginfo==1.9.6
platformdirs==2.6.2
poetry==1.3.2
poetry-core==1.4.0
poetry-plugin-export==1.3.0
priority==2.0.0 priority==2.0.0
prometheus-client==0.16.0
prompt-toolkit==3.0.36
psutil==5.9.4
ptyprocess==0.7.0
pure-eval==0.2.2
pycparser==2.21
pycryptodome==3.17
pydantic==1.10.4 pydantic==1.10.4
pyee==8.2.2 pyee==8.2.2
Pygments==2.14.0
pygtrie==2.5.0 pygtrie==2.5.0
pyOpenSSL==23.1.1
pyppeteer==1.0.2 pyppeteer==1.0.2
pyquery==2.0.0 pyquery==2.0.0
pyrsistent==0.19.3
PySocks==1.7.1
python-dateutil==2.8.2
python-dotenv==0.21.1 python-dotenv==0.21.1
python-json-logger==2.0.4
pytz==2023.3
pywin32==305
pywin32-ctypes==0.2.0
pywinpty==2.0.10
PyYAML==6.0 PyYAML==6.0
pyzmq==25.0.0
qtconsole==5.4.0
QtPy==2.3.0
quart==0.18.3 quart==0.18.3
rapidfuzz==2.13.7
regex==2022.10.31
requests==2.28.2 requests==2.28.2
requests-html==0.10.0 requests-html==0.10.0
requests-toolbelt==0.10.1
rfc3339-validator==0.1.4
rfc3986==1.5.0 rfc3986==1.5.0
rfc3986-validator==0.1.1
selenium==4.9.1
Send2Trash==1.8.0
shellingham==1.5.0.post1
six==1.16.0 six==1.16.0
smmap==5.0.0
sniffio==1.3.0 sniffio==1.3.0
sortedcontainers==2.4.0
soupsieve==2.3.2.post1 soupsieve==2.3.2.post1
stack-data==0.6.2
starlette==0.24.0 starlette==0.24.0
terminado==0.17.1
tinycss2==1.2.1
toml==0.10.2 toml==0.10.2
tomli==2.0.1
tomlkit==0.11.6 tomlkit==0.11.6
tornado==6.2
tqdm==4.64.1 tqdm==4.64.1
traitlets==5.8.1
trio==0.22.0
trio-websocket==0.10.2
trove-classifiers==2023.1.20
typing_extensions==4.4.0 typing_extensions==4.4.0
tzdata==2023.3
uri-template==1.2.0
urllib3==1.26.14 urllib3==1.26.14
urllib3-secure-extra==0.1.0
userpath==1.8.0
uvicorn==0.20.0 uvicorn==0.20.0
venusian==3.0.0 venusian==3.0.0
virtualenv==20.16.5
w3lib==2.1.1 w3lib==2.1.1
watchfiles==0.18.1 watchfiles==0.18.1
wcwidth==0.2.6
webcolors==1.12
webencodings==0.5.1 webencodings==0.5.1
websocket-client==1.5.0
websockets==10.4 websockets==10.4
Werkzeug==2.2.2 Werkzeug==2.2.2
widgetsnbextension==4.0.5
win32-setctime==1.1.0 win32-setctime==1.1.0
wincertstore==0.2
wsproto==1.2.0 wsproto==1.2.0
yarl==1.8.2 yarl==1.8.2
zipp==3.13.0 zipp==3.13.0

View File

@ -7,7 +7,7 @@ from nonebot import on_command
from nonebot.rule import to_me from nonebot.rule import to_me
from requests_html import HTMLSession, HTML from requests_html import HTMLSession, HTML
zhibo8Command = on_command("/zhibo8", rule=to_me(), aliases={'预测', '英超', 'zhibo8'}, priority=10) zhibo8Command = on_command("zhibo8", rule=to_me(), aliases={'预测', '英超', 'zhibo8'}, priority=10)
xiaobianStr = r""" xiaobianStr = r"""
小编们预测比分 小编们预测比分

View File

@ -11,12 +11,25 @@ from requests_html import HTMLSession, HTML
import json import json
from selenium.webdriver import Chrome, ChromeOptions from selenium.webdriver import Chrome, ChromeOptions
tongjiCommand = on_command("统计", rule=to_me(), aliases={'微头条', 'tongji', 'tj'}, priority=10)
driver_path = '../../../../driver/cromedriver_win32' # driver_path = '../../../../driver/cromedriver_win32'
max_behot_time = '0' max_behot_time = '0'
# 初始化map对象分别记录每个人的微头条个数 # 初始化map对象分别记录每个人的微头条个数
weitoutiaoMap = {"太能喵": 0, "小小": 0, "大帝强": 0, "叶小欢": 0} weitoutiaoMap = {"太能喵": 0, "小小": 0, "大帝强": 0, "叶小欢": 0}
"""
接收QQ消息根据消息统计微头条结果
"""
@tongjiCommand.handle()
async def handle_first_receive(bot: Bot, event: Event, state: T_State, args= CommandArg()):
    """Handle the statistics command and reply with the per-author counts.

    Resets the module-level counters, re-runs the scrape through
    ``getWeiToutiaoInfo()`` and finishes the matcher with
    ``str(weitoutiaoMap)``.

    ``bot``, ``event``, ``state`` and ``args`` are supplied by the framework;
    this handler does not read any of them.
    """
    global max_behot_time

    # Restart paging from the newest item. The module initialises this cursor
    # to '0', and the article scraper in this module overwrites it from every
    # response; presumably getWeiToutiaoInfo() advances it the same way
    # (TODO confirm). Without this reset a second command would resume from
    # the stale cursor left by the previous run and under-count.
    max_behot_time = '0'

    # Clear the previous run's tallies so counts do not accumulate across commands.
    for author in weitoutiaoMap:
        weitoutiaoMap[author] = 0

    # NOTE(review): this is a plain synchronous call inside a coroutine, so the
    # event loop is blocked until the scrape returns. Consider moving it to an
    # executor once it is confirmed the scraper is safe to run off-thread.
    getWeiToutiaoInfo()

    # Send last week's tallies back to the user.
    await tongjiCommand.finish(str(weitoutiaoMap))
""" """
获取微头条数据信息 获取微头条数据信息
""" """
@ -49,7 +62,7 @@ def getWeiToutiaoInfo():
else: else:
# 获取该微头条的发布人和发布日期 # 获取该微头条的发布人和发布日期
author = None author = None
contentSubString = content[-8] contentSubString = content[-8:]
if contentSubString.find("太能喵")>0: if contentSubString.find("太能喵")>0:
weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1 weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
elif contentSubString.find("小小")>0: elif contentSubString.find("小小")>0:
@ -60,64 +73,64 @@ def getWeiToutiaoInfo():
weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1 weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1
getWeiToutiaoInfo() getWeiToutiaoInfo()
""" # """
获取文章数据信息 # 获取文章数据信息
""" # """
def getWenzhangInfo(): # def getWenzhangInfo():
global max_behot_time # global max_behot_time
#
# 第一步输入这个:去除开头警告 # # 第一步输入这个:去除开头警告
warnings.simplefilter('ignore', ResourceWarning) # warnings.simplefilter('ignore', ResourceWarning)
#
chrome_option = ChromeOptions() # chrome_option = ChromeOptions()
chrome_option.headless = True # chrome_option.headless = True
chrome_option.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错 # chrome_option.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错
chrome_option.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug # chrome_option.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug
chrome_option.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对一些特殊页面 # chrome_option.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对一些特殊页面
chrome_option.add_experimental_option('excludeSwitches', ['enable-automation']) # chrome_option.add_experimental_option('excludeSwitches', ['enable-automation'])
chrome_option.add_argument('--headless') # chrome_option.add_argument('--headless')
#
browser = Chrome(executable_path=driver_path, options=chrome_option) # browser = Chrome(executable_path=driver_path, options=chrome_option)
#
# 获取上一周的第一天和最后一天的时间戳 # # 获取上一周的第一天和最后一天的时间戳
lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp() # lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp()
session = HTMLSession() # session = HTMLSession()
wenzhangGet = session.get(f"https://www.toutiao.com/api/pc/list/user/feed?category=pc_profile_article&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII&max_behot_time={max_behot_time}&aid=24&app_name=toutiao_web", verify=False, proxies=None) # wenzhangGet = session.get(f"https://www.toutiao.com/api/pc/list/user/feed?category=pc_profile_article&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII&max_behot_time={max_behot_time}&aid=24&app_name=toutiao_web", verify=False, proxies=None)
# print(weitoutiaoGet.text) # # print(weitoutiaoGet.text)
# 使用Json解析返回的数据 # # 使用Json解析返回的数据
resultJson = json.loads(wenzhangGet.text) # resultJson = json.loads(wenzhangGet.text)
# 先获取下个max_behot_time时间戳如果需要翻页查询需要使用该参数 # # 先获取下个max_behot_time时间戳如果需要翻页查询需要使用该参数
max_behot_time = resultJson['next']['max_behot_time'] # max_behot_time = resultJson['next']['max_behot_time']
# 开始循环解析data数据获取微头条的内容和发布人员 # # 开始循环解析data数据获取微头条的内容和发布人员
dataList = resultJson['data'] # dataList = resultJson['data']
# 获取每一条消息的发布时间戳如果发布时间出现早于上周一0点的数据结束统计函数返回否则递归调用当前函数 # # 获取每一条消息的发布时间戳如果发布时间出现早于上周一0点的数据结束统计函数返回否则递归调用当前函数
for data in dataList: # for data in dataList:
# 获取本条新闻的发布时间 # # 获取本条新闻的发布时间
publishTime = data["publish_time"] # publishTime = data["publish_time"]
if int(publishTime) > int(lastWeekEndTime): # if int(publishTime) > int(lastWeekEndTime):
continue # continue
elif int(publishTime) < int(lastWeekStartTime): # elif int(publishTime) < int(lastWeekStartTime):
return # return
else: # else:
# 获取该文章的标题和对应的url需要再通过url获取文章正文解析作者是谁 # # 获取该文章的标题和对应的url需要再通过url获取文章正文解析作者是谁
title = data["title"] # title = data["title"]
url = data["url"] # url = data["url"]
if url: # if url:
# 请求文章正文内容 # # 请求文章正文内容
browser.get(url) # browser.get(url)
element = browser.find_element(by='XPATH', value="//article") # element = browser.find_element(by='XPATH', value="//article")
print(element) # print(element)
# author = None # # author = None
# if data.find("太能喵")>0: # # if data.find("太能喵")>0:
# weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1 # # weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
# elif content.find("小小")>0: # # elif content.find("小小")>0:
# weitoutiaoMap["小小"] = weitoutiaoMap["小小"]+1 # # weitoutiaoMap["小小"] = weitoutiaoMap["小小"]+1
# elif content.find("大帝强") > 0: # # elif content.find("大帝强") > 0:
# weitoutiaoMap["大帝强"] = weitoutiaoMap["大帝强"] + 1 # # weitoutiaoMap["大帝强"] = weitoutiaoMap["大帝强"] + 1
# elif content.find("叶小欢") > 0: # # elif content.find("叶小欢") > 0:
# weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1 # # weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1
# 如果没有被return掉继续请求下一页内容 # # 如果没有被return掉继续请求下一页内容
getWeiToutiaoInfo() # getWeiToutiaoInfo()
""" """
获取当前日期上一周的第一天和最后一天的时间戳 获取当前日期上一周的第一天和最后一天的时间戳
@ -132,5 +145,5 @@ def getLastWeekFirstDayTimeStamp():
return last_week_start.timestamp(), last_week_end.timestamp() return last_week_start.timestamp(), last_week_end.timestamp()
if __name__ == '__main__': if __name__ == '__main__':
getWenzhangInfo() getWeiToutiaoInfo()
# print(weitoutiaoMap) # print(weitoutiaoMap)