feat: 增加微头条统计
This commit is contained in:
parent
515503b342
commit
449a097b32
127
requirements.txt
127
requirements.txt
@ -1,19 +1,49 @@
|
|||||||
|
adapter==0.1
|
||||||
aiocache==0.12.0
|
aiocache==0.12.0
|
||||||
aiocqhttp==1.4.3
|
aiocqhttp==1.4.3
|
||||||
aiofiles==23.1.0
|
aiofiles==23.1.0
|
||||||
anyio==3.6.2
|
anyio==3.6.2
|
||||||
appdirs==1.4.4
|
appdirs==1.4.4
|
||||||
|
argcomplete==2.0.0
|
||||||
|
argon2-cffi==21.3.0
|
||||||
|
argon2-cffi-bindings==21.2.0
|
||||||
|
arrow==1.2.3
|
||||||
|
asttokens==2.2.1
|
||||||
|
async-generator==1.10
|
||||||
|
attrs==22.2.0
|
||||||
|
backcall==0.2.0
|
||||||
beautifulsoup4==4.11.2
|
beautifulsoup4==4.11.2
|
||||||
|
bleach==6.0.0
|
||||||
blinker==1.5
|
blinker==1.5
|
||||||
|
brotlipy==0.7.0
|
||||||
bs4==0.0.1
|
bs4==0.0.1
|
||||||
|
CacheControl==0.12.11
|
||||||
|
cashews==4.7.1
|
||||||
certifi==2022.12.7
|
certifi==2022.12.7
|
||||||
|
cffi==1.15.1
|
||||||
charset-normalizer==3.0.1
|
charset-normalizer==3.0.1
|
||||||
|
cleo==2.0.1
|
||||||
click==8.1.3
|
click==8.1.3
|
||||||
colorama==0.4.6
|
colorama==0.4.6
|
||||||
|
comm==0.1.2
|
||||||
|
crashtest==0.4.1
|
||||||
|
cryptography==40.0.2
|
||||||
cssselect==1.2.0
|
cssselect==1.2.0
|
||||||
|
debugpy==1.6.6
|
||||||
|
decorator==5.1.1
|
||||||
|
defusedxml==0.7.1
|
||||||
|
distlib==0.3.6
|
||||||
docopt==0.6.2
|
docopt==0.6.2
|
||||||
|
dulwich==0.20.50
|
||||||
|
exceptiongroup==1.1.1
|
||||||
|
executing==1.2.0
|
||||||
fake-useragent==1.1.1
|
fake-useragent==1.1.1
|
||||||
fastapi==0.91.0
|
fastapi==0.91.0
|
||||||
|
fastjsonschema==2.16.2
|
||||||
|
filelock==3.9.0
|
||||||
|
fqdn==1.5.1
|
||||||
|
gitdb==4.0.9
|
||||||
|
GitPython==3.1.27
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
h2==4.1.0
|
h2==4.1.0
|
||||||
hpack==4.0.0
|
hpack==4.0.0
|
||||||
@ -25,49 +55,146 @@ hypercorn==0.14.3
|
|||||||
hyperframe==6.0.1
|
hyperframe==6.0.1
|
||||||
idna==3.4
|
idna==3.4
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.0.0
|
||||||
|
ipykernel==6.20.2
|
||||||
|
ipython==8.9.0
|
||||||
|
ipython-genutils==0.2.0
|
||||||
|
ipywidgets==8.0.4
|
||||||
irc3==1.1.8
|
irc3==1.1.8
|
||||||
|
isoduration==20.11.0
|
||||||
itsdangerous==2.1.2
|
itsdangerous==2.1.2
|
||||||
|
jaraco.classes==3.2.3
|
||||||
|
jedi==0.18.2
|
||||||
Jinja2==3.1.2
|
Jinja2==3.1.2
|
||||||
|
jsonpointer==2.3
|
||||||
|
jsonschema==4.17.3
|
||||||
|
jupyter==1.0.0
|
||||||
|
jupyter-console==6.4.4
|
||||||
|
jupyter-events==0.6.3
|
||||||
|
jupyter_client==8.0.1
|
||||||
|
jupyter_core==5.1.5
|
||||||
|
jupyter_server==2.1.0
|
||||||
|
jupyter_server_terminals==0.4.4
|
||||||
|
jupyterlab-pygments==0.2.2
|
||||||
|
jupyterlab-widgets==3.0.5
|
||||||
|
keyring==23.13.1
|
||||||
lfmh==1.1.1
|
lfmh==1.1.1
|
||||||
|
lockfile==0.12.2
|
||||||
loguru==0.6.0
|
loguru==0.6.0
|
||||||
lxml==4.9.2
|
lxml==4.9.2
|
||||||
MarkupSafe==2.1.2
|
MarkupSafe==2.1.2
|
||||||
|
matplotlib-inline==0.1.6
|
||||||
|
mistune==2.0.4
|
||||||
|
more-itertools==9.0.0
|
||||||
msgpack==1.0.4
|
msgpack==1.0.4
|
||||||
multidict==6.0.4
|
multidict==6.0.4
|
||||||
musicbrainzngs==0.7.1
|
musicbrainzngs==0.7.1
|
||||||
|
mysql-connector-python==8.0.28
|
||||||
|
nbclassic==0.5.1
|
||||||
|
nbclient==0.7.2
|
||||||
|
nbconvert==7.2.9
|
||||||
|
nbformat==5.7.3
|
||||||
|
nest-asyncio==1.5.6
|
||||||
|
nonebot==1.9.1
|
||||||
|
nonebot-adapter-feishu==2.0.0b6
|
||||||
nonebot-adapter-onebot==2.2.1
|
nonebot-adapter-onebot==2.2.1
|
||||||
nonebot2==2.0.0rc3
|
nonebot2==2.0.0rc3
|
||||||
|
notebook==6.5.2
|
||||||
|
notebook_shim==0.2.2
|
||||||
|
numpy==1.24.3
|
||||||
onebot==1.3.3
|
onebot==1.3.3
|
||||||
|
outcome==1.2.0
|
||||||
|
packaging==23.0
|
||||||
|
pandas==2.0.1
|
||||||
|
pandocfilters==1.5.0
|
||||||
parse==1.19.0
|
parse==1.19.0
|
||||||
|
parsimonious==0.10.0
|
||||||
|
parso==0.8.3
|
||||||
|
pexpect==4.8.0
|
||||||
|
pickleshare==0.7.5
|
||||||
|
pipx==1.1.0
|
||||||
|
pkginfo==1.9.6
|
||||||
|
platformdirs==2.6.2
|
||||||
|
poetry==1.3.2
|
||||||
|
poetry-core==1.4.0
|
||||||
|
poetry-plugin-export==1.3.0
|
||||||
priority==2.0.0
|
priority==2.0.0
|
||||||
|
prometheus-client==0.16.0
|
||||||
|
prompt-toolkit==3.0.36
|
||||||
|
psutil==5.9.4
|
||||||
|
ptyprocess==0.7.0
|
||||||
|
pure-eval==0.2.2
|
||||||
|
pycparser==2.21
|
||||||
|
pycryptodome==3.17
|
||||||
pydantic==1.10.4
|
pydantic==1.10.4
|
||||||
pyee==8.2.2
|
pyee==8.2.2
|
||||||
|
Pygments==2.14.0
|
||||||
pygtrie==2.5.0
|
pygtrie==2.5.0
|
||||||
|
pyOpenSSL==23.1.1
|
||||||
pyppeteer==1.0.2
|
pyppeteer==1.0.2
|
||||||
pyquery==2.0.0
|
pyquery==2.0.0
|
||||||
|
pyrsistent==0.19.3
|
||||||
|
PySocks==1.7.1
|
||||||
|
python-dateutil==2.8.2
|
||||||
python-dotenv==0.21.1
|
python-dotenv==0.21.1
|
||||||
|
python-json-logger==2.0.4
|
||||||
|
pytz==2023.3
|
||||||
|
pywin32==305
|
||||||
|
pywin32-ctypes==0.2.0
|
||||||
|
pywinpty==2.0.10
|
||||||
PyYAML==6.0
|
PyYAML==6.0
|
||||||
|
pyzmq==25.0.0
|
||||||
|
qtconsole==5.4.0
|
||||||
|
QtPy==2.3.0
|
||||||
quart==0.18.3
|
quart==0.18.3
|
||||||
|
rapidfuzz==2.13.7
|
||||||
|
regex==2022.10.31
|
||||||
requests==2.28.2
|
requests==2.28.2
|
||||||
requests-html==0.10.0
|
requests-html==0.10.0
|
||||||
|
requests-toolbelt==0.10.1
|
||||||
|
rfc3339-validator==0.1.4
|
||||||
rfc3986==1.5.0
|
rfc3986==1.5.0
|
||||||
|
rfc3986-validator==0.1.1
|
||||||
|
selenium==4.9.1
|
||||||
|
Send2Trash==1.8.0
|
||||||
|
shellingham==1.5.0.post1
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
|
smmap==5.0.0
|
||||||
sniffio==1.3.0
|
sniffio==1.3.0
|
||||||
|
sortedcontainers==2.4.0
|
||||||
soupsieve==2.3.2.post1
|
soupsieve==2.3.2.post1
|
||||||
|
stack-data==0.6.2
|
||||||
starlette==0.24.0
|
starlette==0.24.0
|
||||||
|
terminado==0.17.1
|
||||||
|
tinycss2==1.2.1
|
||||||
toml==0.10.2
|
toml==0.10.2
|
||||||
|
tomli==2.0.1
|
||||||
tomlkit==0.11.6
|
tomlkit==0.11.6
|
||||||
|
tornado==6.2
|
||||||
tqdm==4.64.1
|
tqdm==4.64.1
|
||||||
|
traitlets==5.8.1
|
||||||
|
trio==0.22.0
|
||||||
|
trio-websocket==0.10.2
|
||||||
|
trove-classifiers==2023.1.20
|
||||||
typing_extensions==4.4.0
|
typing_extensions==4.4.0
|
||||||
|
tzdata==2023.3
|
||||||
|
uri-template==1.2.0
|
||||||
urllib3==1.26.14
|
urllib3==1.26.14
|
||||||
|
urllib3-secure-extra==0.1.0
|
||||||
|
userpath==1.8.0
|
||||||
uvicorn==0.20.0
|
uvicorn==0.20.0
|
||||||
venusian==3.0.0
|
venusian==3.0.0
|
||||||
|
virtualenv==20.16.5
|
||||||
w3lib==2.1.1
|
w3lib==2.1.1
|
||||||
watchfiles==0.18.1
|
watchfiles==0.18.1
|
||||||
|
wcwidth==0.2.6
|
||||||
|
webcolors==1.12
|
||||||
webencodings==0.5.1
|
webencodings==0.5.1
|
||||||
|
websocket-client==1.5.0
|
||||||
websockets==10.4
|
websockets==10.4
|
||||||
Werkzeug==2.2.2
|
Werkzeug==2.2.2
|
||||||
|
widgetsnbextension==4.0.5
|
||||||
win32-setctime==1.1.0
|
win32-setctime==1.1.0
|
||||||
|
wincertstore==0.2
|
||||||
wsproto==1.2.0
|
wsproto==1.2.0
|
||||||
yarl==1.8.2
|
yarl==1.8.2
|
||||||
zipp==3.13.0
|
zipp==3.13.0
|
||||||
|
@ -7,7 +7,7 @@ from nonebot import on_command
|
|||||||
from nonebot.rule import to_me
|
from nonebot.rule import to_me
|
||||||
from requests_html import HTMLSession, HTML
|
from requests_html import HTMLSession, HTML
|
||||||
|
|
||||||
zhibo8Command = on_command("/zhibo8", rule=to_me(), aliases={'预测', '英超', 'zhibo8'}, priority=10)
|
zhibo8Command = on_command("zhibo8", rule=to_me(), aliases={'预测', '英超', 'zhibo8'}, priority=10)
|
||||||
|
|
||||||
xiaobianStr = r"""
|
xiaobianStr = r"""
|
||||||
小编们预测比分:
|
小编们预测比分:
|
||||||
|
@ -11,12 +11,25 @@ from requests_html import HTMLSession, HTML
|
|||||||
import json
|
import json
|
||||||
from selenium.webdriver import Chrome, ChromeOptions
|
from selenium.webdriver import Chrome, ChromeOptions
|
||||||
|
|
||||||
|
tongjiCommand = on_command("统计", rule=to_me(), aliases={'微头条', 'tongji', 'tj'}, priority=10)
|
||||||
|
|
||||||
driver_path = '../../../../driver/cromedriver_win32'
|
# driver_path = '../../../../driver/cromedriver_win32'
|
||||||
max_behot_time = '0'
|
max_behot_time = '0'
|
||||||
# 初始化map对象,分别记录每个人的微头条个数
|
# 初始化map对象,分别记录每个人的微头条个数
|
||||||
weitoutiaoMap = {"太能喵": 0, "小小": 0, "大帝强": 0, "叶小欢": 0}
|
weitoutiaoMap = {"太能喵": 0, "小小": 0, "大帝强": 0, "叶小欢": 0}
|
||||||
|
|
||||||
|
"""
|
||||||
|
接收QQ消息,根据消息统计微头条结果
|
||||||
|
"""
|
||||||
|
@tongjiCommand.handle()
|
||||||
|
async def handle_first_receive(bot: Bot, event: Event, state: T_State, args= CommandArg()):
|
||||||
|
# 清空原有的weitoutiaoMap中的统计结果
|
||||||
|
for k,v in weitoutiaoMap.items():
|
||||||
|
weitoutiaoMap[k] = 0
|
||||||
|
getWeiToutiaoInfo()
|
||||||
|
# 将上周的微头条考核结果发送给用户
|
||||||
|
await tongjiCommand.finish(str(weitoutiaoMap))
|
||||||
|
|
||||||
"""
|
"""
|
||||||
获取微头条数据信息
|
获取微头条数据信息
|
||||||
"""
|
"""
|
||||||
@ -49,7 +62,7 @@ def getWeiToutiaoInfo():
|
|||||||
else:
|
else:
|
||||||
# 获取该微头条的发布人和发布日期
|
# 获取该微头条的发布人和发布日期
|
||||||
author = None
|
author = None
|
||||||
contentSubString = content[-8]
|
contentSubString = content[-8:]
|
||||||
if contentSubString.find("太能喵")>0:
|
if contentSubString.find("太能喵")>0:
|
||||||
weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
|
weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
|
||||||
elif contentSubString.find("小小")>0:
|
elif contentSubString.find("小小")>0:
|
||||||
@ -60,64 +73,64 @@ def getWeiToutiaoInfo():
|
|||||||
weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1
|
weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1
|
||||||
getWeiToutiaoInfo()
|
getWeiToutiaoInfo()
|
||||||
|
|
||||||
"""
|
# """
|
||||||
获取文章数据信息
|
# 获取文章数据信息
|
||||||
"""
|
# """
|
||||||
def getWenzhangInfo():
|
# def getWenzhangInfo():
|
||||||
global max_behot_time
|
# global max_behot_time
|
||||||
|
#
|
||||||
# 第一步输入这个:去除开头警告
|
# # 第一步输入这个:去除开头警告
|
||||||
warnings.simplefilter('ignore', ResourceWarning)
|
# warnings.simplefilter('ignore', ResourceWarning)
|
||||||
|
#
|
||||||
chrome_option = ChromeOptions()
|
# chrome_option = ChromeOptions()
|
||||||
chrome_option.headless = True
|
# chrome_option.headless = True
|
||||||
chrome_option.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错
|
# chrome_option.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错
|
||||||
chrome_option.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug
|
# chrome_option.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug
|
||||||
chrome_option.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对一些特殊页面
|
# chrome_option.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对一些特殊页面
|
||||||
chrome_option.add_experimental_option('excludeSwitches', ['enable-automation'])
|
# chrome_option.add_experimental_option('excludeSwitches', ['enable-automation'])
|
||||||
chrome_option.add_argument('--headless')
|
# chrome_option.add_argument('--headless')
|
||||||
|
#
|
||||||
browser = Chrome(executable_path=driver_path, options=chrome_option)
|
# browser = Chrome(executable_path=driver_path, options=chrome_option)
|
||||||
|
#
|
||||||
# 获取上一周的第一天和最后一天的时间戳
|
# # 获取上一周的第一天和最后一天的时间戳
|
||||||
lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp()
|
# lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp()
|
||||||
session = HTMLSession()
|
# session = HTMLSession()
|
||||||
wenzhangGet = session.get(f"https://www.toutiao.com/api/pc/list/user/feed?category=pc_profile_article&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII&max_behot_time={max_behot_time}&aid=24&app_name=toutiao_web", verify=False, proxies=None)
|
# wenzhangGet = session.get(f"https://www.toutiao.com/api/pc/list/user/feed?category=pc_profile_article&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII&max_behot_time={max_behot_time}&aid=24&app_name=toutiao_web", verify=False, proxies=None)
|
||||||
# print(weitoutiaoGet.text)
|
# # print(weitoutiaoGet.text)
|
||||||
# 使用Json解析返回的数据
|
# # 使用Json解析返回的数据
|
||||||
resultJson = json.loads(wenzhangGet.text)
|
# resultJson = json.loads(wenzhangGet.text)
|
||||||
# 先获取下个max_behot_time时间戳,如果需要翻页查询需要使用该参数
|
# # 先获取下个max_behot_time时间戳,如果需要翻页查询需要使用该参数
|
||||||
max_behot_time = resultJson['next']['max_behot_time']
|
# max_behot_time = resultJson['next']['max_behot_time']
|
||||||
# 开始循环解析data数据,获取微头条的内容和发布人员
|
# # 开始循环解析data数据,获取微头条的内容和发布人员
|
||||||
dataList = resultJson['data']
|
# dataList = resultJson['data']
|
||||||
# 获取每一条消息的发布时间戳,如果发布时间出现早于上周一0点的数据,结束统计,函数返回,否则递归调用当前函数
|
# # 获取每一条消息的发布时间戳,如果发布时间出现早于上周一0点的数据,结束统计,函数返回,否则递归调用当前函数
|
||||||
for data in dataList:
|
# for data in dataList:
|
||||||
# 获取本条新闻的发布时间
|
# # 获取本条新闻的发布时间
|
||||||
publishTime = data["publish_time"]
|
# publishTime = data["publish_time"]
|
||||||
if int(publishTime) > int(lastWeekEndTime):
|
# if int(publishTime) > int(lastWeekEndTime):
|
||||||
continue
|
# continue
|
||||||
elif int(publishTime) < int(lastWeekStartTime):
|
# elif int(publishTime) < int(lastWeekStartTime):
|
||||||
return
|
# return
|
||||||
else:
|
# else:
|
||||||
# 获取该文章的标题和对应的url,需要再通过url获取文章正文,解析作者是谁
|
# # 获取该文章的标题和对应的url,需要再通过url获取文章正文,解析作者是谁
|
||||||
title = data["title"]
|
# title = data["title"]
|
||||||
url = data["url"]
|
# url = data["url"]
|
||||||
if url:
|
# if url:
|
||||||
# 请求文章正文内容
|
# # 请求文章正文内容
|
||||||
browser.get(url)
|
# browser.get(url)
|
||||||
element = browser.find_element(by='XPATH', value="//article")
|
# element = browser.find_element(by='XPATH', value="//article")
|
||||||
print(element)
|
# print(element)
|
||||||
# author = None
|
# # author = None
|
||||||
# if data.find("太能喵")>0:
|
# # if data.find("太能喵")>0:
|
||||||
# weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
|
# # weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
|
||||||
# elif content.find("小小")>0:
|
# # elif content.find("小小")>0:
|
||||||
# weitoutiaoMap["小小"] = weitoutiaoMap["小小"]+1
|
# # weitoutiaoMap["小小"] = weitoutiaoMap["小小"]+1
|
||||||
# elif content.find("大帝强") > 0:
|
# # elif content.find("大帝强") > 0:
|
||||||
# weitoutiaoMap["大帝强"] = weitoutiaoMap["大帝强"] + 1
|
# # weitoutiaoMap["大帝强"] = weitoutiaoMap["大帝强"] + 1
|
||||||
# elif content.find("叶小欢") > 0:
|
# # elif content.find("叶小欢") > 0:
|
||||||
# weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1
|
# # weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1
|
||||||
# 如果没有被return掉,继续请求下一页内容
|
# # 如果没有被return掉,继续请求下一页内容
|
||||||
getWeiToutiaoInfo()
|
# getWeiToutiaoInfo()
|
||||||
|
|
||||||
"""
|
"""
|
||||||
获取当前日期上一周的第一天和最后一天的时间戳
|
获取当前日期上一周的第一天和最后一天的时间戳
|
||||||
@ -132,5 +145,5 @@ def getLastWeekFirstDayTimeStamp():
|
|||||||
return last_week_start.timestamp(), last_week_end.timestamp()
|
return last_week_start.timestamp(), last_week_end.timestamp()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
getWenzhangInfo()
|
getWeiToutiaoInfo()
|
||||||
# print(weitoutiaoMap)
|
# print(weitoutiaoMap)
|
Loading…
x
Reference in New Issue
Block a user