From 449a097b32528c944bcb94dd287cb69da4744766 Mon Sep 17 00:00:00 2001 From: xiaoyan Date: Thu, 28 Sep 2023 17:12:30 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=8A=A0=E5=BE=AE=E5=A4=B4?= =?UTF-8?q?=E6=9D=A1=E7=BB=9F=E8=AE=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 127 ++++++++++++++++++++++++ src/plugins/zhibo8/plugins/Foresee.py | 2 +- src/plugins/zhibo8/plugins/TouTiao.py | 135 ++++++++++++++------------ 3 files changed, 202 insertions(+), 62 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4df8d79..98ceb6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,49 @@ +adapter==0.1 aiocache==0.12.0 aiocqhttp==1.4.3 aiofiles==23.1.0 anyio==3.6.2 appdirs==1.4.4 +argcomplete==2.0.0 +argon2-cffi==21.3.0 +argon2-cffi-bindings==21.2.0 +arrow==1.2.3 +asttokens==2.2.1 +async-generator==1.10 +attrs==22.2.0 +backcall==0.2.0 beautifulsoup4==4.11.2 +bleach==6.0.0 blinker==1.5 +brotlipy==0.7.0 bs4==0.0.1 +CacheControl==0.12.11 +cashews==4.7.1 certifi==2022.12.7 +cffi==1.15.1 charset-normalizer==3.0.1 +cleo==2.0.1 click==8.1.3 colorama==0.4.6 +comm==0.1.2 +crashtest==0.4.1 +cryptography==40.0.2 cssselect==1.2.0 +debugpy==1.6.6 +decorator==5.1.1 +defusedxml==0.7.1 +distlib==0.3.6 docopt==0.6.2 +dulwich==0.20.50 +exceptiongroup==1.1.1 +executing==1.2.0 fake-useragent==1.1.1 fastapi==0.91.0 +fastjsonschema==2.16.2 +filelock==3.9.0 +fqdn==1.5.1 +gitdb==4.0.9 +GitPython==3.1.27 h11==0.14.0 h2==4.1.0 hpack==4.0.0 @@ -25,49 +55,146 @@ hypercorn==0.14.3 hyperframe==6.0.1 idna==3.4 importlib-metadata==6.0.0 +ipykernel==6.20.2 +ipython==8.9.0 +ipython-genutils==0.2.0 +ipywidgets==8.0.4 irc3==1.1.8 +isoduration==20.11.0 itsdangerous==2.1.2 +jaraco.classes==3.2.3 +jedi==0.18.2 Jinja2==3.1.2 +jsonpointer==2.3 +jsonschema==4.17.3 +jupyter==1.0.0 +jupyter-console==6.4.4 +jupyter-events==0.6.3 +jupyter_client==8.0.1 +jupyter_core==5.1.5 +jupyter_server==2.1.0 +jupyter_server_terminals==0.4.4 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==3.0.5 +keyring==23.13.1 lfmh==1.1.1 +lockfile==0.12.2 loguru==0.6.0 lxml==4.9.2 MarkupSafe==2.1.2 +matplotlib-inline==0.1.6 +mistune==2.0.4 +more-itertools==9.0.0 msgpack==1.0.4 multidict==6.0.4 musicbrainzngs==0.7.1 +mysql-connector-python==8.0.28 +nbclassic==0.5.1 +nbclient==0.7.2 +nbconvert==7.2.9 +nbformat==5.7.3 +nest-asyncio==1.5.6 +nonebot==1.9.1 +nonebot-adapter-feishu==2.0.0b6 nonebot-adapter-onebot==2.2.1 nonebot2==2.0.0rc3 +notebook==6.5.2 +notebook_shim==0.2.2 +numpy==1.24.3 onebot==1.3.3 +outcome==1.2.0 +packaging==23.0 +pandas==2.0.1 +pandocfilters==1.5.0 parse==1.19.0 +parsimonious==0.10.0 +parso==0.8.3 +pexpect==4.8.0 +pickleshare==0.7.5 +pipx==1.1.0 +pkginfo==1.9.6 +platformdirs==2.6.2 +poetry==1.3.2 +poetry-core==1.4.0 +poetry-plugin-export==1.3.0 priority==2.0.0 +prometheus-client==0.16.0 +prompt-toolkit==3.0.36 +psutil==5.9.4 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pycparser==2.21 +pycryptodome==3.17 pydantic==1.10.4 pyee==8.2.2 +Pygments==2.14.0 pygtrie==2.5.0 +pyOpenSSL==23.1.1 pyppeteer==1.0.2 pyquery==2.0.0 +pyrsistent==0.19.3 +PySocks==1.7.1 +python-dateutil==2.8.2 python-dotenv==0.21.1 +python-json-logger==2.0.4 +pytz==2023.3 +pywin32==305 +pywin32-ctypes==0.2.0 +pywinpty==2.0.10 PyYAML==6.0 +pyzmq==25.0.0 +qtconsole==5.4.0 +QtPy==2.3.0 quart==0.18.3 +rapidfuzz==2.13.7 +regex==2022.10.31 requests==2.28.2 requests-html==0.10.0 +requests-toolbelt==0.10.1 +rfc3339-validator==0.1.4 rfc3986==1.5.0 +rfc3986-validator==0.1.1 +selenium==4.9.1 +Send2Trash==1.8.0 +shellingham==1.5.0.post1 six==1.16.0 +smmap==5.0.0 sniffio==1.3.0 +sortedcontainers==2.4.0 soupsieve==2.3.2.post1 +stack-data==0.6.2 starlette==0.24.0 +terminado==0.17.1 +tinycss2==1.2.1 toml==0.10.2 +tomli==2.0.1 tomlkit==0.11.6 +tornado==6.2 tqdm==4.64.1 +traitlets==5.8.1 +trio==0.22.0 +trio-websocket==0.10.2 +trove-classifiers==2023.1.20 typing_extensions==4.4.0 +tzdata==2023.3 +uri-template==1.2.0 urllib3==1.26.14 +urllib3-secure-extra==0.1.0 +userpath==1.8.0 uvicorn==0.20.0 venusian==3.0.0 +virtualenv==20.16.5 w3lib==2.1.1 watchfiles==0.18.1 +wcwidth==0.2.6 +webcolors==1.12 webencodings==0.5.1 +websocket-client==1.5.0 websockets==10.4 Werkzeug==2.2.2 +widgetsnbextension==4.0.5 win32-setctime==1.1.0 +wincertstore==0.2 wsproto==1.2.0 yarl==1.8.2 zipp==3.13.0 diff --git a/src/plugins/zhibo8/plugins/Foresee.py b/src/plugins/zhibo8/plugins/Foresee.py index b2ebec2..a9403e6 100644 --- a/src/plugins/zhibo8/plugins/Foresee.py +++ b/src/plugins/zhibo8/plugins/Foresee.py @@ -7,7 +7,7 @@ from nonebot import on_command from nonebot.rule import to_me from requests_html import HTMLSession, HTML -zhibo8Command = on_command("/zhibo8", rule=to_me(), aliases={'预测', '英超', 'zhibo8'}, priority=10) +zhibo8Command = on_command("zhibo8", rule=to_me(), aliases={'预测', '英超', 'zhibo8'}, priority=10) xiaobianStr = r""" 小编们预测比分: diff --git a/src/plugins/zhibo8/plugins/TouTiao.py b/src/plugins/zhibo8/plugins/TouTiao.py index 256c6bd..24245e5 100644 --- a/src/plugins/zhibo8/plugins/TouTiao.py +++ b/src/plugins/zhibo8/plugins/TouTiao.py @@ -11,12 +11,25 @@ from requests_html import HTMLSession, HTML import json from selenium.webdriver import Chrome, ChromeOptions +tongjiCommand = on_command("统计", rule=to_me(), aliases={'微头条', 'tongji', 'tj'}, priority=10) -driver_path = '../../../../driver/cromedriver_win32' +# driver_path = '../../../../driver/cromedriver_win32' max_behot_time = '0' # 初始化map对象,分别记录每个人的微头条个数 weitoutiaoMap = {"太能喵": 0, "小小": 0, "大帝强": 0, "叶小欢": 0} +""" +接收QQ消息,根据消息统计微头条结果 +""" +@tongjiCommand.handle() +async def handle_first_receive(bot: Bot, event: Event, state: T_State, args= CommandArg()): + # 清空原有的weitoutiaoMap中的统计结果 + for k,v in weitoutiaoMap.items(): + weitoutiaoMap[k] = 0 + getWeiToutiaoInfo() + # 将上周的微头条考核结果发送给用户 + await tongjiCommand.finish(str(weitoutiaoMap)) + """ 获取微头条数据信息 """ @@ -49,7 +62,7 @@ def getWeiToutiaoInfo(): else: # 获取该微头条的发布人和发布日期 author = None - contentSubString = content[-8] + contentSubString = content[-8:] if contentSubString.find("太能喵")>0: weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1 elif contentSubString.find("小小")>0: @@ -60,64 +73,64 @@ def getWeiToutiaoInfo(): weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1 getWeiToutiaoInfo() -""" -获取文章数据信息 -""" -def getWenzhangInfo(): - global max_behot_time - - # 第一步输入这个:去除开头警告 - warnings.simplefilter('ignore', ResourceWarning) - - chrome_option = ChromeOptions() - chrome_option.headless = True - chrome_option.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错 - chrome_option.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug - chrome_option.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对一些特殊页面 - chrome_option.add_experimental_option('excludeSwitches', ['enable-automation']) - chrome_option.add_argument('--headless') - - browser = Chrome(executable_path=driver_path, options=chrome_option) - - # 获取上一周的第一天和最后一天的时间戳 - lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp() - session = HTMLSession() - wenzhangGet = session.get(f"https://www.toutiao.com/api/pc/list/user/feed?category=pc_profile_article&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII&max_behot_time={max_behot_time}&aid=24&app_name=toutiao_web", verify=False, proxies=None) - # print(weitoutiaoGet.text) - # 使用Json解析返回的数据 - resultJson = json.loads(wenzhangGet.text) - # 先获取下个max_behot_time时间戳,如果需要翻页查询需要使用该参数 - max_behot_time = resultJson['next']['max_behot_time'] - # 开始循环解析data数据,获取微头条的内容和发布人员 - dataList = resultJson['data'] - # 获取每一条消息的发布时间戳,如果发布时间出现早于上周一0点的数据,结束统计,函数返回,否则递归调用当前函数 - for data in dataList: - # 获取本条新闻的发布时间 - publishTime = data["publish_time"] - if int(publishTime) > int(lastWeekEndTime): - continue - elif int(publishTime) < int(lastWeekStartTime): - return - else: - # 获取该文章的标题和对应的url,需要再通过url获取文章正文,解析作者是谁 - title = data["title"] - url = data["url"] - if url: - # 请求文章正文内容 - browser.get(url) - element = browser.find_element(by='XPATH', value="//article") - print(element) - # author = None - # if data.find("太能喵")>0: - # weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1 - # elif content.find("小小")>0: - # weitoutiaoMap["小小"] = weitoutiaoMap["小小"]+1 - # elif content.find("大帝强") > 0: - # weitoutiaoMap["大帝强"] = weitoutiaoMap["大帝强"] + 1 - # elif content.find("叶小欢") > 0: - # weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1 - # 如果没有被return掉,继续请求下一页内容 - getWeiToutiaoInfo() +# """ +# 获取文章数据信息 +# """ +# def getWenzhangInfo(): +# global max_behot_time +# +# # 第一步输入这个:去除开头警告 +# warnings.simplefilter('ignore', ResourceWarning) +# +# chrome_option = ChromeOptions() +# chrome_option.headless = True +# chrome_option.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错 +# chrome_option.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug +# chrome_option.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对一些特殊页面 +# chrome_option.add_experimental_option('excludeSwitches', ['enable-automation']) +# chrome_option.add_argument('--headless') +# +# browser = Chrome(executable_path=driver_path, options=chrome_option) +# +# # 获取上一周的第一天和最后一天的时间戳 +# lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp() +# session = HTMLSession() +# wenzhangGet = session.get(f"https://www.toutiao.com/api/pc/list/user/feed?category=pc_profile_article&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII&max_behot_time={max_behot_time}&aid=24&app_name=toutiao_web", verify=False, proxies=None) +# # print(weitoutiaoGet.text) +# # 使用Json解析返回的数据 +# resultJson = json.loads(wenzhangGet.text) +# # 先获取下个max_behot_time时间戳,如果需要翻页查询需要使用该参数 +# max_behot_time = resultJson['next']['max_behot_time'] +# # 开始循环解析data数据,获取微头条的内容和发布人员 +# dataList = resultJson['data'] +# # 获取每一条消息的发布时间戳,如果发布时间出现早于上周一0点的数据,结束统计,函数返回,否则递归调用当前函数 +# for data in dataList: +# # 获取本条新闻的发布时间 +# publishTime = data["publish_time"] +# if int(publishTime) > int(lastWeekEndTime): +# continue +# elif int(publishTime) < int(lastWeekStartTime): +# return +# else: +# # 获取该文章的标题和对应的url,需要再通过url获取文章正文,解析作者是谁 +# title = data["title"] +# url = data["url"] +# if url: +# # 请求文章正文内容 +# browser.get(url) +# element = browser.find_element(by='XPATH', value="//article") +# print(element) +# # author = None +# # if data.find("太能喵")>0: +# # weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1 +# # elif content.find("小小")>0: +# # weitoutiaoMap["小小"] = weitoutiaoMap["小小"]+1 +# # elif content.find("大帝强") > 0: +# # weitoutiaoMap["大帝强"] = weitoutiaoMap["大帝强"] + 1 +# # elif content.find("叶小欢") > 0: +# # weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1 +# # 如果没有被return掉,继续请求下一页内容 +# getWeiToutiaoInfo() """ 获取当前日期上一周的第一天和最后一天的时间戳 @@ -132,5 +145,5 @@ def getLastWeekFirstDayTimeStamp(): return last_week_start.timestamp(), last_week_end.timestamp() if __name__ == '__main__': - getWenzhangInfo() + getWeiToutiaoInfo() # print(weitoutiaoMap) \ No newline at end of file