feat: 增加微头条统计

This commit is contained in:
xiaoyan 2023-09-28 17:12:30 +08:00
parent 515503b342
commit 449a097b32
3 changed files with 202 additions and 62 deletions

View File

@ -1,19 +1,49 @@
adapter==0.1
aiocache==0.12.0
aiocqhttp==1.4.3
aiofiles==23.1.0
anyio==3.6.2
appdirs==1.4.4
argcomplete==2.0.0
argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
arrow==1.2.3
asttokens==2.2.1
async-generator==1.10
attrs==22.2.0
backcall==0.2.0
beautifulsoup4==4.11.2
bleach==6.0.0
blinker==1.5
brotlipy==0.7.0
bs4==0.0.1
CacheControl==0.12.11
cashews==4.7.1
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==3.0.1
cleo==2.0.1
click==8.1.3
colorama==0.4.6
comm==0.1.2
crashtest==0.4.1
cryptography==40.0.2
cssselect==1.2.0
debugpy==1.6.6
decorator==5.1.1
defusedxml==0.7.1
distlib==0.3.6
docopt==0.6.2
dulwich==0.20.50
exceptiongroup==1.1.1
executing==1.2.0
fake-useragent==1.1.1
fastapi==0.91.0
fastjsonschema==2.16.2
filelock==3.9.0
fqdn==1.5.1
gitdb==4.0.9
GitPython==3.1.27
h11==0.14.0
h2==4.1.0
hpack==4.0.0
@ -25,49 +55,146 @@ hypercorn==0.14.3
hyperframe==6.0.1
idna==3.4
importlib-metadata==6.0.0
ipykernel==6.20.2
ipython==8.9.0
ipython-genutils==0.2.0
ipywidgets==8.0.4
irc3==1.1.8
isoduration==20.11.0
itsdangerous==2.1.2
jaraco.classes==3.2.3
jedi==0.18.2
Jinja2==3.1.2
jsonpointer==2.3
jsonschema==4.17.3
jupyter==1.0.0
jupyter-console==6.4.4
jupyter-events==0.6.3
jupyter_client==8.0.1
jupyter_core==5.1.5
jupyter_server==2.1.0
jupyter_server_terminals==0.4.4
jupyterlab-pygments==0.2.2
jupyterlab-widgets==3.0.5
keyring==23.13.1
lfmh==1.1.1
lockfile==0.12.2
loguru==0.6.0
lxml==4.9.2
MarkupSafe==2.1.2
matplotlib-inline==0.1.6
mistune==2.0.4
more-itertools==9.0.0
msgpack==1.0.4
multidict==6.0.4
musicbrainzngs==0.7.1
mysql-connector-python==8.0.28
nbclassic==0.5.1
nbclient==0.7.2
nbconvert==7.2.9
nbformat==5.7.3
nest-asyncio==1.5.6
nonebot==1.9.1
nonebot-adapter-feishu==2.0.0b6
nonebot-adapter-onebot==2.2.1
nonebot2==2.0.0rc3
notebook==6.5.2
notebook_shim==0.2.2
numpy==1.24.3
onebot==1.3.3
outcome==1.2.0
packaging==23.0
pandas==2.0.1
pandocfilters==1.5.0
parse==1.19.0
parsimonious==0.10.0
parso==0.8.3
pexpect==4.8.0
pickleshare==0.7.5
pipx==1.1.0
pkginfo==1.9.6
platformdirs==2.6.2
poetry==1.3.2
poetry-core==1.4.0
poetry-plugin-export==1.3.0
priority==2.0.0
prometheus-client==0.16.0
prompt-toolkit==3.0.36
psutil==5.9.4
ptyprocess==0.7.0
pure-eval==0.2.2
pycparser==2.21
pycryptodome==3.17
pydantic==1.10.4
pyee==8.2.2
Pygments==2.14.0
pygtrie==2.5.0
pyOpenSSL==23.1.1
pyppeteer==1.0.2
pyquery==2.0.0
pyrsistent==0.19.3
PySocks==1.7.1
python-dateutil==2.8.2
python-dotenv==0.21.1
python-json-logger==2.0.4
pytz==2023.3
pywin32==305
pywin32-ctypes==0.2.0
pywinpty==2.0.10
PyYAML==6.0
pyzmq==25.0.0
qtconsole==5.4.0
QtPy==2.3.0
quart==0.18.3
rapidfuzz==2.13.7
regex==2022.10.31
requests==2.28.2
requests-html==0.10.0
requests-toolbelt==0.10.1
rfc3339-validator==0.1.4
rfc3986==1.5.0
rfc3986-validator==0.1.1
selenium==4.9.1
Send2Trash==1.8.0
shellingham==1.5.0.post1
six==1.16.0
smmap==5.0.0
sniffio==1.3.0
sortedcontainers==2.4.0
soupsieve==2.3.2.post1
stack-data==0.6.2
starlette==0.24.0
terminado==0.17.1
tinycss2==1.2.1
toml==0.10.2
tomli==2.0.1
tomlkit==0.11.6
tornado==6.2
tqdm==4.64.1
traitlets==5.8.1
trio==0.22.0
trio-websocket==0.10.2
trove-classifiers==2023.1.20
typing_extensions==4.4.0
tzdata==2023.3
uri-template==1.2.0
urllib3==1.26.14
urllib3-secure-extra==0.1.0
userpath==1.8.0
uvicorn==0.20.0
venusian==3.0.0
virtualenv==20.16.5
w3lib==2.1.1
watchfiles==0.18.1
wcwidth==0.2.6
webcolors==1.12
webencodings==0.5.1
websocket-client==1.5.0
websockets==10.4
Werkzeug==2.2.2
widgetsnbextension==4.0.5
win32-setctime==1.1.0
wincertstore==0.2
wsproto==1.2.0
yarl==1.8.2
zipp==3.13.0

View File

@ -7,7 +7,7 @@ from nonebot import on_command
from nonebot.rule import to_me
from requests_html import HTMLSession, HTML
zhibo8Command = on_command("/zhibo8", rule=to_me(), aliases={'预测', '英超', 'zhibo8'}, priority=10)
zhibo8Command = on_command("zhibo8", rule=to_me(), aliases={'预测', '英超', 'zhibo8'}, priority=10)
xiaobianStr = r"""
小编们预测比分

View File

@ -11,12 +11,25 @@ from requests_html import HTMLSession, HTML
import json
from selenium.webdriver import Chrome, ChromeOptions
tongjiCommand = on_command("统计", rule=to_me(), aliases={'微头条', 'tongji', 'tj'}, priority=10)
driver_path = '../../../../driver/cromedriver_win32'
# driver_path = '../../../../driver/cromedriver_win32'
max_behot_time = '0'
# 初始化map对象分别记录每个人的微头条个数
weitoutiaoMap = {"太能喵": 0, "小小": 0, "大帝强": 0, "叶小欢": 0}
"""
Receive a QQ message and reply with the weitoutiao (micro headline)
statistics for last week.
"""
@tongjiCommand.handle()
async def handle_first_receive(bot: Bot, event: Event, state: T_State, args=CommandArg()):
    """Reset the per-author counters, recount, and reply with the result.

    NOTE(review): relies on getWeiToutiaoInfo() filling the module-level
    weitoutiaoMap as a side effect -- confirm against the module prelude.
    """
    # Zero the counters left over from the previous run.  Iterate the keys
    # only (the old values are never needed); re-assigning existing keys
    # while iterating is safe because the dict is not resized.
    for author in weitoutiaoMap:
        weitoutiaoMap[author] = 0
    getWeiToutiaoInfo()
    # Send last week's weitoutiao review result back to the user.
    await tongjiCommand.finish(str(weitoutiaoMap))
"""
获取微头条数据信息
"""
@ -49,7 +62,7 @@ def getWeiToutiaoInfo():
else:
# 获取该微头条的发布人和发布日期
author = None
contentSubString = content[-8]
contentSubString = content[-8:]
if contentSubString.find("太能喵")>0:
weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
elif contentSubString.find("小小")>0:
@ -60,64 +73,64 @@ def getWeiToutiaoInfo():
weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1
getWeiToutiaoInfo()
"""
Fetch article ("wenzhang") data information.
"""
def getWenzhangInfo():
    # Pagination cursor shared with the rest of the module; updated below
    # from the API response.
    global max_behot_time
    # Silence the ResourceWarning emitted at startup.
    warnings.simplefilter('ignore', ResourceWarning)
    chrome_option = ChromeOptions()
    chrome_option.headless = True
    chrome_option.add_argument('--no-sandbox')  # works around the "DevToolsActivePort file doesn't exist" error
    chrome_option.add_argument('--disable-gpu')  # recommended by the Chrome docs to avoid a rendering bug
    chrome_option.add_argument('--hide-scrollbars')  # hide scrollbars to cope with some special pages
    chrome_option.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_option.add_argument('--headless')
    # NOTE(review): driver_path points at 'cromedriver_win32' (sic) elsewhere
    # in this module -- confirm the chromedriver location; executable_path is
    # also deprecated in Selenium 4.
    browser = Chrome(executable_path=driver_path, options=chrome_option)
    # Timestamps of the first and last day of the previous week.
    lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp()
    session = HTMLSession()
    # NOTE(review): verify=False disables TLS certificate verification and the
    # access token is hard-coded in the URL -- confirm both are intentional.
    wenzhangGet = session.get(f"https://www.toutiao.com/api/pc/list/user/feed?category=pc_profile_article&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII&max_behot_time={max_behot_time}&aid=24&app_name=toutiao_web", verify=False, proxies=None)
    # print(weitoutiaoGet.text)
    # Parse the JSON payload returned by the API.
    resultJson = json.loads(wenzhangGet.text)
    # Grab the next max_behot_time cursor first; it is needed if another page
    # has to be fetched.
    max_behot_time = resultJson['next']['max_behot_time']
    # Walk the data list to extract each article's content and author.
    dataList = resultJson['data']
    # Check each entry's publish timestamp: anything earlier than last
    # Monday 00:00 ends the statistics run; otherwise the function recurses
    # to fetch the next page.
    for data in dataList:
        # Publish timestamp of this entry.
        publishTime = data["publish_time"]
        if int(publishTime) > int(lastWeekEndTime):
            continue
        elif int(publishTime) < int(lastWeekStartTime):
            return
        else:
            # Take the article's title and URL; the article body must then be
            # fetched via the URL to work out who the author is.
            title = data["title"]
            url = data["url"]
            if url:
                # Load the article body in the headless browser.
                browser.get(url)
                element = browser.find_element(by='XPATH', value="//article")
                print(element)
                # author = None
                # if data.find("太能喵")>0:
                # weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
                # elif content.find("小小")>0:
                # weitoutiaoMap["小小"] = weitoutiaoMap["小小"]+1
                # elif content.find("大帝强") > 0:
                # weitoutiaoMap["大帝强"] = weitoutiaoMap["大帝强"] + 1
                # elif content.find("叶小欢") > 0:
                # weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1
    # If the loop was not cut short by a return, request the next page.
    # NOTE(review): this calls getWeiToutiaoInfo(), not getWenzhangInfo() --
    # looks like a copy-paste slip; confirm which paginator was intended.
    getWeiToutiaoInfo()
# """
# 获取文章数据信息
# """
# def getWenzhangInfo():
# global max_behot_time
#
# # 第一步输入这个:去除开头警告
# warnings.simplefilter('ignore', ResourceWarning)
#
# chrome_option = ChromeOptions()
# chrome_option.headless = True
# chrome_option.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错
# chrome_option.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug
# chrome_option.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对一些特殊页面
# chrome_option.add_experimental_option('excludeSwitches', ['enable-automation'])
# chrome_option.add_argument('--headless')
#
# browser = Chrome(executable_path=driver_path, options=chrome_option)
#
# # 获取上一周的第一天和最后一天的时间戳
# lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp()
# session = HTMLSession()
# wenzhangGet = session.get(f"https://www.toutiao.com/api/pc/list/user/feed?category=pc_profile_article&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII&max_behot_time={max_behot_time}&aid=24&app_name=toutiao_web", verify=False, proxies=None)
# # print(weitoutiaoGet.text)
# # 使用Json解析返回的数据
# resultJson = json.loads(wenzhangGet.text)
# # 先获取下个max_behot_time时间戳如果需要翻页查询需要使用该参数
# max_behot_time = resultJson['next']['max_behot_time']
# # 开始循环解析data数据获取微头条的内容和发布人员
# dataList = resultJson['data']
# # 获取每一条消息的发布时间戳如果发布时间出现早于上周一0点的数据结束统计函数返回否则递归调用当前函数
# for data in dataList:
# # 获取本条新闻的发布时间
# publishTime = data["publish_time"]
# if int(publishTime) > int(lastWeekEndTime):
# continue
# elif int(publishTime) < int(lastWeekStartTime):
# return
# else:
# # 获取该文章的标题和对应的url需要再通过url获取文章正文解析作者是谁
# title = data["title"]
# url = data["url"]
# if url:
# # 请求文章正文内容
# browser.get(url)
# element = browser.find_element(by='XPATH', value="//article")
# print(element)
# # author = None
# # if data.find("太能喵")>0:
# # weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
# # elif content.find("小小")>0:
# # weitoutiaoMap["小小"] = weitoutiaoMap["小小"]+1
# # elif content.find("大帝强") > 0:
# # weitoutiaoMap["大帝强"] = weitoutiaoMap["大帝强"] + 1
# # elif content.find("叶小欢") > 0:
# # weitoutiaoMap["叶小欢"] = weitoutiaoMap["叶小欢"] + 1
# # 如果没有被return掉继续请求下一页内容
# getWeiToutiaoInfo()
"""
获取当前日期上一周的第一天和最后一天的时间戳
@ -132,5 +145,5 @@ def getLastWeekFirstDayTimeStamp():
return last_week_start.timestamp(), last_week_end.timestamp()
if __name__ == '__main__':
getWenzhangInfo()
getWeiToutiaoInfo()
# print(weitoutiaoMap)