fix: 使用selenium抓取文章内容

2023-09-27 10:01:34 +08:00 · 2023-09-27 10:01:34 +08:00 · 66822bdfce
commit 66822bdfce
parent 24b6d4ca44
1 changed file with 4 additions and 2 deletions
--- a/src/plugins/zhibo8/plugins/TouTiao.py
+++ b/src/plugins/zhibo8/plugins/TouTiao.py
@@ -74,6 +74,7 @@ def getWenzhangInfo():
    chrome_option.add_argument('--no-sandbox')  # 解决DevToolsActivePort文件不存在的报错
    chrome_option.add_argument('--disable-gpu')  # 谷歌文档提到需要加上这个属性来规避bug
    chrome_option.add_argument('--hide-scrollbars')  # 隐藏滚动条, 应对一些特殊页面
+    chrome_option.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_option.add_argument('--headless')

    browser = Chrome(executable_path=driver_path, options=chrome_option)
@@ -103,8 +104,9 @@ def getWenzhangInfo():
            url = data["url"]
            if url:
                # 请求文章正文内容
-                wenzhangGet = browser.get(url)
-                print(wenzhangGet.text)
+                browser.get(url)
+                element = browser.find_element(by='XPATH', value="//article")
+                print(element)
            # author = None
            # if data.find("太能喵")>0:
            #     weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1