fix: 使用selenium抓取文章内容

This commit is contained in:
xiaoyan 2023-09-27 10:01:34 +08:00
parent 24b6d4ca44
commit 66822bdfce

View File

@@ -74,6 +74,7 @@ def getWenzhangInfo():
chrome_option.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错
chrome_option.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug
chrome_option.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对一些特殊页面
chrome_option.add_experimental_option('excludeSwitches', ['enable-automation'])
chrome_option.add_argument('--headless')
browser = Chrome(executable_path=driver_path, options=chrome_option)
@@ -103,8 +104,9 @@ def getWenzhangInfo():
url = data["url"]
if url:
# 请求文章正文内容
wenzhangGet = browser.get(url)
print(wenzhangGet.text)
browser.get(url)
element = browser.find_element(by='XPATH', value="//article")
print(element)
# author = None
# if data.find("太能喵")>0:
# weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1