fix: 使用selenium抓取文章内容
This commit is contained in:
parent 24b6d4ca44
commit 66822bdfce
@@ -74,6 +74,7 @@ def getWenzhangInfo():
|
||||
chrome_option.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错
|
||||
chrome_option.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug
|
||||
chrome_option.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对一些特殊页面
|
||||
chrome_option.add_experimental_option('excludeSwitches', ['enable-automation'])
|
||||
chrome_option.add_argument('--headless')
|
||||
|
||||
browser = Chrome(executable_path=driver_path, options=chrome_option)
|
||||
@@ -103,8 +104,9 @@ def getWenzhangInfo():
|
||||
url = data["url"]
|
||||
if url:
|
||||
# 请求文章正文内容
|
||||
- wenzhangGet = browser.get(url)
|
||||
- print(wenzhangGet.text)
|
||||
+ browser.get(url)
|
||||
+ element = browser.find_element(by='XPATH', value="//article")
|
||||
+ print(element)
|
||||
# author = None
|
||||
# if data.find("太能喵")>0:
|
||||
# weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
|
||||
|
Loading…
x
Reference in New Issue
Block a user