fix: 使用selenium抓取文章内容
This commit is contained in:
parent
24b6d4ca44
commit
66822bdfce
@@ -74,6 +74,7 @@ def getWenzhangInfo():
|
|||||||
chrome_option.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错
|
chrome_option.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错
|
||||||
chrome_option.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug
|
chrome_option.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug
|
||||||
chrome_option.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对一些特殊页面
|
chrome_option.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对一些特殊页面
|
||||||
|
chrome_option.add_experimental_option('excludeSwitches', ['enable-automation'])
|
||||||
chrome_option.add_argument('--headless')
|
chrome_option.add_argument('--headless')
|
||||||
|
|
||||||
browser = Chrome(executable_path=driver_path, options=chrome_option)
|
browser = Chrome(executable_path=driver_path, options=chrome_option)
|
||||||
@@ -103,8 +104,9 @@ def getWenzhangInfo():
|
|||||||
url = data["url"]
|
url = data["url"]
|
||||||
if url:
|
if url:
|
||||||
# 请求文章正文内容
|
# 请求文章正文内容
|
||||||
wenzhangGet = browser.get(url)
|
browser.get(url)
|
||||||
print(wenzhangGet.text)
|
element = browser.find_element(by='XPATH', value="//article")
|
||||||
|
print(element)
|
||||||
# author = None
|
# author = None
|
||||||
# if data.find("太能喵")>0:
|
# if data.find("太能喵")>0:
|
||||||
# weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
|
# weitoutiaoMap["太能喵"] = weitoutiaoMap["太能喵"]+1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user