关闭浏览器受控制的警告条
1. 尝试开启开发者模式(排除 enable-automation 开关)
# Build the Chrome options for a headless session.
opt = webdriver.ChromeOptions()
# Headless mode is requested by the explicit '--headless' argument below,
# which works on both Windows and Linux. The deprecated opt.set_headless()
# helper was dropped: it was redundant with that argument (and, as far as I
# know, is no longer present in newer Selenium releases -- confirm against the
# installed version).
opt.add_argument("--start-maximized")  # maximize the browser window
# opt.add_argument('no-sandbox')
opt.add_argument('--headless')
opt.add_argument('--disable-gpu')
# Exclude the 'enable-automation' switch; this is the attempt to get rid of
# the "browser is being controlled" warning bar.
opt.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=opt)
运行之后还是会报404,这种方法放弃
2. 尝试使用 Firefox 浏览器
# Build the Firefox options for a headless session.
opt = webdriver.FirefoxOptions()
# Headless mode comes from the explicit '--headless' argument below; the
# deprecated opt.set_headless() helper was redundant with it and has been
# dropped.
opt.add_argument("--start-maximized")
opt.add_argument('--headless')
opt.add_argument('--disable-gpu')
driver = webdriver.Firefox(options=opt)
成功解决问题:在无界面模式下可以正常爬取。
隐藏Selenium的指纹特征
去除 window.navigator.webdriver = true 这一属性(让它返回 undefined)
from selenium import webdriver
# JavaScript registered through the DevTools protocol so that
# navigator.webdriver reads as undefined in newly created documents.
hide_webdriver_script = """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""

# Chrome options: drop the 'enable-automation' switch and turn off the
# automation extension.
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

driver = webdriver.Chrome(executable_path='./chromedriver', options=options)

# Register the script before navigating so it applies to the page loaded next.
cdp_params = {"source": hide_webdriver_script}
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", cdp_params)

driver.get('http://exercise.kingname.info')
但是 Selenium 启动的 Chrome 不止这一个特征,所以只移除 window.navigator.webdriver 并不能完全隐藏 Selenium 的痕迹。
利用selenium连接chrome的远程调试端口
需要关闭所有打开的chrome,然后再用命令打开!
chrome.exe --remote-debugging-port=9222 --user-data-dir="D:\temp"
selenium连接chrome
from time import sleep
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
# Do not block pop-ups. The switch is passed without the stray leading and
# trailing spaces the original string carried, so it is a clean '--' flag.
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument(
    'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36')
# Attach to the Chrome instance listening on the remote-debugging port
# (started beforehand with --remote-debugging-port=9222).
chrome_options.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
driver = Chrome('D:/chromedriver.exe', options=chrome_options)
try:
    driver.get('https://bot.sannysoft.com/')
    sleep(5)  # give the detection page time to finish its checks
    driver.save_screenshot('pic.png')
    # Simulate opening a new tab from the current page.
    driver.execute_script("window.open('https://bot.sannysoft.com/')")
    sleep(5)
    windows = driver.window_handles
    # The newly opened tab is the last handle in the list.
    driver.switch_to.window(windows[-1])
    driver.save_screenshot('pic2.png')
    sleep(2)
finally:
    # Always end the WebDriver session, even if one of the steps above raised.
    driver.quit()
selenium 定制启动 chrome 的选项 https://blog.csdn.net/vinson0526/article/details/51850929
去除selenium标识 https://blog.csdn.net/qq_35531549/article/details/89023525
webdriver 添加启动参数
# Route the browser's traffic through the local proxy on port 8083.
option.add_argument('--proxy-server=http://127.0.0.1:8083')
# One switch per add_argument() call: a single string containing two
# space-separated switches is handed to the browser as ONE argument, so the
# second switch would not be recognised.
option.add_argument('--disable-infobars')
option.add_argument('--ignore-certificate-errors')
mitmproxy中间人攻击替换JS文件内容
pip install mitmproxy
mitmdump -s mitmdump.py -p 8083
mitmdump.py
from mitmproxy import ctx
def response(flow):
    """mitmproxy ``response`` hook that strips WebDriver markers from scripts.

    For a response whose request URL contains ``'js'``, every quoted marker
    name in the list below is replaced by ``"NO-SUCH-ATTR"``, ``t.webdriver``
    is replaced by ``false`` and the literal ``ChromeDriver`` is removed.
    Other responses, and responses without a text body, are left untouched.

    Args:
        flow: The flow object passed in by mitmproxy; ``flow.request.url`` and
            ``flow.response.text`` are read, and ``flow.response.text`` is
            rewritten.
    """
    # 'js' is a placeholder: narrow it to the target site's actual script name.
    if 'js' not in flow.request.url:
        return

    # Read the body once and write it back once, rather than re-reading and
    # re-assigning flow.response.text for every single replacement.
    text = flow.response.text
    if text is None:
        # No body to rewrite (presumably how mitmproxy reports missing
        # content -- verify); the original code would crash on .replace here.
        return

    markers = [
        'webdriver', '__driver_evaluate', '__webdriver_evaluate',
        '__selenium_evaluate', '__fxdriver_evaluate', '__driver_unwrapped',
        '__webdriver_unwrapped', '__selenium_unwrapped',
        '__fxdriver_unwrapped', '_Selenium_IDE_Recorder', '_selenium',
        'calledSelenium', '_WEBDRIVER_ELEM_CACHE', 'ChromeDriverw',
        'driver-evaluate', 'webdriver-evaluate', 'selenium-evaluate',
        'webdriverCommand', 'webdriver-evaluate-response', '__webdriverFunc',
        '__webdriver_script_fn', '__$webdriverAsyncExecutor',
        '__lastWatirAlert', '__lastWatirConfirm', '__lastWatirPrompt',
        '$chrome_asyncScriptInfo', '$cdc_asdjflasutopfhvcZLmcfl_',
    ]
    for i in markers:
        ctx.log.info('Remove %s from %s.' % (i, flow.request.url))
        # Only the double-quoted form of the name is replaced.
        text = text.replace('"%s"' % (i), '"NO-SUCH-ATTR"')
    text = text.replace('t.webdriver', 'false')
    text = text.replace('ChromeDriver', '')
    flow.response.text = text
ImportError: DLL load failed https://www.pythonheidong.com/blog/article/505501/a49fe558a4a5819c4409/
移除Selenium中的 window.navigator.webdriver https://www.cnblogs.com/presleyren/p/12936553.html
Selenium 屏蔽 webdriver被识别出来 的一些解决办法 https://blog.csdn.net/u010451638/article/details/109850249
selenium 无界面模式下 反爬虫解决方案 https://www.cnblogs.com/sophia201552/p/11764643.html
爬虫(二)浏览器如何识别selenium及爬虫如何绕过反爬 https://zhuanlan.zhihu.com/p/78368287
Python3+mitmproxy安装使用教程(Windows)https://blog.csdn.net/qq_33337186/article/details/105428108