selenium配置

本文详细介绍了在Linux环境下安装Chrome浏览器和Chromedriver的方法,包括使用wget和yum命令安装Chrome,以及如何下载和配置Chromedriver。此外,还提供了设置Chrome和Chromedriver路径的代码示例,以及如何在Selenium中使用代理服务器,包括无需用户名和密码的代理设置,以及需要用户名和密码的代理配置。同时,文章还讨论了在使用代理插件时遇到的问题和解决方案,以及如何在后台运行Selenium和控制窗口大小。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

 

1.Chrome安装

# Download the latest stable Chrome RPM, then install it from the local file.
wget https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm
yum install ./google-chrome-stable_current_x86_64.rpm

在公司中往往没有yum权限,可以采用以下方式安装:

rpm2cpio google-chrome-stable_current_x86_64.rpm |cpio -ivdm

① 安装后当前目录会出现opt、etc、usr三个目录

② wget命令默认下载的是最新版本的Chrome浏览器,通过以下方式查看Chrome版本:

cd ./opt/google/chrome
./chrome --version

③ Chrome版本必须和Chromedriver版本一致

2.Chromedriver安装

下载路径:http://chromedriver.storage.googleapis.com/index.html 选择合适的Chromedriver版本。

unzip chromedriver_linux64.zip
chmod +x ./chromedriver  # grant execute permission to the binary extracted into the current directory

可以将Chrome和Chromedriver下载至统一目录下,然后在该目录下安装。

3.路径设置

from selenium import webdriver
# Point Selenium at a Chrome binary and a chromedriver that live in
# non-default locations, then start the browser to verify the setup.
option = webdriver.ChromeOptions()
option.binary_location ='/home/xxx/tool/chrome/opt/google/chrome/chrome'  # path to the Chrome binary
chromedriver_path='/home/xxx/tool/chrome/chromedriver'  # path to the chromedriver executable
browser = webdriver.Chrome(executable_path = chromedriver_path, chrome_options=option)

执行以上程序没有问题则配置完成

4.设置代理

(1)无需用户名和密码的代理:

option.add_argument('--proxy-server=http://'+proxy)
# proxy is a string of the form 'ip:port'

(2)需要用户名和密码:(以插件的方式使用代理,必须有头)

import string
import zipfile

from selenium import webdriver


def create_proxy_auth_extension(proxy_host, proxy_port, proxy_username, proxy_password, scheme='http',
                                plugin_path=None):
    """Write a Chrome extension zip that configures an authenticated proxy.

    The archive contains a ``manifest.json`` and a ``background.js``. The
    script sets a fixed proxy server and answers proxy authentication
    challenges with the given credentials.

    Args:
        proxy_host: Proxy host name or IP address.
        proxy_port: Proxy port; it is inserted into the script as the
            argument of ``parseInt``.
        proxy_username: User name sent when the proxy asks for credentials.
        proxy_password: Password sent when the proxy asks for credentials.
        scheme: Proxy scheme, ``'http'`` by default.
        plugin_path: Where to write the zip. When ``None`` a file name is
            derived from the user name and password.

    Returns:
        The path of the zip file that was written.
    """
    # Local import keeps the block self-contained; json is only needed to
    # escape the values embedded in the generated JavaScript.
    import json

    if plugin_path is None:
        # NOTE: the default name embeds the credentials; kept unchanged so
        # existing callers that rely on the generated name keep working.
        plugin_path = r'{}_{}@http-dyn.dobel.com_9020.zip'.format(proxy_username, proxy_password)

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Dobel Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # String values are emitted with json.dumps, which yields a quoted and
    # escaped literal that is also valid JavaScript. Without this, a quote or
    # backslash in the password would break the generated script.
    background_js = string.Template(
        """
        var config = {
            mode: "fixed_servers",
            rules: {
                singleProxy: {
                    scheme: ${scheme},
                    host: ${host},
                    port: parseInt(${port})
                },
                bypassList: ["foobar.com"]
            }
          };

        chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

        function callbackFn(details) {
            return {
                authCredentials: {
                    username: ${username},
                    password: ${password}
                }
            };
        }

        chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
        );
        """
    ).substitute(
        host=json.dumps(proxy_host),
        port=proxy_port,
        username=json.dumps(proxy_username),
        password=json.dumps(proxy_password),
        scheme=json.dumps(scheme),
    )

    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)

    return plugin_path

def from_proxy_get_daili(proxy):
    """Split a ``user:pass@ip:port`` proxy string into its four parts.

    The string is split on the last ``@``, the first ``:`` of the credential
    part and the last ``:`` of the address part, so a password that itself
    contains ``@`` or ``:`` is parsed correctly.

    Args:
        proxy: Proxy string in the form ``user:pass@ip:port``.

    Returns:
        A tuple ``(host, port, user, password)`` of strings.

    Raises:
        ValueError: If a required separator is missing.
    """
    user_pass_str, at_sep, ip_port_str = proxy.rpartition('@')
    proxy_user, cred_sep, proxy_pass = user_pass_str.partition(':')
    proxy_host, addr_sep, proxy_port = ip_port_str.rpartition(':')
    if not (at_sep and cred_sep and addr_sep):
        raise ValueError("proxy must look like 'user:pass@ip:port', got {!r}".format(proxy))
    return proxy_host, proxy_port, proxy_user, proxy_pass

def get_driver(proxy, chrome_binary_path='/home/xxx/tool/chrome/opt/google/chrome/chrome',
               chromedriver_path='/home/xxx/tool/chrome/chromedriver'):
    """Start Chrome with a proxy-authentication extension loaded.

    Args:
        proxy: Proxy string in the form ``user:pass@ip:port``.
        chrome_binary_path: Path to the Chrome binary. Defaults to the
            location used throughout this article.
        chromedriver_path: Path to the chromedriver executable. Defaults to
            the location used throughout this article.

    Returns:
        The ``webdriver.Chrome`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    proxy_host, proxy_port, proxy_user, proxy_pass = from_proxy_get_daili(proxy)
    proxy_auth_plugin_path = create_proxy_auth_extension(
        proxy_host=proxy_host,
        proxy_port=proxy_port,
        proxy_username=proxy_user,
        proxy_password=proxy_pass)

    option = webdriver.ChromeOptions()
    option.add_extension(proxy_auth_plugin_path)  # load the proxy-auth extension
    option.add_argument('--no-sandbox')
    option.add_argument('--disable-gpu')
    option.add_experimental_option('excludeSwitches', ['enable-logging'])
    option.binary_location = chrome_binary_path
    return webdriver.Chrome(executable_path=chromedriver_path, chrome_options=option)


if __name__ == '__main__':
    # Proxy server, in the form 'username:password@ip:port'.
    proxy='username:password@60.28.160.178:888'
    driver = get_driver(proxy)
    try:
        driver.get("http://httpbin.org/ip")
        # Print the page to check whether the request went through the proxy.
        print(driver.page_source)
        # driver.page_source is the page HTML as a str, so it can be handed
        # straight to BeautifulSoup: BeautifulSoup(driver.page_source, 'lxml')
    finally:
        # quit() closes the browser and releases the port (close() would only
        # close the current tab); run it even if the page load fails.
        driver.quit()

5.后台运行

问题背景:在使用公司ip代理池时,4(1)的方法无法使用,故只能以插件的方式使用代理。这带来一个问题,插件的使用和无头模式(--headless)不兼容,报如下错误,并且这个问题似乎没有团队维护和解决。如果无法使用无头模式,由于公司每晚电脑都会关机,这导致无法长期后台部署。

Traceback (most recent call last):
  File "proxy_plugin.py", line 125, in <module>
    browser = get_browser(proxy)
  File "proxy_plugin.py", line 113, in get_browser
    browser = webdriver.Chrome(executable_path = chromedriver_path, chrome_options=option)
  File "/home/qinbo/.local/lib/python3.6/site-packages/selenium/webdriver/chrome/webdriver.py", line 81, in __init__
    desired_capabilities=desired_capabilities)
  File "/home/qinbo/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 157, in __init__
    self.start_session(capabilities, browser_profile)
  File "/home/qinbo/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 252, in start_session
    response = self.execute(Command.NEW_SESSION, parameters)
  File "/home/qinbo/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/home/qinbo/.local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: failed to wait for extension background page to load: chrome-extension://bhglohmdhaagkangamldncbkppipbiid/_generated_background_page.html
from unknown error: page could not be found: chrome-extension://bhglohmdhaagkangamldncbkppipbiid/_generated_background_page.html

解决:参考https://stackoverflow.com/questions/45372066/is-it-possible-to-run-google-chrome-in-headless-mode-with-extensions/45372648#45372648

You can use pyvirtualdisplay to run the chrome with zero display on your server. The best thing is you can run extensions by using this trick. 代码如下

import time

from pyvirtualdisplay import Display

proxy='hexin:hx300033@101.71.41.166:888'

# Start an invisible virtual display so Chrome can run with the proxy
# extension without opening a window on a real screen.
display = Display(visible=0, size=(800, 600))
display.start()
try:
    # get_driver is the function defined in section 4(2) of this article.
    browser = get_driver(proxy)
    try:
        browser.get("http://httpbin.org/ip")
        print(browser.page_source)
        time.sleep(10)
    finally:
        # Always close the browser, even if the page load fails.
        browser.quit()
finally:
    # Always tear down the virtual display.
    display.stop()

但是报错如下:(意思是没有安装Xvfb。我的理解:Xvfb(X virtual framebuffer)是一个虚拟的X显示服务器,selenium打开的浏览器会显示在这个虚拟显示器中而不会在前台打开,此时无需设置driver为无头模式。)

Traceback (most recent call last):
  File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/easyprocess/__init__.py", line 169, in start
    cmd, stdout=stdout, stderr=stderr, cwd=self.cwd, env=self.env,
  File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/subprocess.py", line 709, in __init__
    restore_signals, start_new_session)
  File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/subprocess.py", line 1344, in _execute_child
    raise child_exception_type(errno_num, err_msg, err_filename)
FileNotFoundError: [Errno 2] No such file or directory: 'Xvfb': 'Xvfb'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "proxy_plugin.py", line 125, in <module>
    display = Display(visible=0, size=(800, 600))
  File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/pyvirtualdisplay/display.py", line 61, in __init__
    **kwargs
  File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/pyvirtualdisplay/xvfb.py", line 50, in __init__
    manage_global_env=manage_global_env,
  File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/pyvirtualdisplay/abstractdisplay.py", line 88, in __init__
    helptext = get_helptext(program)
  File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/pyvirtualdisplay/util.py", line 10, in get_helptext
    p.call()
  File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/easyprocess/__init__.py", line 141, in call
    self.start().wait(timeout=timeout)
  File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/easyprocess/__init__.py", line 174, in start
    raise EasyProcessError(self, "start error")
easyprocess.EasyProcessError: start error <EasyProcess cmd_param=['Xvfb', '-help'] cmd=['Xvfb', '-help'] oserror=[Errno 2] No such file or directory: 'Xvfb': 'Xvfb' return_code=None stdout="None" stderr="None" timeout_happened=False>

解决:安装Xvfb就能后台运行了!

yum install xorg-x11-server-Xvfb
#yum install x11vnc
#Xvfb :1 -screen 0 1024x768x24 -nolisten tcp &
#export DISPLAY=:1
我关闭手动启动的Xvfb进程后依然能后台运行,这个地方有点蒙。(原因应该是:pyvirtualdisplay在display.start()时会自行启动一个Xvfb进程,因此只需要系统中安装了Xvfb,并不依赖手动启动的那个进程。)

6.窗口大小问题

背景:有些网站在定位元素时,窗口尺寸不同定位方法也不同,因此需要对窗口大小进行控制。

# Set the virtual display size; it can be set to match the real monitor
display = Display(visible=0, size=(1920, 1080))

# Get the current browser window size
size_Dict = browser.get_window_size()
# Print the browser's width and height
print("当前浏览器的宽:", size_Dict['width'])
print("当前浏览器的高:", size_Dict['height'])

# Set the browser window size
browser.set_window_size(width=1665, height=1060, windowHandle="current")

参考博客:https://blog.csdn.net/weixin_42081389/article/details/96482232?utm_medium=distribute.pc_relevant_bbs_down.none-task--2~all~first_rank_v2~rank_v25-1.nonecase&depth_1-utm_source=distribute.pc_relevant_bbs_down.none-task--2~all~first_rank_v2~rank_v25-1.nonecase

https://blog.csdn.net/ywdhzxf/article/details/83211631

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值