1.Chrome安装
wget https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm
yum install ./google-chrome-stable_current_x86_64.rpm
在公司中往往没有yum权限,可以采用以下方式安装:
rpm2cpio google-chrome-stable_current_x86_64.rpm |cpio -ivdm
① 解压后当前目录会出现opt、etc、usr三个目录
② wget命令默认下载的是最新版本的Chrome浏览器,通过以下方式查看Chrome版本:
cd ./opt/google/chrome
./chrome --version
③ Chromedriver版本必须与Chrome版本相匹配(至少主版本号要一致)
2.Chromedriver安装
下载路径:http://chromedriver.storage.googleapis.com/index.html 选择合适的Chromedriver版本。
unzip chromedriver_linux64.zip
chmod +x /usr/bin/chromedriver #给予执行权限
可以将Chrome和Chromedriver下载至统一目录下,然后在该目录下安装。
3.路径设置
# Minimal smoke test: start Chrome through Selenium using explicitly
# configured locations for the browser binary and the chromedriver binary.
from selenium import webdriver

option = webdriver.ChromeOptions()
# Path of the Chrome executable extracted from the rpm.
option.binary_location = '/home/xxx/tool/chrome/opt/google/chrome/chrome'
# Path of the chromedriver executable.
chromedriver_path = '/home/xxx/tool/chrome/chromedriver'
# `options=` replaces the deprecated `chrome_options=` keyword.
# NOTE(review): newer Selenium 4 releases drop `executable_path` in favour of
# a Service object - confirm against the installed Selenium version.
browser = webdriver.Chrome(executable_path=chromedriver_path, options=option)
执行以上程序没有问题则配置完成
4.设置代理
(1)无需用户名和密码的代理:
option.add_argument('--proxy-server=http://'+proxy)
#proxy='ip:port'
(2)需要用户名和密码:(以插件的方式使用代理,必须使用有头模式,即不能添加--headless参数)
import string
import zipfile
from selenium import webdriver
def create_proxy_auth_extension(proxy_host, proxy_port, proxy_username, proxy_password, scheme='http',
                                plugin_path=None):
    """Write a Chrome extension zip that configures an authenticated proxy.

    The archive contains ``manifest.json`` and ``background.js``. The
    background script sets a fixed proxy server through
    ``chrome.proxy.settings`` and answers ``onAuthRequired`` events with the
    given credentials.

    Args:
        proxy_host: Proxy host name or IP address.
        proxy_port: Proxy port; inserted verbatim inside ``parseInt(...)``.
        proxy_username: User name returned for proxy authentication.
        proxy_password: Password returned for proxy authentication.
        scheme: Proxy scheme written into the proxy rule.
        plugin_path: Destination of the zip file. When ``None`` a relative
            file name is derived from the user name and password.

    Returns:
        The path of the zip file that was written.
    """
    # Local import: the surrounding file does not import json at module level.
    import json

    def _js_string(value):
        # Emit a quoted, escaped JavaScript string literal so that quotes or
        # backslashes in a credential cannot break the generated script.
        return json.dumps(str(value))

    if plugin_path is None:
        # NOTE(review): the default file name embeds the password and a fixed
        # host/port suffix - pass plugin_path explicitly if that is a concern.
        plugin_path = r'{}_{}@http-dyn.dobel.com_9020.zip'.format(proxy_username, proxy_password)

    # NOTE(review): this is a Manifest V2 extension - confirm the target
    # Chrome version still loads MV2 extensions.
    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Dobel Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # String placeholders are substituted with complete JS string literals
    # (including the surrounding quotes), hence no quotes in the template.
    background_js = string.Template(
        """
        var config = {
            mode: "fixed_servers",
            rules: {
                singleProxy: {
                    scheme: ${scheme},
                    host: ${host},
                    port: parseInt(${port})
                },
                bypassList: ["foobar.com"]
            }
        };
        chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
        function callbackFn(details) {
            return {
                authCredentials: {
                    username: ${username},
                    password: ${password}
                }
            };
        }
        chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
        );
        """
    ).substitute(
        host=_js_string(proxy_host),
        port=proxy_port,
        username=_js_string(proxy_username),
        password=_js_string(proxy_password),
        scheme=_js_string(scheme),
    )

    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)
    return plugin_path
def from_proxy_get_daili(proxy):
    """Split a ``user:pass@ip:port`` proxy string into its four parts.

    The string is split on the LAST ``@`` and the host part on its LAST
    ``:``, while the credentials are split on their FIRST ``:``. This lets
    the password contain ``@`` or ``:`` characters.

    Args:
        proxy: Proxy description in the form ``user:pass@ip:port``.

    Returns:
        A tuple ``(host, port, user, password)`` of strings.

    Raises:
        ValueError: If a required separator is missing.
    """
    credentials, host_port = proxy.rsplit('@', 1)
    proxy_host, proxy_port = host_port.rsplit(':', 1)
    proxy_user, proxy_pass = credentials.split(':', 1)
    return proxy_host, proxy_port, proxy_user, proxy_pass
def get_driver(proxy,
               chrome_binary='/home/xxx/tool/chrome/opt/google/chrome/chrome',
               chromedriver_path='/home/xxx/tool/chrome/chromedriver'):
    """Start Chrome with an extension that routes traffic through a proxy.

    Args:
        proxy: Proxy description in the form ``user:pass@ip:port``.
        chrome_binary: Path of the Chrome executable.
        chromedriver_path: Path of the chromedriver executable.

    Returns:
        The ``webdriver.Chrome`` instance that was started.
    """
    proxy_host, proxy_port, proxy_user, proxy_pass = from_proxy_get_daili(proxy)
    proxy_auth_plugin_path = create_proxy_auth_extension(
        proxy_host=proxy_host,
        proxy_port=proxy_port,
        proxy_username=proxy_user,
        proxy_password=proxy_pass)

    option = webdriver.ChromeOptions()
    option.add_extension(proxy_auth_plugin_path)  # load the proxy extension
    option.add_argument('--no-sandbox')
    option.add_argument('--disable-gpu')
    option.add_experimental_option('excludeSwitches', ['enable-logging'])
    option.binary_location = chrome_binary

    # `options=` replaces the deprecated `chrome_options=` keyword.
    # NOTE(review): newer Selenium 4 releases drop `executable_path` in favour
    # of a Service object - confirm against the installed Selenium version.
    # Presumably `executable_path` can be omitted when chromedriver is on PATH.
    return webdriver.Chrome(executable_path=chromedriver_path, options=option)
if __name__ == '__main__':
    # Proxy server, in the form 'username:password@ip:port'.
    proxy = 'username:password@60.28.160.178:888'
    driver = get_driver(proxy)
    try:
        driver.get("http://httpbin.org/ip")
        # Print the page to check whether the request went through the proxy.
        print(driver.page_source)
        # driver.page_source is the page HTML as a str, so it can be handed
        # straight to BeautifulSoup:
        #     soup = BeautifulSoup(driver.page_source, 'lxml')
    finally:
        # quit() closes the browser and releases the port even when the
        # request above fails; close() would only close the current tab.
        driver.quit()
5.后台运行
问题背景:在使用公司ip代理池时,4(1)的方法无法使用,故只能以插件的方式使用代理。这带来一个问题,插件的使用和无头模式(--headless)不兼容,报如下错误,并且这个问题似乎没有团队维护和解决。如果无法使用无头模式,由于公司每晚电脑都会关机,这导致无法长期后台部署。
Traceback (most recent call last):
File "proxy_plugin.py", line 125, in <module>
browser = get_browser(proxy)
File "proxy_plugin.py", line 113, in get_browser
browser = webdriver.Chrome(executable_path = chromedriver_path, chrome_options=option)
File "/home/qinbo/.local/lib/python3.6/site-packages/selenium/webdriver/chrome/webdriver.py", line 81, in __init__
desired_capabilities=desired_capabilities)
File "/home/qinbo/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 157, in __init__
self.start_session(capabilities, browser_profile)
File "/home/qinbo/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 252, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/home/qinbo/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/home/qinbo/.local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: failed to wait for extension background page to load: chrome-extension://bhglohmdhaagkangamldncbkppipbiid/_generated_background_page.html
from unknown error: page could not be found: chrome-extension://bhglohmdhaagkangamldncbkppipbiid/_generated_background_page.html
You can use pyvirtualdisplay to run Chrome on a server that has no physical display.
The best part is that extensions still work with this trick. 代码如下:
# Run the proxied browser inside a virtual display so that no visible window
# is needed and the proxy extension can still be loaded.
import time

from pyvirtualdisplay import Display

# Placeholder - replace with real values in the form 'username:password@ip:port'.
# Credentials should not be hard-coded in shared code.
proxy = 'username:password@ip:port'
display = Display(visible=0, size=(800, 600))
display.start()
try:
    # get_driver is the factory defined in the proxy-extension script above.
    browser = get_driver(proxy)
    try:
        browser.get("http://httpbin.org/ip")
        print(browser.page_source)
        time.sleep(10)
    finally:
        browser.quit()
finally:
    # Always stop the virtual display, even if the browser failed to start.
    display.stop()
但是报错如下:(意思是没有安装Xvfb。Xvfb(X virtual framebuffer)是一个虚拟的X显示服务器,它在内存中完成图形渲染,不需要真实的显示设备;selenium打开的浏览器会显示在这个虚拟显示器上而不会在前台打开,此时无需设置driver为无头模式。)
Traceback (most recent call last):
File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/easyprocess/__init__.py", line 169, in start
cmd, stdout=stdout, stderr=stderr, cwd=self.cwd, env=self.env,
File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/subprocess.py", line 709, in __init__
restore_signals, start_new_session)
File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/subprocess.py", line 1344, in _execute_child
raise child_exception_type(errno_num, err_msg, err_filename)
FileNotFoundError: [Errno 2] No such file or directory: 'Xvfb': 'Xvfb'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "proxy_plugin.py", line 125, in <module>
display = Display(visible=0, size=(800, 600))
File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/pyvirtualdisplay/display.py", line 61, in __init__
**kwargs
File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/pyvirtualdisplay/xvfb.py", line 50, in __init__
manage_global_env=manage_global_env,
File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/pyvirtualdisplay/abstractdisplay.py", line 88, in __init__
helptext = get_helptext(program)
File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/pyvirtualdisplay/util.py", line 10, in get_helptext
p.call()
File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/easyprocess/__init__.py", line 141, in call
self.start().wait(timeout=timeout)
File "/home/qinbo/tool/Anaconda3.6/anaconda3/lib/python3.6/site-packages/easyprocess/__init__.py", line 174, in start
raise EasyProcessError(self, "start error")
easyprocess.EasyProcessError: start error <EasyProcess cmd_param=['Xvfb', '-help'] cmd=['Xvfb', '-help'] oserror=[Errno 2] No such file or directory: 'Xvfb': 'Xvfb' return_code=None stdout="None" stderr="None" timeout_happened=False>
解决:安装Xvfb就能后台运行了!
yum install xorg-x11-server-Xvfb
#yum install x11vnc
#Xvfb :1 -screen 0 1024x768x24 -nolisten tcp &
#export DISPLAY=:1
我关闭手动启动的Xvfb进程后依然能后台运行,这个地方起初有点蒙。(原因应该是:pyvirtualdisplay的Display.start()会自行启动一个Xvfb子进程,并不依赖上面手动启动的Xvfb进程,只要系统中安装了Xvfb可执行文件即可。)
6.窗口大小问题
背景:有些网站在定位元素时,窗口尺寸不同定位方法也不同,因此需要对窗口大小进行控制。
# Set the virtual display size; it can be set to the same values as the real monitor.
display = Display(visible=0, size=(1920, 1080))
# Get the current browser window size.
size_Dict = browser.get_window_size()
# Print the width and height of the browser window.
print("当前浏览器的宽:", size_Dict['width'])
print("当前浏览器的高:", size_Dict['height'])
# Set the browser window size.
browser.set_window_size(width=1665, height=1060, windowHandle="current")
https://blog.csdn.net/ywdhzxf/article/details/83211631