import importlib
import json
import os
import platform
import random
import sys
import tempfile
import time

from selenium import webdriver as _root_webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.alert import Alert
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import *

# The driver path, the browser location and the webdriver type can all be
# selected through environment variables.
# Values taken from the environment take priority over constructor arguments.
# Any non-empty SELENIUM_WIRE value switches the driver package to Selenium-Wire.
SELENIUM_WIRE = os.environ.get('SELENIUM_WIRE') or ''

# Attention!!!
# The environment variables must be set before this module is imported:
# they are read exactly once, right here, at import time.
# Setting them afterwards has no effect.
SELENIUM_BROWSER_CHOOSE = os.environ.get('SELENIUM_BROWSER_CHOOSE') or 'Chrome'
SELENIUM_BROWSER_DRIVER = os.environ.get('SELENIUM_BROWSER_DRIVER') or ''
SELENIUM_BROWSER_BINARY = os.environ.get('SELENIUM_BROWSER_BINARY') or ''

# Whether to turn on headless mode (either variable set to '1' enables it).
HEADLESS_BROWSER_ENABLE = os.environ.get('HEADLESS_BROWSER_ENABLE') == '1' or os.environ.get('HEADLESS') == '1'

# Choose Selenium or Selenium-Wire, default: Selenium.
# The module is loaded by name through importlib, so the environment value is
# never executed as source code.
if SELENIUM_WIRE:
    _root_webdriver = importlib.import_module('seleniumwire.webdriver')

# Choose the Options and Service classes that belong to the selected browser.
# The static Chrome imports above stay in place as the fallback definition.
if SELENIUM_BROWSER_CHOOSE:
    _browser_package = (SELENIUM_BROWSER_CHOOSE or 'Chrome').lower()
    Options = importlib.import_module('selenium.webdriver.%s.options' % _browser_package).Options
    Service = importlib.import_module('selenium.webdriver.%s.service' % _browser_package).Service

# Use the chrome driver by default, such a code writing method is for grammar prompts.
DriverChoose = _root_webdriver.Chrome
if SELENIUM_BROWSER_CHOOSE:
    # Resolve the configured driver class by attribute lookup; the static
    # assignment above remains as the statically visible default.
    DriverChoose = getattr(_root_webdriver, (SELENIUM_BROWSER_CHOOSE or 'Chrome').capitalize())


class BrowserMobileEmulation(dict):
    """
    Mobile emulation parameters.

    A dict holding the keys ``w``, ``h`` and ``user_agent``. Items can also be
    read as attributes; a missing key or attribute yields ``None`` instead of
    raising. Attribute assignment is ignored, so values can only be changed
    through item assignment.
    """

    def __init__(self, w=540, h=960, user_agent=None):
        """
        :param w: emulated screen width.
        :param h: emulated screen height.
        :param user_agent: user agent string, a built-in Android one is used when empty.
        """
        du = 'Mozilla/5.0 (Linux; U; Android 13; zh-cn; 2109119BC Build/TKQ1.220829.002) ' \
             'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 ' \
             'Chrome/98.0.4758.102 MQQBrowser/13.6 Mobile Safari/537.36'
        user_agent = user_agent or du
        super().__init__({'w': w, 'h': h, 'user_agent': user_agent})
        # No effect at runtime because __setattr__ discards every assignment;
        # presumably kept only so editors know these attribute names.
        self.w = self.h = self.user_agent = None

    def __setattr__(self, key, value):
        # Deliberately a no-op: attribute reads are always served from the dict.
        pass

    def __getitem__(self, item):
        try:
            return super().__getitem__(item)
        except KeyError:
            return None

    def __getattr__(self, item):
        # Only reached when normal attribute lookup fails.
        try:
            return super().__getitem__(item)
        except KeyError:
            return None


class DefaultChromeLocation:
    """
    Set default chrome and driver location.
    """
    # Used when the platform is Linux.
    xDriver = '/opt/google/chrome/chromedriver'
    xChrome = '/opt/google/chrome/chrome'
    # Used on every other platform.
    wDriver = 'C:/Program Files/Google/Chrome/Application/chromedriver.exe'
    wChrome = 'C:/Program Files/Google/Chrome/Application/chrome.exe'


class PositionTab:
    """
    Position for switch tab.
    """
    Prev = 'Go-Prev'
    Next = 'Go-Next'


class ColorUtils:
    """
    Color utils.
    """

    @staticmethod
    def hex2rgb(color):
        """
        Convert a hexadecimal color such as ``#f80`` or ``#ff8800`` to an RGB tuple.

        The first character is always dropped without being checked. The rest
        must be 3, 6 or 8 hexadecimal digits; with 8 digits the last two are ignored.

        :param color: color string with one leading character (normally ``#``).
        :return: tuple ``(r, g, b)`` of integers.
        :raises ValueError: on a non-hexadecimal digit or an unsupported length.
        """
        digits = color[1:].upper()
        for x in digits:
            if x not in '0123456789ABCDEF':
                raise ValueError('Found invalid hexa character {0}.'.format(x))
        if len(digits) in (6, 8):
            digits = digits[0:6]
        elif len(digits) == 3:
            # Expand the short form: "F80" -> "FF8800".
            digits = ''.join(x * 2 for x in digits)
        else:
            raise ValueError('Hexa string should be 3, 6 or 8 digits. if 8 digits, last 2 are ignored.')
        return int(digits[0:2], 16), int(digits[2:4], 16), int(digits[4:6], 16)


class Browser(DriverChoose):
    """
    Browser web driver.
    """

    def __init__(
            self,
            driver: str = None,
            binary: str = None,
            headless: bool = False,
            lang: str = None,
            mute: bool = False,
            no_images: bool = False,
            user_agent: str = None,
            http_proxy: str = None,
            home: str = None,
            window_size: str = None,
            mobile_emulation: BrowserMobileEmulation = None,
            option_arguments: list = None,
            req_interceptor=None,
            res_interceptor=None,
    ):
        """
        Configure the options, start the browser and apply the default timeouts.

        :param driver: webdriver executable path, overridden by SELENIUM_BROWSER_DRIVER.
        :param binary: browser executable path, overridden by SELENIUM_BROWSER_BINARY.
        :param headless: run headless, also forced by HEADLESS_BROWSER_ENABLE and on Linux.
        :param lang: browser language, left untouched when empty.
        :param mute: mute the audio.
        :param no_images: do not load images.
        :param user_agent: user agent override.
        :param http_proxy: proxy as ``host:port``, without the scheme.
        :param home: URL opened right after startup.
        :param window_size: ``WxH`` or ``W,H``, the window is maximized when empty.
        :param mobile_emulation: mobile emulation parameters.
        :param option_arguments: extra command line arguments, ignored unless it is a list.
        :param req_interceptor: request interceptor, needs Selenium-Wire.
        :param res_interceptor: response interceptor, needs Selenium-Wire.
        :raises Exception: when mobile emulation is requested but the options class
            has no ``add_experimental_option``.
        """
        choose = SELENIUM_BROWSER_CHOOSE.capitalize()
        self.platform = platform.uname().system
        default_driver = None
        default_binary = None
        headless = HEADLESS_BROWSER_ENABLE or headless

        # Default locations are only known for Chrome.
        if choose == 'Chrome':
            if self.platform == 'Linux':
                default_driver = DefaultChromeLocation.xDriver
                default_binary = DefaultChromeLocation.xChrome
            else:
                default_driver = DefaultChromeLocation.wDriver
                default_binary = DefaultChromeLocation.wChrome

        # Priority: environment variable, then argument, then built-in default.
        driver = SELENIUM_BROWSER_DRIVER or driver or default_driver
        binary = SELENIUM_BROWSER_BINARY or binary or default_binary

        # Linux always runs headless (see below), and so may Windows; give
        # those runs a fixed window size when none was requested.
        if not window_size and (self.platform == 'Linux' or (self.platform == 'Windows' and headless)):
            window_size = '1920x1080'

        # Initialization settings.
        if not isinstance(option_arguments, list):
            option_arguments = []
        cdplist = []
        service = Service()
        options = Options()
        # CDP commands replayed by update_cdp_command(); must exist before open() is called.
        self.cdplist = cdplist

        # Delete prompt information of chrome being controlled.
        if hasattr(options, 'add_experimental_option'):
            options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])

        # Mobile emulation parameter setting start.
        if mobile_emulation:
            if not hasattr(options, 'add_experimental_option'):
                raise Exception('Do not support mobile emulation currently.')
            # NOTE(review): the +14 / -86 offsets presumably compensate for the
            # window frame and toolbar - confirm before changing them.
            self.w_browser = mobile_emulation.w + 14
            self.h_browser = mobile_emulation.h + 0
            self.w_inner_window = mobile_emulation.w + 0
            self.h_inner_window = mobile_emulation.h - 86
            self.mobile_emulation_screen_w = mobile_emulation.w
            self.mobile_emulation_screen_h = mobile_emulation.h
            # Mobile emulation overrides any requested window size.
            window_size = '%s,%s' % (self.w_browser, self.h_browser)
            options.add_experimental_option(
                'mobileEmulation',
                {
                    'deviceMetrics': {
                        'width': self.w_inner_window,
                        'height': self.h_inner_window,
                        'pixelRatio': 2.75,
                        'touch': True
                    },
                    'userAgent': mobile_emulation.user_agent
                }
            )
            cdplist.append([
                'Emulation.setUserAgentOverride',
                {
                    'userAgent': mobile_emulation.user_agent,
                    'userAgentMetadata': {
                        'platform': 'iPhone' if 'iPhone' in mobile_emulation.user_agent else 'Android',
                        'mobile': True,
                        'platformVersion': '',
                        'architecture': '',
                        'model': ''
                    }
                }
            ])
        else:
            self.w_browser = 0
            self.h_browser = 0
            self.w_inner_window = 0
            self.h_inner_window = 0
            self.mobile_emulation_screen_w = 0
            self.mobile_emulation_screen_h = 0
        # Mobile emulation parameter setting end.

        # Set browser and webdriver path.
        if driver:
            service.path = driver
        if binary:
            options.binary_location = binary

        # Add webdriver option arguments.
        for argument in option_arguments:
            options.add_argument(argument)

        # Set headless mode, Linux is always headless.
        if self.platform == 'Linux' or headless:
            options.add_argument('--headless')

        # Set no-sandbox mode.
        if self.platform == 'Linux':
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-dev-shm-usage')
            options.add_argument('--disable-gpu')

        # Set language of browser, only when one was requested.
        if lang:
            options.add_argument('--lang=%s' % lang)
            if hasattr(options, 'set_preference'):
                options.set_preference('intl.accept_languages', lang)

        # Set mute.
        if mute:
            options.add_argument('--mute-audio=true')
            if hasattr(options, 'set_preference'):
                print('Warning: Do not support mute audio currently.', file=sys.stderr)

        # Set no images mode.
        if no_images:
            options.add_argument('--blink-settings=imagesEnabled=false')
            if hasattr(options, 'set_preference'):
                print('Warning: Do not support disable images currently.', file=sys.stderr)

        # Set default user agent.
        if user_agent:
            options.add_argument('--user-agent=%s' % user_agent)
            if hasattr(options, 'set_preference'):
                options.set_preference('general.useragent.override', user_agent)

        # Set http proxy for browser.
        if http_proxy:
            options.add_argument('--proxy-server=http://%s' % http_proxy)

        # Set browser window size before startup ("1920x1080" and "1920, 1080" both become "1920,1080").
        normalized_size = window_size.replace("\x20", '').replace('x', ',') if window_size else ''
        if normalized_size:
            options.add_argument('--window-size=%s' % normalized_size)
        else:
            options.add_argument('--start-maximized')

        # Start the browser.
        super().__init__(service=service, options=options)

        # Selenium-Wire backend optimization start.
        try:
            self.backend.master.options.add_option('ssl_insecure', bool, True, 'Do not verify upstream server SSL/TLS certificates.')
            self.backend.master.options.add_option('upstream_cert', bool, False, 'Connect to upstream server to look up certificate details.')
            self.backend.master.options.add_option('http2', bool, False, 'Enable/disable HTTP/2 support.')
        except AttributeError:
            # Best effort: skipped when the driver exposes no such backend.
            pass
        # Selenium-Wire backend optimization end.

        if mobile_emulation:
            cdplist.append(['Emulation.setFocusEmulationEnabled', {'enabled': True}])
            cdplist.append(['Emulation.setTouchEmulationEnabled', {'enabled': True, 'maxTouchPoints': 5}])
            cdplist.append(['Emulation.setEmitTouchEventsForMouse', {'enabled': True, 'configuration': 'mobile'}])

        # Set the request and response interceptor.
        if req_interceptor:
            if not hasattr(self, 'backend'):
                print('Warning: Can not use the interceptor, because not extends Seleniun-Wire.', file=sys.stderr)
            self.request_interceptor = req_interceptor
        if res_interceptor:
            if not hasattr(self, 'backend'):
                print('Warning: Can not use the interceptor, because not extends Seleniun-Wire.', file=sys.stderr)
            self.response_interceptor = res_interceptor

        # Sync set http proxy for Selenium-Wire backend.
        if http_proxy:
            self.proxy = {'http': 'http://%s' % http_proxy, 'https': 'https://%s' % http_proxy}

        # Set browser window size after startup, by default, there will be full screen display window.
        if normalized_size:
            self.set_window_size(*normalized_size.split(','))
        else:
            self.maximize_window()

        # Sets a sticky timeout to implicitly wait for an element to be found.
        self.implicitly_wait(10)

        # Set the amount of time to wait for a page load to complete.
        self.set_page_load_timeout(25)

        # Open the default page.
        if home:
            self.open(home)

    @staticmethod
    def wait(secs: int | float = 1):
        """
        Sleep for the given number of seconds, zero or negative values return at once.
        """
        if secs > 0:
            time.sleep(secs)

    def open(self, url=None):
        """
        Open the URL, simulate into the URL in the address bar and jump, the new page has no Referrer.
        The stored CDP commands are replayed first.
        """
        self.update_cdp_command()
        return self.get(url)

    def turn(self, url=None):
        """
        Simulation "window.location.href" jumps, the new page has Referrer.
        """
        # json.dumps yields a safely quoted JavaScript string literal.
        return self.execute_script('window.location.href=%s;' % json.dumps(url, indent=None, ensure_ascii=True), None)

    def find(self, path):
        """
        Use XPath to find an element, the element found is highlighted.
        """
        ele = self.find_element(By.XPATH, path)
        self.element_prominent(ele, '#f8be5f')
        return ele

    def find_mult(self, path):
        """
        Use XPath to find elements, every element found is highlighted.
        """
        ele = self.find_elements(By.XPATH, path)
        for e in ele:
            self.element_prominent(e, '#f8be5f')
        return ele

    def find_element_by(self, sentence):
        """
        Custom find element, pass into a tuple or list such as (By.ID, 'name').
        """
        ele = self.find_element(*sentence)
        self.element_prominent(ele, '#f8be5f')
        return ele

    def click(self, element):
        """
        Click element for desktop version.
        """
        self.element_prominent(element, '#ff0000')
        self.action_chains().reset_actions()
        self.action_chains().click(element).perform()
        self.wait(0.1)

    def touch(self, x, y):
        """
        Click on the coordinates for Mobile edition.
        The pointer is moved by the offset (x, y) before clicking.
        """
        self.action_chains().reset_actions()
        self.action_chains().move_by_offset(x, y).click().perform()
        self.wait(0.1)

    def input(self, element, content):
        """
        Enter the content to the element.
        """
        self.element_prominent(element, '#00b6f1')
        self.action_chains().reset_actions()
        self.action_chains().send_keys_to_element(element, content).perform()
        self.wait(0.1)

    def mouse(self, element):
        """
        Park the mouse here.
        """
        self.element_prominent(element, '#49dc07')
        self.action_chains().reset_actions()
        self.action_chains().move_to_element(element).perform()

    def tab_create(self, url=None):
        """
        Create a new tab and open the URL.
        """
        self.switch_to.new_window('tab')
        self.update_cdp_command()
        if url:
            self.open(url)

    def tab_switch(self, tab: int | str):
        """
        Switch the browser tab page.

        :param tab: index into the window handles, or PositionTab.Prev / PositionTab.Next
            which move relative to the current tab and wrap around at both ends.
        :raises ValueError: when tab is neither an int nor a PositionTab value.
        """
        handles = self.window_handles
        lengths = len(handles)
        current = handles.index(self.current_window_handle)
        if isinstance(tab, int):
            handle = tab
        elif tab == PositionTab.Prev:
            handle = (current - 1) % lengths
        elif tab == PositionTab.Next:
            handle = (current + 1) % lengths
        else:
            raise ValueError('Unknown tab position: %r' % (tab,))
        self.switch_to.window(handles[handle])
        self.wait(0.2)
        self.update_cdp_command()

    def tab_cancel(self):
        """
        Close the current browser tab page.
        Afterwards the previous tab gets the focus, or the next one when the first tab was closed.
        """
        handles = self.window_handles
        if not handles:
            return
        current = handles.index(self.current_window_handle)
        self.close()
        remaining = handles[:current] + handles[current + 1:]
        if remaining:
            # Never stay on the closed window, every later command would fail.
            self.switch_to.window(remaining[max(current - 1, 0)])
        self.wait(0.2)

    def tab_cancel_all(self):
        """
        Close all the browser tab page.
        """
        for _ in self.window_handles:
            self.tab_cancel()

    def frame_switch_to(self, element_of_frame):
        """
        Switch frame to the specified frame element.
        """
        self.switch_to.frame(element_of_frame)
        self.wait(0.2)

    def frame_switch_to_default(self):
        """
        Switch to the default frame.
        """
        self.switch_to.default_content()
        self.wait(0.2)

    def scroll(self):
        """
        Scroll the page down by the client height of the document element.
        """
        self.action_chains().reset_actions()
        self.action_chains().scroll_by_amount(0, self.execute_script('return document.documentElement.clientHeight;')).perform()
        self.wait(0.8)

    def scroll_to(self, pos: int | str):
        """
        Scroll to the specified location.

        :param pos: a positive int is a vertical offset, 0 is the top and -1 is the bottom,
            anything else only waits.
        """
        if isinstance(pos, int) and pos > 0:
            self.execute_script('window.scrollTo(0, arguments[0]);', pos)
        elif pos == 0:
            self.execute_script('window.scrollTo(0, 0);')
        elif pos == -1:
            self.execute_script('window.scrollTo(0, document.body.scrollHeight);')
        self.wait(0.8)

    def scroll_to_element(self, element):
        """
        Scroll to the specified element location.
        """
        self.action_chains().reset_actions()
        self.action_chains().scroll_to_element(element).perform()
        self.wait(0.8)

    def element_force_display(self, element):
        """
        Make hidden element visible and interactive.
        """
        self.execute_script(
            'let e=arguments[0];e.style.display="inline-block";e.style.visibility="visible";e.setAttribute("hidden","false");',
            element
        )

    def element_prominent(self, element, color='#ff0000', dura=2500):
        """
        Make the element highlight.

        A border in the color and a translucent background are applied, the styles
        saved on the first call are restored after dura milliseconds.

        :param element: element to highlight, nothing happens when it is falsy.
        :param color: hexadecimal color accepted by ColorUtils.hex2rgb.
        :param dura: delay in milliseconds before the styles are restored.
        :return: False when element is falsy, otherwise None.
        """
        if not element:
            return False
        r, g, b = ColorUtils.hex2rgb(color)
        self.execute_script('''
            let e=arguments[0];
            try{
                let o=[e.style.background||null,e.style.border||null];
                e.style.border="1px solid %s";e.style.background="rgba(%s,%s,%s,0.2)";
                if(!e.prominent){
                    e.prominent=true;
                    setTimeout(function(args){try{args[0].prominent=null;args[0].style.background=args[1][0];args[0].style.border=args[1][1]}catch(e){}},%s,[e,o]);
                }
            }catch(e){}''' % (color, r, g, b, dura), element
        )

    def webdriver_wait(self, timeout: float, poll_frequency: float = 0.5, ignored_exceptions=None):
        """
        Return WebDriverWait object.
        """
        return WebDriverWait(
            driver=self,
            timeout=timeout,
            poll_frequency=poll_frequency,
            ignored_exceptions=ignored_exceptions
        )

    def current_alert(self):
        """
        Return current alert object.
        """
        return Alert(self)

    def window_inner_size(self):
        """
        Get the page window inner size as {'w': width, 'h': height}.
        """
        size = self.execute_script('return [window.innerWidth, window.innerHeight];')
        return {'w': size[0] or 0, 'h': size[1] or 0}

    def action_chains(self):
        """
        Return ActionChains object.
        """
        return ActionChains(self)

    def screenshot(self) -> bytes:
        """
        Screenshot as bytes.
        """
        return self.get_screenshot_as_png()

    def update_cdp_command(self) -> None:
        """
        Execute every CDP command stored in cdplist.
        """
        for cmd in self.cdplist:
            self.execute_cdp_cmd(*cmd)


class WebDriver(Browser):
    """
    Get a browser driver object.
    """

    def __init__(self, **kwargs):
        """
        Start a Browser with the language defaulting to zh-CN.

        :param kwargs: any keyword argument of Browser, passed through unchanged.
        """
        kwargs.setdefault('lang', 'zh-CN')
        super().__init__(**kwargs)


if __name__ == '__main__':
    # e.g. Test it can work normally.
    driver = WebDriver()
    try:
        driver.open('https://www.hao123.com/')
        driver.wait()
    finally:
        # Always release the browser process, even when the page fails to load.
        driver.quit()