Selenium undetected_chromedriver (uc): Chrome instances proliferate, eating up all memory and CPU

102 Views Asked by At

I have included steps to close the Selenium Python webdriver, and I even restart it regularly to avoid accumulating problems. Nevertheless, my Windows 10 Task Manager shows memory and CPU usage rising steadily until they saturate and my computer freezes, and the number of Chrome instances keeps growing despite my steps to close the webdriver regularly:

import csv
import json
import os
import time
from selenium import webdriver
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import logging
from selenium.webdriver.chrome.options import Options

# Logging: every run writes to its own timestamped file and echoes to the console.
current_time = time.strftime("%Y%m%d_%H%M%S")
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(message)s',
    handlers=[logging.FileHandler(f"log_{current_time}.log"), logging.StreamHandler()],
)
logger = logging.getLogger()

# The single shared browser instance; managed by close_driver()/create_driver().
global_driver = None


def close_driver():
    """Quit the shared browser, if there is one, and forget about it.

    An exception raised while quitting is logged instead of propagated; the
    module-level ``global_driver`` is reset to ``None`` afterwards.
    """
    global global_driver
    if not global_driver:
        # Nothing is open, so there is nothing to shut down.
        return

    active_driver = global_driver
    try:
        active_driver.quit()
    except Exception as e:
        logger.error(f"Error closing the driver: {e}")
    global_driver = None




def create_driver():
    """Replace the shared browser with a fresh headless Chrome instance.

    Any existing shared driver is closed first. The new driver is stored in
    the module-level ``global_driver`` and returned.

    Returns:
        The new driver, or ``None`` when it could not be created (the
        failure is logged).
    """
    global global_driver
    close_driver()  # never keep two browsers alive at once
    try:
        headless_options = Options()
        headless_options.add_argument("--headless")  # no visible window
        global_driver = uc.Chrome(options=headless_options)
    except Exception as e:
        logger.error(f"Error creating new driver instance: {e}")
        return None
    return global_driver


# Quantity to request on the product page. When this is 2,
# scrape_shipping_rates() types "2" into the page's quantity input before
# asking for a rate; any other value leaves the input untouched.
SCRAPE_QTY = 2  # Set to 2 if scraping for 2 qty values

# One representative location per postcode group. Each entry is
# (postcode_group, suburb, state, postcode); the entry is spread into the
# output CSV rows in that order, index 0 is used as the group key and
# index 3 is the value typed into the shipping estimator.
# NOTE(review): the "QLDTWN" row is labelled QLD but 2469 looks like an NSW
# postcode - confirm this row is intentional.
representative_postcodes = [
    ("SYDMET", "EASTGARDENS", "NSW", "2036"),
    ("NSWTWN", "BAY VILLAGE", "NSW", "2261"),
    ("NSWREG", "CATTLE CREEK", "NSW", "2339"),
    ("QLDTWN", "BOTTLE CREEK", "QLD", "2469"),
    ("MELMET", "CROSS KEYS", "VIC", "3041"),
    ("VICTWN", "BELL PARK", "VIC", "3215"),
    ("VICREG", "TERANG", "VIC", "3264"),
    ("BRIMET", "ASPLEY", "QLD", "4034"),
    ("QLDREG", "CARPENDALE", "QLD", "4344"),
    ("ADEMET", "OAKLANDS PARK", "SA", "5046"),
    ("SAREG", "CAPE JERVIS", "SA", "5204"),
    ("PERMET", "KARAKIN", "WA", "6044"),
    ("WAREG", "BALBARRUP", "WA", "6258"),
    ("TAZTWN", "CAPE PILLAR", "TAS", "7182"),
    ("TASMAN", "BLACK HILLS", "TAS", "7140"),
    ("DARMET", "ANULA", "NT", "0812"),
    ("NTREG", "ALICE SPRINGS", "NT", "0870")
]


def read_source_csv(file_path):
    """Load the source product feed as a list of row dictionaries.

    Args:
        file_path: Path of the CSV file; its first row supplies the keys.

    Returns:
        One dict per data row, keyed by the header names. An empty list is
        returned (and the error logged) when the file cannot be read.
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading
        # byte-order mark. With plain 'utf-8' a BOM (common in spreadsheet
        # exports) ends up glued to the first header, so row['SKU'] lookups
        # fail with KeyError.
        with open(file_path, newline='', encoding='utf-8-sig') as csvfile:
            return list(csv.DictReader(csvfile))
    except Exception as e:
        logger.error(f"Error reading CSV file: {e}")
        return []


def append_to_csv(file_name, data):
    """Append one result row to a CSV file, creating it with a header if new.

    Args:
        file_name: Path of the CSV file to append to.
        data: Sequence of field values forming the row.

    Any failure is logged and swallowed, so a bad write never stops a run.
    """
    header = ['sku', 'postcode_group', 'suburb', 'state', 'postcode', 'rate']
    try:
        # Must be decided before open(): append mode creates the file.
        is_new_file = not os.path.isfile(file_name)
        with open(file_name, 'a', newline='', encoding='utf-8') as out:
            rows = [header, data] if is_new_file else [data]
            csv.writer(out).writerows(rows)
    except Exception as e:
        logger.error(f"Error writing to CSV file: {e}")


def save_last_processed(sku, postcode_group):
    """Record the most recently processed SKU/postcode group on disk.

    Overwrites ``last_processed.json`` in the current working directory.
    Failures are logged and otherwise ignored.

    Args:
        sku: Value stored under ``last_processed_sku``.
        postcode_group: Value stored under ``last_processed_postcode_group``.
    """
    checkpoint = {
        'last_processed_sku': sku,
        'last_processed_postcode_group': postcode_group,
    }
    try:
        with open('last_processed.json', 'w') as handle:
            json.dump(checkpoint, handle)
    except Exception as e:
        logger.error(f"Error saving last processed record: {e}")


def load_last_processed():
    """Read back the checkpoint stored in ``last_processed.json``.

    Returns:
        The decoded JSON content of the file. When the file is missing, or
        cannot be read or parsed (which is logged), a dict with both
        ``last_processed_sku`` and ``last_processed_postcode_group`` set to
        ``None`` is returned instead.
    """
    try:
        with open('last_processed.json', 'r') as handle:
            checkpoint = json.load(handle)
    except FileNotFoundError:
        # No checkpoint yet: an expected situation, not worth a log entry.
        pass
    except Exception as e:
        logger.error(f"Error loading last processed record: {e}")
    else:
        return checkpoint
    return {'last_processed_sku': None, 'last_processed_postcode_group': None}


def scrape_shipping_rates(driver, sku, product_url, postcode_group):
    """Scrape the shipping rate of one product for one representative postcode.

    Loads the product page, optionally sets the quantity to 2, types the
    postcode into the shipping estimator, picks the first suggestion, clicks
    "Get Rate" and records the outcome in one of the result CSV files.

    Args:
        driver: Selenium WebDriver used to drive the page.
        sku: SKU identifier written to the output files and log messages.
        product_url: URL of the product page to load.
        postcode_group: Sequence whose items are spread into the output row;
            item 3 is typed into the estimator and item 0 appears in logs.

    Returns:
        A status string, always (never ``None``):
        ``'Success'``       - rate written to fed_scraped_shipping_rates.csv
        ``'404 Error'``     - the page showed the "Whoops, our bad..." heading
        ``'Error'``         - the estimator showed its error message, or an
                              unexpected exception occurred
        ``'No Rate'``       - no rate table appeared after requesting a rate
        ``'Out of Memory'`` - the browser reported an out-of-memory or
                              renderer-timeout failure
    """
    try:
        driver.get(product_url)
        logger.info("Page loaded.")  # Debugging print

        # Check for the 404 error message on the page.
        try:
            WebDriverWait(driver, 3).until(
                EC.presence_of_element_located((By.XPATH, "//h1[text()='Whoops, our bad...']"))
            )
        except TimeoutException:
            # The heading being absent is the normal case, so this is
            # informational rather than an error.
            logger.info("No 404 Error detected. Continuing scraping.")
        else:
            logger.info(f"404 Error for {sku} at {product_url}")
            append_to_csv('404.csv', [sku, *postcode_group, '404 Error'])
            return '404 Error'

        # Handling quantity selection if SCRAPE_QTY is set to 2.
        if SCRAPE_QTY == 2:
            try:
                qty_input = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.ID, "qty"))
                )
                qty_input.clear()
                qty_input.send_keys("2")
                logger.info("Quantity set to 2.")  # Debugging print
            except TimeoutException:
                # Best effort: carry on with whatever quantity the page has.
                logger.error("Quantity input not found or loading issue.")

        # Wait for the city input to appear and enter the postcode.
        city_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "city"))
        )
        logger.info("City input found.")  # Debugging print
        city_input.clear()
        city_input.send_keys(postcode_group[3])
        logger.info("Postcode entered.")  # Debugging print

        # Give the autocomplete time to refresh before reading suggestions.
        time.sleep(2)

        suggestion = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#suggetion-box ul li"))
        )
        logger.info("Suggestion box found.")  # Debugging print

        suggestion.click()
        logger.info("Suggestion clicked.")  # Debugging print

        # Click the Get Rate button.
        driver.find_element(By.ID, "get_rate").click()
        logger.info("Get rate button clicked.")  # Debugging print

        # Check for the estimator's own error message.
        try:
            WebDriverWait(driver, 3).until(
                EC.visibility_of_element_located((By.ID, "shipping_rate_estimation_error"))
            )
        except TimeoutException as e:
            # No error message appeared: the expected path, so not an error.
            logger.info(f"TimeoutException for error message check: {e}")
        else:
            append_to_csv('red_errormsg_returned.csv', [sku, *postcode_group, 'Error'])
            # Return an explicit status instead of a bare None.
            return 'Error'

        # Check for the rate table.
        try:
            WebDriverWait(driver, 3).until(
                EC.visibility_of_element_located((By.ID, "result-table"))
            )
        except TimeoutException as e:
            logger.error(f"TimeoutException for rate table check: {e}")
            append_to_csv('rate_not_detected.csv', [sku, *postcode_group, 'No Rate'])
            # A missing rate is not a success; report it as such.
            return 'No Rate'

        rate_span = driver.find_element(By.CSS_SELECTOR, "#result-table .price")
        rate = rate_span.text.replace('$', '').replace(',', '')
        append_to_csv('fed_scraped_shipping_rates.csv', [sku, *postcode_group, rate])
        return 'Success'

    except Exception as e:
        error_message = str(e)
        # Renderer timeouts are treated like out-of-memory failures so the
        # caller restarts the browser.
        if "Out of Memory" in error_message or "Timed out receiving message from renderer" in error_message:
            logger.error(f"Out of Memory error processing SKU {sku} for postcode group {postcode_group[0]}")
            append_to_csv('out_of_memory_errors.csv', [sku, *postcode_group, 'Out of Memory'])
            return 'Out of Memory'
        logger.error(f"Error processing SKU {sku} for postcode group {postcode_group[0]}: {e}")
        return 'Error'


def find_starting_index(source_data, last_processed_sku):
    """Return the position of the first row whose 'SKU' equals the given SKU.

    Args:
        source_data: Sequence of row dicts, each with an ``'SKU'`` key.
        last_processed_sku: SKU value to search for.

    Returns:
        Index of the first matching row, or 0 when no row matches.
    """
    matching_positions = (
        position
        for position, record in enumerate(source_data)
        if record['SKU'] == last_processed_sku
    )
    return next(matching_positions, 0)






def get_postcode_group_index(postcode_group_name):
    """Look up a postcode group's position in ``representative_postcodes``.

    Args:
        postcode_group_name: Group code to find (first item of each entry).

    Returns:
        Index of the first entry with that code, or -1 when there is none.
    """
    matches = (
        position
        for position, (group_code, _suburb, _state, _postcode)
        in enumerate(representative_postcodes)
        if group_code == postcode_group_name
    )
    return next(matches, -1)





def process_sku(driver, row, postcode_group):
    """Scrape one SKU/postcode-group combination and log the outcome.

    Args:
        driver: Selenium WebDriver passed on to scrape_shipping_rates().
        row: Source-feed row; its ``'SKU'`` and ``'Product URL'`` are used.
        postcode_group: Postcode-group entry; item 0 is the group code.

    Returns:
        The status returned by scrape_shipping_rates(). On ``'Success'`` the
        checkpoint file is updated; on ``'Out of Memory'`` the function
        returns immediately, without the one-second pause.

    Raises:
        Exception: Any unexpected exception is re-raised after the driver
            has been shut down.
    """
    try:
        result = scrape_shipping_rates(driver, row['SKU'], row['Product URL'], postcode_group)

        if result == 'Success':
            logger.info(f"Successfully processed SKU: {row['SKU']}, Postcode Group: {postcode_group[0]}")
            save_last_processed(row['SKU'], postcode_group[0])
        elif result == 'Out of Memory':
            # Don't update last_processed, but return the 'Out of Memory' status
            logger.error(f"Out of Memory error for SKU: {row['SKU']}, Postcode Group: {postcode_group[0]}")
            return 'Out of Memory'
        else:
            logger.error(f"Error encountered for SKU: {row['SKU']}, Postcode Group: {postcode_group[0]}")

        time.sleep(1)  # brief pause between requests
        return result

    except Exception:
        # Shut the browser down before propagating. A failure while quitting
        # is only logged, so it cannot replace the original exception.
        if driver:
            try:
                driver.quit()
            except Exception as quit_error:
                logger.error(f"Error closing the driver: {quit_error}")
        raise
    

def read_processed_skus(file_name):
    """Collect the (SKU, postcode group) pairs already recorded in a CSV file.

    Args:
        file_name: Path of a result CSV whose first row is a header and whose
            first two columns are the SKU and the postcode group.

    Returns:
        A set of ``(sku, postcode_group)`` tuples; empty when the file does
        not exist.
    """
    processed_skus = set()
    if not os.path.isfile(file_name):
        return processed_skus

    # newline='' lets the csv module handle line endings itself.
    with open(file_name, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip header
        for row in reader:
            # csv.reader yields [] for blank lines; such rows (or ones with a
            # single field) used to raise IndexError and abort the whole run.
            if len(row) < 2:
                continue
            processed_skus.add((row[0], row[1]))  # SKU and postcode group
    return processed_skus




def main():
    """Scrape shipping rates for every SKU / postcode-group pair not yet done.

    Reads the source feed, skips pairs already present in any result or
    error CSV, and processes the rest with a single shared browser. The
    browser is replaced after an 'Out of Memory' result or after a fixed
    number of pages. The shared driver is always closed on exit.
    """
    restart_after = 25  # pages handled by one browser before it is replaced

    logger.info("Starting script...")
    try:
        source_data = read_source_csv("Data feed-22.01.24.csv")
        if not source_data:
            logger.info("No data found in source file, exiting.")
            return

        # Everything already recorded - successfully or not - is skipped.
        already_handled = read_processed_skus("fed_scraped_shipping_rates.csv")
        for error_file in (
            "out_of_memory_errors.csv",
            "rate_not_detected.csv",
            "red_errormsg_returned.csv",
            "404.csv",
        ):
            already_handled |= read_processed_skus(error_file)

        driver = create_driver()
        if driver is None:
            # create_driver() has already logged the cause.
            logger.error("Could not start a browser, exiting.")
            return
        process_count = 0

        for row in source_data:
            for postcode_group in representative_postcodes:
                if (row['SKU'], postcode_group[0]) in already_handled:
                    continue

                result = process_sku(driver, row, postcode_group)
                process_count += 1

                if result == 'Out of Memory' or process_count >= restart_after:
                    # create_driver() closes the current browser itself, so
                    # quitting it here as well would quit it twice.
                    driver = create_driver()
                    if driver is None:
                        logger.error("Could not restart the browser, exiting.")
                        return
                    process_count = 0

    except Exception as e:
        logger.error(f"An error occurred in the main function: {e}")
    finally:
        close_driver()
        logger.info("Script finished.")

# Run the scraper only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()
1

There is 1 best solution below

1
Giuseppe On

This is not specific to undetected_chromedriver (UC): I tested and verified the same behaviour using the latest plain Selenium.