I'm having trouble with code that scrapes tweets with Selenium. This is the code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from time import sleep
import getpass as gp
import pandas as pd
# Set the path for the Chrome WebDriver binary
PATH = "Downloads/chromedriver-win32/chromedriver-win32/chromedriver.exe"
# Initialize WebDriver with Chrome (Selenium 4 uses a Service object, not executable_path)
driver = webdriver.Chrome(service=Service(PATH))
# Open Twitter login page
driver.get("https://twitter.com/login")
# Subject of interest
subject = "Elon Musk"
# Wait for page to load
sleep(3)
# Find username input field and enter username
username = driver.find_element(By.XPATH, "//input[@name='text']")
username.send_keys("*********")
# Find 'Next' button and click
next_button = driver.find_element(By.XPATH, "//span[contains(text(),'Next')]")
next_button.click()
# Wait for page to load
sleep(3)
# Find password input field and enter password
password = driver.find_element(By.XPATH, "//input[@name='password']")
password.send_keys("**********")
# Find 'Log in' button and click
log_in = driver.find_element(By.XPATH, "//span[contains(text(),'Log in')]")
log_in.click()
# Wait for page to load
sleep(3)
# Find search box, enter subject and press Enter
search_box = driver.find_element(By.XPATH, "//input[@data-testid='SearchBox_Search_Input']")
search_box.send_keys(subject)
search_box.send_keys(Keys.ENTER)
# Wait for page to load
sleep(3)
# Click on 'People' tab
people = driver.find_element(By.XPATH, "//span[contains(text(),'People')]")
people.click()
# Wait for page to load
sleep(3)
# Click on profile of interest
profile = driver.find_element(By.XPATH, "//*[@id='react-root']/div/div/div[2]/main/div/div/div/div/div/div[3]/section/div/div/div[1]/div/div/div/div/div[2]/div[1]/div[1]/div/div[1]/a/div/div[1]/span/span[1]")
profile.click()
# Get the UserTag
UserTag = driver.find_element(By.XPATH,"//div[@data-testid='User-Names']").text
print(UserTag)
# Wait for page to load
sleep(5)
# Initialize lists to store data
UserTags = []
TimeStamps = []
Tweets = []
Replys = []
reTweets = []
Likes = []
# Extract tweets
articles = driver.find_elements(By.XPATH, "//article[@data-testid='tweet']")
while True:
    for article in articles:
        UserTag = article.find_element(By.XPATH, ".//div[@data-testid='User-Names']").text
        UserTags.append(UserTag)
        TimeStamp = article.find_element(By.XPATH, ".//time").get_attribute('datetime')
        TimeStamps.append(TimeStamp)
        Tweet = article.find_element(By.XPATH, ".//div[@data-testid='tweetText']").text
        Tweets.append(Tweet)
        # Store the text of the counters, not the WebElement objects themselves
        Reply = article.find_element(By.XPATH, ".//div[@data-testid='reply']").text
        Replys.append(Reply)
        reTweet = article.find_element(By.XPATH, ".//div[@data-testid='retweet']").text
        reTweets.append(reTweet)
        Like = article.find_element(By.XPATH, ".//div[@data-testid='like']").text
        Likes.append(Like)
    # Scroll down (window.scrollTo, not windows.scroLLto)
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
    sleep(3)
    articles = driver.find_elements(By.XPATH, "//article[@data-testid='tweet']")
    # Stop once enough unique tweets have been collected
    Tweets2 = list(set(Tweets))
    if len(Tweets2) > 5:
        break
# Print lengths of lists
print(len(UserTags), len(TimeStamps), len(Tweets),
      len(Replys), len(reTweets), len(Likes))
# Create DataFrame
df = pd.DataFrame(zip(UserTags, TimeStamps, Tweets, Replys, reTweets, Likes),
                  columns=['UserTags', 'TimeStamps', 'Tweets', 'Replys', 'reTweets', 'Likes'])
# Display DataFrame head
print(df.head())
# Save DataFrame to Excel file
df.to_excel(r"C:/Users/Felipe Ojalvo/Downloads/tweets_musk.xlsx", index=False)
The terminal responds with:
"name": "NoSuchElementException",
"message": "Message: no such element: Unable to locate element: {\"method\":\"xpath\",\"selector\":\"//input[@name='text']\"}\n (Session info: chrome=122.0.6261.112)\nStacktrace:\n\tGetHandleVerifier
I have tried updating the Chrome WebDriver and rewriting the XPaths in different ways, but it does not fix the problem. Does anyone know how to solve this? What I am trying to do is scrape tweets from a user and return the results as a DataFrame of tabulated data.
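For reference, this is a minimal sketch of the kind of change I experimented with: waiting explicitly for the username field instead of relying on a fixed sleep (the selector is the same one that fails above):

# Sketch of one attempted variant: wait up to 10 seconds for the username
# field to appear instead of sleeping a fixed 3 seconds.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)
username = wait.until(
    EC.presence_of_element_located((By.XPATH, "//input[@name='text']"))
)
username.send_keys("*********")

This still raises a timeout on the same locator, so I suspect the element itself is different from what the XPath expects.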