Scraping data from a refreshing javascript page in Python

35 Views Asked by At

I am trying to scrape some data from the site https://bloks.io/live. The first problem is that I have no way to access the refreshing data in that table. My idea is to check whether the first column changes; if it does, I then check which account is involved, and so on. So I need to read the first column constantly, but it doesn't work.

I tried it with CSS-selectors, but no success.

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager


link = 'https://bloks.io/live'

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get(link)

# The table on this page is rendered by JavaScript after the initial page
# load, so calling find_element immediately raises NoSuchElementException.
# Explicitly wait until the second table row is present in the DOM.
wait = WebDriverWait(driver, 10)
first_row = wait.until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '#info>tbody>tr:nth-child(2)'))
)

tSoup = BeautifulSoup(first_row.get_attribute('outerHTML'), 'html.parser')

This gives me a NoSuchElementException. Could anyone help me?

UPDATE: my code now finds the element that I want to check constantly, but the element doesn't refresh. My code so far:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time

def connection():
    """Create and return a Chrome WebDriver, installing the driver binary if needed."""
    service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=service)

def search_first_entry(driver, link):
    """Poll the first data row of the live table and print its text whenever it changes.

    The page re-renders the table via JavaScript. Re-locating the row returns
    a WebElement whose equality is based on element *identity*, not contents,
    so the original ``element != element_prev`` comparison never detected a
    change (and ``element_prev = 0`` compared an int to a WebElement). The
    fix is to compare the row's ``.text`` on every iteration.
    """
    driver.get(link)

    # Wait until the JavaScript-rendered table has appeared.
    wait = WebDriverWait(driver, 5)
    locator = (By.CSS_SELECTOR, '#info>tbody>tr:nth-child(2)')

    previous_text = None  # None guarantees the first reading is always printed
    while True:
        row = wait.until(EC.presence_of_element_located(locator))
        current_text = row.text
        if current_text != previous_text:
            print(current_text)
            previous_text = current_text
        time.sleep(1)  # poll once per second


def main():
    """Open the live page and watch its first table row for changes."""
    link = 'https://bloks.io/live'
    driver = connection()
    try:
        search_first_entry(driver, link)
    finally:
        # Always release the browser process, even on Ctrl-C or an exception;
        # the original script leaked the Chrome instance.
        driver.quit()


if __name__ == '__main__':
    main()
1

There is 1 answer below.

1
dimay On

To better understand what happened, I suggest you take a screenshot and save the HTML. If you do, you'll see that you need to wait some time (or sleep — not the best solution) before the element appears.

# Save a screenshot of the current browser viewport for visual debugging.
driver.save_screenshot("webpage.png")

# Dump the HTML as rendered *after* JavaScript ran, so the actual DOM
# (not the initial server response) can be inspected offline.
html_code = driver.page_source

with open("webpage.html", "w", encoding="utf-8") as file:
    file.write(html_code)

about sleep/wait https://selenium-python.readthedocs.io/waits.html