I’m trying to navigate through a pagination element on a webpage using Selenium WebDriver in Python. The HTML structure of the pagination element is as follows:
<ul class="pagination">
<li class="paginate_button previous" id="datatable_previous">
<a href="#" aria-controls="datatable" data-dt-idx="0" tabindex="0">Précédent</a>
</li>
<li class="paginate_button ">
<a href="#" aria-controls="datatable" data-dt-idx="1" tabindex="0">1</a>
</li>
<li class="paginate_button active">
<a href="#" aria-controls="datatable" data-dt-idx="2" tabindex="0">2</a>
</li>
<!-- More page numbers... -->
<li class="paginate_button next" id="datatable_next">
<a href="#" aria-controls="datatable" data-dt-idx="8" tabindex="0">Suivant</a>
</li>
</ul>
I want to click the Suivant button to go to the next page. However, the <a> tag inside the Suivant button has href="#", so it doesn’t change the URL when clicked. I tried using Selenium’s click() method to click the button, but the page content doesn’t change.
Here’s the code I’m using:
def scrape_bloc_files(soup):
for i, pdf in enumerate(soup.select("tr a[target='_blank']")):
pdf_link = pdf['href']
driver.get(pdf_link)
time.sleep(1)
options = webdriver.ChromeOptions()
options.add_experimental_option('prefs', {
"download.default_directory": "/content/drive/MyDrive/Colab Notebooks/Files/resumes",
"download.prompt_for_download": False,
"download.directory_upgrade": True,
"plugins.always_open_pdf_externally": True
})
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.binary_location = '/usr/bin/chromium-browser'
driver = webdriver.Chrome(options=options)
driver.get('https://www.offre-emploi.tn/consulter-cv/')
time.sleep(1)
try:
html = driver.page_source
except UnexpectedAlertPresentException:
alert = driver.switch_to.alert
alert.accept()
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
while True:
try:
next_button = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.CSS_SELECTOR, "li#datatable_next a"))
)
driver.execute_script("arguments[0].click();", next_button)
scrape_bloc_files(soup)
except Exception as e:
print(f"Couldn't click the next button: {e}")
break
driver.quit()
Does anyone know why the page content isn’t changing and how I can successfully navigate to the next page? Any help would be greatly appreciated!