Selenium Pagination

57 Views Asked by At

I am trying to collect all the links from a paginated results table using Selenium. Since there are more than ten thousand records, it will take time to get all the links and some information from each of them, so I want to handle pagination: after taking the ten links on a page, the script should click the Next button and take the next ten links. If you have any suggestions for an alternative to Selenium, I am happy to hear them. Here is my code; how can I integrate pagination into it? Thanks for your help.

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import pandas as pd

import undetected_chromedriver as uc


import time




# Results page to scrape (the query string filters on cntry=TR).
website='https://clinicaltrials.gov/ct2/results?cond=&term=&cntry=TR&state=&city=&dist='
# NOTE(review): `path` and `service` are kept so the names stay defined, but
# nothing below uses them; the driver is created without an explicit service.
path=r"C:\Users\kaant\Downloads\chromedriver.exe"
service=Service(executable_path=path)

options = uc.ChromeOptions()
options.headless = True
# Hand the options to the driver; constructing uc.Chrome() without them
# silently discards the headless setting configured above.
driver = uc.Chrome(options=options)
driver.get(website)
driver.maximize_window()
time.sleep(2)  # crude fixed pause before querying the page

# Query the anchors inside table cells once and keep only their targets.
country_links = [
    element.get_attribute("href")
    for element in driver.find_elements(By.XPATH, value=".//td/a")
]

# Keep every second link starting at index 1 (the last element is excluded by
# the slice), then add the final link explicitly.
cc = country_links[1:-1:2]
if country_links:  # an empty page must not raise IndexError
    cc.append(country_links[-1])

After that, it should go to the next page and do the same.

1

There is 1 best solution below

0
Andrej Kesely On

You can use their Ajax API to download the data, for example:

import requests
from bs4 import BeautifulSoup

# Form fields posted to the results endpoint. The field names appear to follow
# the DataTables server-side processing convention (draw / columns / start /
# length / search) -- TODO confirm against the site's own requests.
#
# All 26 column descriptors (indices 0-25) share the same six fields, so they
# are generated instead of spelled out. The only per-column differences are
# the "[data]" index and column 1 being the single non-searchable column.
# "start" and "length" are the paging offset and page size sent with each
# request.
payload = {
    "draw": "4",
    **{
        f"columns[{col}]{suffix}": value
        for col in range(26)
        for suffix, value in (
            ("[data]", str(col)),
            ("[name]", ""),
            ("[searchable]", "false" if col == 1 else "true"),
            ("[orderable]", "false"),
            ("[search][value]", ""),
            ("[search][regex]", "false"),
        )
    },
    "start": 0,
    "length": 100,
    "search[value]": "",
    "search[regex]": "false",
}


# Endpoint that receives the form fields in `payload` and answers with JSON.
api_url = 'https://clinicaltrials.gov/ct2/results/rpc/Xi0yqBcxWwU59HGn'

# One session for all pages so the HTTP connection can be reused.
with requests.Session() as session:
    for start in range(0, 300, 100):  # <--- increase number of pages here
        # Advance the paging offset; the step matches payload["length"].
        payload['start'] = start

        response = session.post(api_url, data=payload, timeout=30)
        # Fail loudly on an HTTP error instead of trying to decode an error page.
        response.raise_for_status()
        data = response.json()

        # print only data from column 3:
        for row in data['data']:
            soup = BeautifulSoup(row[3], 'html.parser')
            link = soup.a
            if link is None:
                # Cell holds no anchor; there is nothing to print for this row.
                continue
            print('{:<80} {}'.format(link.text[:80], 'https://clinicaltrials.gov' + link['href']))

Prints:

Two Different Dietary Interventions for Girls With Polycystic Ovary Syndrome     https://clinicaltrials.gov/ct2/show/NCT05768724?cntry=TR&draw=4&rank=1
Laparoscopic Extraperitoneal ( Modified) Burch Colposuspension                   https://clinicaltrials.gov/ct2/show/NCT05768607?cntry=TR&draw=4&rank=2
Whole Body Vibration Training Applied With Different Frequencies in Hypertensive https://clinicaltrials.gov/ct2/show/NCT05768555?cntry=TR&draw=4&rank=3
Assignment of the Verbal Component Score and Addition of Pupil Reaction to the G https://clinicaltrials.gov/ct2/show/NCT05767502?cntry=TR&draw=4&rank=4
Comparison of 3 Different Protocols for Preventing Premature Ovulation in Ovaria https://clinicaltrials.gov/ct2/show/NCT05767086?cntry=TR&draw=4&rank=5
Web-Based Intercultural Midwifery Training Given to Midwifery Students           https://clinicaltrials.gov/ct2/show/NCT05766228?cntry=TR&draw=4&rank=6
The Effect of Motor Imagery and Action Observation on Motor Skill Acquisition    https://clinicaltrials.gov/ct2/show/NCT05765708?cntry=TR&draw=4&rank=7
Effects of Dual-Task Training In Individuals With Alzheimer                      https://clinicaltrials.gov/ct2/show/NCT05765552?cntry=TR&draw=4&rank=8
The Effects of Topical Hyaluronic Acid and Laser Application on Wound Healing in https://clinicaltrials.gov/ct2/show/NCT05765240?cntry=TR&draw=4&rank=9
Investigation of the Efficiency of Spinal Mobilization in Patients With Adolesce https://clinicaltrials.gov/ct2/show/NCT05765019?cntry=TR&draw=4&rank=10
Endorphin Massage Application to Women Who Gave Birth Vaginally                  https://clinicaltrials.gov/ct2/show/NCT05764512?cntry=TR&draw=4&rank=11

...and so on.