Extracting Disqus comments using python

218 Views Asked by At

I want to scrape Disqus comments, but they do not appear because they are inside an iframe. I searched online, but the solutions I found either used other languages or relied on an API that requires a public and a secret key. Is there a way to do this with Python?

I am using bs4 and Python requests. This is my current code:

from bs4 import BeautifulSoup
import requests
import json
import MySQLdb


# Scrape one kolnovel.com chapter page: POST the form fields to the chapter
# URL, parse the returned HTML, fetch the JSON document referenced by one of
# the page's <link> elements, and read the href of one of its anchors.

# Headers imitating a desktop Chrome page navigation.
# NOTE: requests only decodes 'br' (Brotli) response bodies when a Brotli
# library is installed; if `.text` comes back garbled, drop 'br' below.
# NOTE(review): the fixed 'Host' value is also sent with the second request
# further down; that is only correct if that URL is on kolnovel.com - confirm.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    'Connection': 'keep-alive',
    'DNT': '1',
    'Host': 'kolnovel.com',
    'sec-ch-ua': '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/???? Safari/537.36'
}

# Form fields sent in the body of the POST request.
# NOTE(review): 'token' looks like a captured reCAPTCHA response; such tokens
# are presumably short-lived, so this hard-coded value may need refreshing -
# confirm.
data = {
    'sitekey': '6LczjqQeAAAAAPZAEeL8qpc8-Hgi0iPVpD6ZSR-u',
    'token': '03AIIukzh0JOVba9CKdlX0NvOZwbzNzDfvlQ2f2ZeCY5DF6HMIFOihWbqEUtmrlrQ3vDGPx8EuCO3wls1q2m-Z9uFOSIDeoIDOiG4QOsv0S2ceL4JX9WrQFO2ZI7my97ZTWCS3rmn9CjNxZOmreEJX2PzDnRzjfCBqMfiMLvYbZ8QiW-04-HPsy_E0nbiGSbO7OYHGungumE4eE-Glt2mi9jiwqXsDvVPozZSWPrXcv4jGAQeRaTUdp1SbkFYdImB-EoO2YOahmQGtQ5WUGYeGiviuq0FDAQxsSXDRq4fzgSyBkYI4njRnOSX6eIHrYcQUNyEUL6s2xsoF2bX68rJ42xxcER83u9O3mb1Y4Yckgjbo0mfzbk7JBn5gi1bOYG5MzE05BLeEh5J_3fvdD1A72TP5bxjzgSN56fzg9UhCg2tYYVt6dW1h31_l9hXLJMD9-kztZIm72Xlj0cHOUsJH8JFgxeQhDbTYvp_sv5kplPvOHh3U2HEsC1d9U9lHwWUF7U1NAPABmF_JarsasrOhbFtefarcpynFf4XFYL4dveA3IpfSjMI2x2-pXEOOFLEYjiDmdTOHnJRrSoVuekENfVRc5pctQamv8A',
    'recaptcha_for_all_button': ''
}

# Seconds to wait on each request; without a timeout requests can block
# indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

link = 'https://kolnovel.com/swallowed-star-75403/'

page_response = requests.post(
    link, data=data, headers=headers, timeout=REQUEST_TIMEOUT
)
# Fail here with a clear HTTP error instead of parsing an error page below.
page_response.raise_for_status()
html_text = page_response.text
soup = BeautifulSoup(html_text, 'lxml')

# The 8th <link> element is expected to hold the URL of a JSON document.
# NOTE(review): positional indexes break as soon as the page markup changes;
# selecting by attribute would be sturdier - confirm which <link> is meant.
json_url = soup.find_all('link')[7]['href']
json_response = requests.get(
    json_url, headers=headers, timeout=REQUEST_TIMEOUT
)
json_response.raise_for_status()
postJson = json.loads(json_response.text)

# href of the 17th anchor on the page (same positional-index caveat as above).
# NOTE: this name shadows the builtin next(); it is kept so that any code
# following this snippet still finds the value under the same name.
next = soup.find_all('a')[16]['href']

And this is the link I am trying to scrape: https://kolnovel.com/swallowed-star-76124/

This is some related data; I am not sure whether it will help:

this.page.url = "https://kolnovel.com/swallowed-star-76124/";
this.page.identifier = "76124 https://kolnovel.com/?p=76124";
this.page.title = "swallowed star 13";
s.src = 'https://https-rewayatthima-blogspot-com-eg.disqus.com/embed.js';

0

There are 0 best solutions below