I'm trying to parse IMDb movie connections (https://www.imdb.com/title/tt0090887/movieconnections/), but the entries hidden behind the "more" button are not loaded. I'm using got-scraping as the HTTP client and cheerio as the parser, but I can't get the content behind the "show more" button to load.
This is the code I've used:
// Loading the dependencies. We don't need pretty
// because we shall not log html to the terminal
const cheerio = require("cheerio");
const fs = require("fs");
const {exit} = require("process");
var HTMLParser = require("node-html-parser");
const {gotScraping} = require("got-scraping");
/**
 * Fetches the IMDb "movie connections" page for a title and groups the
 * connections found in the returned HTML by section.
 *
 * Only the markup contained in the HTTP response body is inspected: the
 * body is handed to cheerio, which parses HTML but does not run page
 * scripts, so entries that the site adds later (e.g. after pressing a
 * "more" button) are not part of the result.
 *
 * @param {string} id - IMDb title id, e.g. "tt0090887".
 * @returns {Promise<Object<string, Array<{titol: string, href: (string|undefined), desc: number}>>|null>}
 *   An object keyed by the `id` attribute of each section heading span;
 *   each value lists that section's connections. Resolves to `null` when
 *   the request or the parsing throws (the error is logged, not rethrown).
 */
const scrapeData = async function (id) {
  try {
    const url = `https://www.imdb.com/title/${id}/movieconnections/`;

    // Fetch the page and load its HTML into cheerio.
    const response = await gotScraping.get(url);
    const $ = cheerio.load(response.body);

    // Plain object rather than an array: the keys are section ids (strings),
    // and string keys on an array are invisible to length/JSON.stringify.
    const tot = {};

    $(".ipc-page-grid__item")
      .find(".ipc-page-section")
      .each((index, element) => {
        const section = $(element);

        // The heading span's id names the category of this section.
        const cat = section.find(".ipc-title__text > span").attr("id");
        if (cat == null) {
          // Sections without an identifying heading are skipped.
          return;
        }

        // From each category (e.g. followed_by) extract its connections.
        const conexions = [];
        section.find("ul.ipc-metadata-list > li ").each((k, elem) => {
          const entry = $(elem);
          conexions.push({
            titol: entry.find("ul.ipc-inline-list > div > div > p").text(),
            href: entry
              .find("ul.ipc-inline-list > div > div > p > a")
              .attr("href"),
            // Number of elements matched by the wrapper selector (a count,
            // not the description text itself).
            desc: entry.find("ul.ipc-inline-list > div > div ").length,
          });
        });

        tot[cat] = conexions;
      });

    return tot;
  } catch (err) {
    // Best-effort behaviour kept from the original: report and carry on.
    console.error(err);
    return null;
  }
};
