Parsing IMDB movie connections page with load more button

73 Views Asked by At

I'm trying to parse the IMDb movie connections page (https://www.imdb.com/title/tt0090887/movieconnections/), but the entries hidden behind the "more" button are not loaded. I'm using got-scraping as the HTTP client and cheerio as the parser, but I can't get the content that the "show more" button loads.

enter image description here

This is the code I've used:

// Loading the dependencies. We don't need pretty
// because we shall not log html to the terminal
const cheerio = require("cheerio");
const fs = require("fs");
const {exit} = require("process");
var HTMLParser = require("node-html-parser");
const {gotScraping} = require("got-scraping");

// Async function which scrapes the data
/**
 * Scrapes the IMDb "movie connections" page of a title and groups the
 * connection entries found in the fetched HTML by category.
 *
 * Only the markup contained in the single HTTP response is parsed; the
 * function issues no further requests.
 *
 * @param {string} id - Title identifier inserted into the URL path
 *   (e.g. "tt0090887").
 * @returns {Promise<Object<string, Array<{titol: string, href: (string|undefined), desc: number}>>|undefined>}
 *   An object keyed by the `id` attribute of each section's title span; every
 *   value is the list of connections found in that section. Resolves to
 *   `undefined` when fetching or parsing throws (the error is logged).
 */
const scrapeData = async function (id) {
  try {
    const url = `https://www.imdb.com/title/${id}/movieconnections/`;

    // Fetch the HTML of the page we want to scrape
    const data = await gotScraping.get(url);

    // Load the fetched HTML into cheerio
    const $ = cheerio.load(data.body);

    // Category id -> list of connections. A plain object (not an array) so
    // the string keys show up in Object.keys() and JSON output.
    const tot = {};

    $(".ipc-page-grid__item")
      .find(".ipc-page-section")
      .each((index, element) => {
        const cat = $(element).find(".ipc-title__text > span").attr("id");

        // Sections whose title span carries no id are skipped
        if (cat != null) {
          // Fresh list per category so entries never leak between sections
          const conexions = [];

          // For each category (e.g. followed_by) extract its connections
          $(element)
            .find("ul.ipc-metadata-list > li")
            .each((k, elem) => {
              const item = $(elem);
              const titleConex = item
                .find("ul.ipc-inline-list > div > div > p")
                .text();
              const hrefConex = item
                .find("ul.ipc-inline-list > div > div > p > a")
                .attr("href");
              // NOTE(review): `desc` is the NUMBER of matched blocks, not a
              // text description — kept as in the original; confirm intent.
              const description = item.find(
                "ul.ipc-inline-list > div > div"
              ).length;

              conexions.push({
                titol: titleConex,
                href: hrefConex,
                desc: description,
              });
            });

          tot[cat] = conexions;
        }
      });

    return tot;
  } catch (err) {
    // Best-effort: log the failure and resolve to undefined
    console.error(err);
    return undefined;
  }
};
0

There are 0 best solutions below