then not working as expected in JS promise

385 Views Asked by At

I'm trying to extract text from a pdf and then return a number that represents how many pages of the pdf are matched by a regex that I define.

My problem is that, rather than periodically checking whether or not the text of a single page is part of the match, my function divides the pieces up into smaller sections than pages. Count is meant to increment only after an entire page has been read.

/**
 * Loads a PDF through pdfjs, requests the text of pages 1-7 in parallel, and
 * after each page's text arrives compares the text accumulated so far with
 * the first match of `somerx`, logging `count` the first time they differ.
 *
 * NOTE(review): as written this snippet does not parse -- see the notes at the
 * `function(page)` line and at the end of the block.
 * NOTE(review): `pdfjs` and `data` are not defined in this snippet; presumably
 * they come from the enclosing scope -- confirm.
 */
getnopages: function(){
     // Text of every page read so far; each item string is followed by '&&&'.
     var fulltext = ""
     // NOTE(review): never assigned at this level; the callback below declares
     // its own `partialmatch`, which shadows this one.
     var partialmatch;       
     var somerx = /something/
     return pdfjs.getDocument(data).then(function(pdf) {
     var pages = [];
     // NOTE(review): `pageNumbers` has no `var`/`let`/`const` in this snippet,
     // so unless it is declared elsewhere this creates an implicit global.
     pageNumbers = [];
     // Zero-based indices 0..6, i.e. seven pages are requested regardless of
     // how many pages the document actually has.
     for (var i = 0; i <= 6; i++) {
         pages.push(i);
     }
     var found = false;
     // Starts at 1 and is incremented *before* it is logged below, so the first
     // page to finish reports 2.
     var count = 1;
     // All page requests are started at once; the `.then` callbacks below run
     // as each page's promise settles, which is not necessarily page order.
     return Promise.all(pages.map(function(pageNumber) {
         pageNumbers.push(pageNumber);

         // pdf.getPage is called with a 1-based page number.
         // NOTE(review): the opening `{` of this callback's body is missing
         // after `function(page)`, which is a syntax error.
         return pdf.getPage(pageNumber + 1).then(function(page) 

             return page.getTextContent().then(function(textContent) {
                 return textContent.items.map(function(item) {
                     // Side effect: appends to the shared string as well as
                     // returning the item text.
                     fulltext+=item.str+'&&&';

                     return item.str;
                 }).join('&&&');
             });
         }).then(function(){
             // The joined page text resolved above is ignored here; only the
             // shared `fulltext` is inspected.
             count++;
             console.log('the count is ' + count)
             var partialmatch;
             try {
                 // String.prototype.match returns null when nothing matches,
                 // so `[0]` throws a TypeError that the catch below logs.
                 partialmatch = fulltext.match(somerx)[0]
                 console.log('the match: ' + partialmatch)
                 // Drop the trailing '&&&' separator (3 characters).
                 var full = fulltext.slice(0, fulltext.length-3)
             console.log('the full text ' + full)
             // Fires once: the first time the match is not the whole text.
             if (fulltext && partialmatch!==full && !found){
             found = true;
             console.log('now we found our number: ' + count)   // this finds where the full text differs from the partial text but returns a number too large to be a page number
             }                   
         }
         catch(e){
             console.log(e)
         }                               


         });             
     }));
// NOTE(review): the `.then(function(pdf) {` callback opened above is never
// closed with `});` before the final brace.
}

Can anyone help me figure out how to rewrite this so that count is incrementing page numbers correctly?

1

There is 1 best solution below

0
On

I don't know exactly where the problem in your code is, but I suggest avoiding deeply nested promises. You can reduce the nesting by chaining your promises, as shown below:

getnopages: function() {

    var somerx = /something/

    return pdfjs.getDocument(data).then(function(pdf) {     

        var pages = [];
        pageNumbers = [];
        for (var i = 0; i <= 6; i++) {
            pages.push(i);
        }
        var found = false;
        var count = 1;

        var promises = pages.map(pageNumber => {
            pageNumbers.push(pageNumber);
            return pdf.getPage(pageNumber + 1).then(page => {
                return page.getTextContent();
            }).then(textContent => {
                return textContent.items.map(item => { 
                    fulltext += item.str +'&&&'; 
                    return item.str;
                }).join('&&&');
            });
        });
        return Promise.all(promises).then(() => {
            ... 
        });

    });
}