Pergunta

I'm having a bit of trouble getting into this whole async stuff. I'm using pdf.js to read the contents of a PDF file. Everything is working; however, the execution order is causing me trouble. This is the code I have:

/**
 * Opens a document with PDFJS.getDocument and, for every page, requests the
 * page and then its text content.
 *
 * The function has no return statement, so callers receive `undefined`; all
 * work happens inside the nested `then` callbacks.
 *
 * @param {*} data - Passed unchanged to PDFJS.getDocument.
 */
function getText(data) {
    var contents = [];
    PDFJS.getDocument(data).then(function(pdf) {
        var numPages = pdf.pdfInfo.numPages;
        // Pages are requested with numbers 1..numPages; one getPage call is issued per iteration.
        for (var i = 1; i <= numPages; i++) {
            pdf.getPage(i).then(function(page) {
                page.getTextContent().then(function(content) {
                    // NOTE(review): Array.prototype.concat returns a new array and does not
                    // modify `contents`; the returned array is discarded here.
                    // NOTE(review): presumably bidiTexts is an array of text items — confirm
                    // against the pdf.js version in use.
                    contents.concat(content.bidiTexts);
                })
            });
        }
    })
}

This is working as intended; however, I would like to pass contents along once all pages are processed and their bidiTexts have been concatenated to contents. If I just put a function call with contents as its argument before the last closing }, it gets called too early.

Foi útil?

Solução

Try using promises:

/**
 * Collects the bidiTexts of every page of a PDF and runs a completion step
 * once all pages have been processed.
 *
 * One jQuery Deferred is created per page; $.when fires after all of them
 * have been resolved.
 *
 * @param {*} data - Passed unchanged to PDFJS.getDocument.
 * @returns {*} The value of `PDFJS.getDocument(data).then(...)`. When the
 *     promise implementations involved chain callback return values, it
 *     settles with the combined `contents` array in page order.
 */
function getText(data) {
    return PDFJS.getDocument(data).then(function(pdf) {
        const numPages = pdf.pdfInfo.numPages;
        const textsByPage = []; // slot (pageNumber - 1) holds that page's bidiTexts
        const promises = [];

        // Each call owns its own deferred. Sharing one function-scoped variable
        // across loop iterations would make every callback resolve only the
        // deferred created last, leaving the others pending forever.
        function readPage(pageNumber) {
            const deferred = $.Deferred(); //Create a deferred object

            pdf.getPage(pageNumber).then(function(page) {
                page.getTextContent().then(function(content) {
                    // Store by page index so the final order does not depend
                    // on which page finishes first.
                    textsByPage[pageNumber - 1] = content.bidiTexts;
                    deferred.resolve(); //resolve the deferred object
                });
            });

            return deferred.promise();
        }

        for (let i = 1; i <= numPages; i++) {
            promises.push(readPage(i)); //push promise to the list
        }

        return $.when.apply($, promises).then(function() { //callback executed when all deferreds are resolved
            // concat returns a new array, so its result must be kept.
            const contents = [].concat(...textsByPage);
            //do your task with contents
            return contents;
        });
    });
}

This is just a demo of how to use promises. In real applications, you have to take care of errors by calling deferred.reject and handling the failure in the second callback passed to $.when(...).then.

Outras dicas

You can use async/await:

/**
 * Reads the pages of a PDF one after another and gathers their bidiTexts.
 *
 * @param {*} data - Passed unchanged to PDFJS.getDocument.
 * @returns {Promise<Array>} Resolves with the bidiTexts of all pages,
 *     concatenated in page order.
 */
async function getText(data) {
    const pdf = await PDFJS.getDocument(data);
    const numPages = pdf.pdfInfo.numPages;

    let contents = [];

    // Pages are awaited sequentially, so the result is in page order.
    for (let i = 1; i <= numPages; i++) {
      const page = await pdf.getPage(i);
      const content = await page.getTextContent();

      // concat returns a new array instead of modifying the receiver,
      // so the result has to be assigned back.
      contents = contents.concat(content.bidiTexts);
    }

    return contents;
}

Note that getText will return a promise; you can use async/await again to access the result, or getText(data).then(result => console.log(result));

If you want to get the pages in parallel, use Promise.all, then await the .getTextContent() calls:

/**
 * Requests all pages of a PDF in parallel, then requests the text content of
 * every page in parallel.
 *
 * @param {*} data - Passed unchanged to PDFJS.getDocument.
 * @returns {Promise<Array>} Resolves with one getTextContent() result per
 *     page, in page order (Promise.all keeps the order of its input).
 */
async function getText(data) {
  const pdf = await PDFJS.getDocument(data);
  const numPages = pdf.pdfInfo.numPages;

  const pagePromises = [];

  // Start every page request before awaiting any of them.
  for (let i = 1; i <= numPages; i++) {
    pagePromises.push(pdf.getPage(i));
  }

  const pages = await Promise.all(pagePromises);

  // map yields an array of promises; they must go through Promise.all
  // to be awaited together.
  const result = await Promise.all(pages.map((page) => page.getTextContent()));

  return result;
}
Licenciado em: CC-BY-SA com atribuição
Não afiliado a StackOverflow
scroll top