I cannot see the content when I wait for it to load the dynamic content of my page


#1

Hi. I have implemented the content of this post:

But the content is still not visible (the “Extras” field):

This is my code:

/**
 * Crawler page function for fotocasa.es.
 *
 * For requests labelled 'PAGE' it enqueues one 'DETAIL' request per entry in
 * window.__INITIAL_PROPS__.initialSearch.result.realEstates plus the next
 * result page (if the page advertises one). For every other request it polls
 * the DOM until the detail container exists, then extracts one record per
 * container and hands the records to context.finish().
 *
 * @param {Object} context Crawler context. This function uses
 *     context.jQuery, context.request.label, context.enqueuePage(),
 *     context.willFinishLater(), context.saveSnapshot() and context.finish().
 * @returns {undefined} Results are delivered through context.finish().
 */
function pageFunction(context) {
    var $ = context.jQuery;
    var startedAt = Date.now();

    // Container whose presence signals that the detail page has rendered.
    var DETAIL_SELECTOR = '.re-RealestateDetail';
    // Give up waiting for the container after this many milliseconds.
    var WAIT_TIMEOUT_MS = 10000;
    // Delay between two DOM polls, in milliseconds.
    var POLL_INTERVAL_MS = 500;

    // Joins the text of every element matching `selector` under `$root` with ';'.
    function joinTexts($root, selector) {
        return $root.find(selector).map(function () {
            return $(this).text();
        }).get().join(';');
    }

    // Polls until the detail container exists (or the timeout elapses), then
    // extracts the records and finishes the page.
    function extractData() {
        if (Date.now() - startedAt > WAIT_TIMEOUT_MS) {
            // Name the selector that was actually awaited so the failure is diagnosable.
            context.finish('Timed out before ' + DETAIL_SELECTOR + ' was loaded');
            return;
        }

        // Container not rendered yet: try again a little later.
        if ($(DETAIL_SELECTOR).length === 0) {
            setTimeout(extractData, POLL_INTERVAL_MS);
            return;
        }

        // Refresh page screenshot and HTML for debugging.
        context.saveSnapshot();

        var results = [];
        $(DETAIL_SELECTOR).each(function () {
            var $detail = $(this);
            results.push({
                titulo: $detail.find('.re-DetailHeader-propertyTitle').text(),
                precio: $detail.find('.re-DetailHeader-price').text().trim(),
                zona: $detail.find('.re-Breadcrumb-text').text().trim(),
                descripcion: $detail.find('.fc-DetailDescription').text().trim(),
                basico: joinTexts($detail, '.re-DetailHeader-features li'),
                caracteristicas: joinTexts($detail, '.detail-section-content li'),
                // NOTE(review): same selector as `caracteristicas`, so both fields are
                // always identical. The "Extras" section presumably needs its own
                // selector, and only the container above is awaited, so a section
                // rendered later may still be missing here. TODO confirm against
                // the live markup.
                extras: joinTexts($detail, '.detail-section-content li'),
                referencia: $detail.find('.re-DetailReference').text().trim(),
                particular: $detail.find('.re-ContactDetail-particularName').text().trim()
            });
        });
        context.finish(results);
    }

    if (context.request.label === 'PAGE') {
        var searchResults = window.__INITIAL_PROPS__.initialSearch.result.realEstates;
        for (var i = 0; i < searchResults.length; i++) {
            var result = searchResults[i];
            context.enqueuePage({ url: 'https://www.fotocasa.es' + result.detail['es-ES'], label: 'DETAIL' });
        }

        var nextPage = $('link[data-react-helmet="true"][rel="next"]');
        if (nextPage.length && nextPage.attr('href')) {
            context.enqueuePage({ url: nextPage.attr('href'), label: 'PAGE' });
        }
        return;
    }

    // Tell the crawler that pageFunction will finish asynchronously.
    context.willFinishLater();
    extractData();
}

Any ideas to fix this?

Thanks!


#2

Hi,

It can happen. Can you send me the link you want to scrape?
I will try to help with a better CSS selector.

You can also try to use “Infinite scroll height” option, it helps sometimes to load async content.


#3

Hi,

the page is:
https://www.fotocasa.es/es/comprar/casas/bilbao/todas-las-zonas/l

And the detail pages are the pages behind the links of each of the results.

Some of the content is not visible when the page is loaded.

Thanks :slight_smile:


#4

Hello,

I looked over your code and it seems it could be easily migrated to Apify Web Scraper, our new scraping solution. It’s just better in every way and I think it would easily solve the problem you’re having. You would just need to rewrite a few lines of your code.

Check it out at: https://apify.com/apify/web-scraper
Full Tutorial: https://apify.com/docs/scraping/web-scraper-tutorial
Part on Dynamic content: https://apify.com/docs/scraping/web-scraper-tutorial#waiting-for-dynamic-content