Add a field to each item of a JSON array


#1

Hi,

I need to scrape an array of {foo: foo, bar: bar}. The start URL returns JSON, which I parse into my results array:

// Obtain jQuery from the crawler context.
// NOTE(review): the working snippet further down uses `context.jQuery` without calling it — confirm which form is correct.
var $ = context.jQuery();
// The page body holds JSON text inside a <pre> element; read and parse it.
var myjson = JSON.parse($('body pre').text());
// Build one result object per entry of `listings`.
return myjson.listings.map(function(item) {
    return {
        foo: item.foo,
        // Placeholder, not valid JavaScript: `bar` is not part of this JSON and has to come from the page at `item.url`.
        bar: ???,
        url: item.url
    }
});

However, bar is not in this JSON; I have to crawl the url of each listing and scrape bar from that page.

It seems to me the referrer.pageFunctionResult approach won’t work, because all listings have the same referrer.

I think a helper function wouldn’t work either because of asynchrony, or would it? And how would the crawler know when to call context.finish()?

What would be a sensible approach to this kind of problem?


#2

I came back to this and got it working using the following:

/**
 * Page function handling two kinds of pages, told apart by the request label.
 *
 * 'starturl' pages: the text of `body pre` is parsed as JSON and every entry
 * of its `results` array is enqueued as a 'details' page, with the entry's
 * `foo` value carried along in `interceptRequestData`. Output is skipped for
 * the start page itself.
 *
 * 'details' pages: the carried data is extended with `bar`, read from the
 * text of the `#barid` element, and returned as the page result.
 *
 * @param {Object} context - Crawler context; this function uses its `jQuery`,
 *     `request`, `skipOutput` and `enqueuePage` members.
 * @returns {Object|undefined} For 'details' pages, the carried data object
 *     with `bar` added (the same object, mutated in place); otherwise
 *     undefined.
 */
function pageFunction(context) {
    var $ = context.jQuery;
    var label = context.request.label;

    if (label === 'starturl') {
        context.skipOutput(); // do not save to results
        var listing = JSON.parse($('body pre').text());
        // forEach visits array elements only; for...in would also walk any
        // other enumerable keys found on the array or its prototype chain.
        listing.results.forEach(function (item) {
            context.enqueuePage({
                url: item.url,
                label: 'details',
                interceptRequestData: {
                    foo: item.foo
                }
            });
        });
        return undefined;
    }

    if (label === 'details') {
        // Declared locally: a bare assignment here would create an implicit
        // global, and throws a ReferenceError in strict mode.
        var detail = context.request.interceptRequestData;
        detail.bar = $('#barid').text();
        return detail;
    }

    // Any other label produces no result.
    return undefined;
}