Log returning Forbidden Error 403, API call failed


#1

Hi,

I am scraping this page.
When I click the “Load more…” button at the bottom, the URL does not change. The page makes a xhr request when the button is clicked and also calls an authentication token before the xhr request. When I run my current crawler the log returns a Forbidden error 403.

This is my code:
function pageFunction(context) {

var = context.jQuery; var result = []; var _category = (".crumb___gkCdp:eq(1)").find(‘a:eq(0)’).text();
var _subcategory = (".crumb___gkCdp:eq(2)").find('a:eq(0)').text(); var _subsubcategory = (".crumbs___SEVo6").find(".currentPage___2fU_q").text();
var loadMoreButton = $(‘button.primary___2xk2l.button___2UT_5’);
result = [];
var extractData = function(page) {

        var token = "https://www.waitrose.com/api/authentication-prod/v3/authentication/token";
        var api = "https://www.waitrose.com/api/content-prod/v2/cms/publish/productcontent/browse/-1?clientType=WEB_APP" + page;
    
        $.ajax({  
            token : token,
            url: api,  
            type: 'GET',
            dataType: 'text'
        })
        
        .done(function(data, textStatus, jqXHR) {
            console.log("Data fetched, page:" + page);
            
            var html = JSON.parse(data).html;
            var status = JSON.parse(data).status;

            var jQueryObject = $.parseHTML("<div>" + html + "</div>");
            
            $(jQueryObject).find(".content___2QGG5").each( function() {
if (_category === "") {
    result.push({
        title : $(this).find(".name___2sgmL").text(),
        price : $(this).find(".prices___1JkR4").find('span:eq(1)').text(),
        image : $(this).find('img').attr('src'),
        
        category : $.trim(_subsubcategory),
        }); 
    }
    
else if (_subcategory === "") {
      result.push({
        title : $(this).find(".name___2sgmL").text(),
        price : $(this).find(".prices___1JkR4").find('span:eq(1)').text(),
        image : $(this).find('img').attr('src'),
        
        category : $.trim(_category),
        subcategory : $.trim(_subsubcategory),
        });
    }

else {
       result.push({
        title : $(this).find(".name___2sgmL").text(),
        price : $(this).find(".prices___1JkR4").find('span:eq(1)').text(),
        image : $(this).find('img').attr('src'),
        
        category : $.trim(_category),
        subcategory : $.trim(_subcategory),
        subsubcategory : $.trim(_subsubcategory),
        });
    }
    
    if (loadMoreButton.hasClass('disabled')) {
        context.finish(result);
    } else {
        console.log('Click next button');
        loadMoreButton[0].click();
        setTimeout(scrapeAndClick, 1000);
    }
    
            });

            if (status === "noresults") {
                context.finish(result);
                return;
            } else {
                extractData(++page);    
                return;
            }
        })
        .fail(function(xhr, textStatus, errorThrown) {
            console.log("API call failed");
            console.log(errorThrown);
            console.log(textStatus);
            context.finish();
        });
    };
    extractData(1);
    context.willFinishLater();
}

Any way to resolve this error? Thanks!


#2

Hello @Benjamin_Doyle,
have you tried to enable the Disable web security in advanced settings of the crawler?
Also you need that token for the POST, so you need to do a GET to the URL https://www.waitrose.com/api/authentication-prod/v3/authentication/token and from the response save the token and that pass as in a header of the POST request.
Give it a shot.
Best,
Vaclav