How to crawl all the links in a single page

#1

Is it possible to crawl all the links shown in the screenshot? Can anyone help?

#2

Hi @mprajapati944,

Yes, it is possible; it looks like a typical use case for Apify Crawler. You can set up a crawler for it. A good place to start is our Getting Started tutorial.

If you get stuck somewhere during implementation, let me know and I’ll try to help you.

Best,
Jakub D.

#3

Thanks @drobnikj, can you help me solve this error?

Now it shows this error: Error invoking user-provided 'pageFunction': Error: TypeError: undefined is not a function (evaluating '$('.jss92 li')').

I wrote this, but ended up with the above error:

function pageFunction(context) {

var category = [];
var category_value;
var i,j;
$('.jss92 li').each(function(i){ category_value = $(this).text();

    var obj = {category_value:category_value};
    category.push(obj);

});

if(context.request.label === “category”)
{
var noresult = $(’.jss2 h1’).text();

for(j=0;j<=category.length();i++){
    
    if(noresult == "No Results Available"){
       return;
   }    
    var get_pageno =  $('.search-results-footer .visible-xs').text();
    var pages = get_pageno.split("/");
     
    for(i=2;i<=pages[1];i++){
            context.enqueuePage({
             url: "https://www.bbb.org/en/ca/ab/calgary/category/"+category_value+"?page="+i,
            });
    }
}

}

if (context.request.label === "details")
{
     var $ = context.jQuery;
     var manufacturer = []; 
     context.skipLinks();   
     $('address').find('br').remove();
     $('address').text().replace(/"/g, '');
     var company   = $('.address').find('h1').text().trim();
     var contact   = $('.address').find('h3').text().trim();
     var address   = $('address').text().trim();
     var emails     = $('.business-buttons').find('a').eq(0).attr('href');
     var email;
     if(emails){
             email = emails.substring(7, 100);
     }else{
             email = " "; 
     }
     var website   = $('.business-buttons').find('a').eq(1).attr('href');
     
      var obj = {
         
         company:company,
         contact:contact,
         address:address,
         email:email,
         website:website
         
     }
    manufacturer.push(obj);    
}else{
    
    context.skipOutput();
}

return manufacturer;
}

#4

I wrote this, but I am getting this error:

Error invoking user-provided ‘pageFunction’: Error: TypeError: undefined is not a function (evaluating ‘$(’.jss92 li’)’).

var category = [];
var category_value;
var i,j;
$('.jss92 li').each(function(i){ category_value = $(this).text();

    var obj = {category_value:category_value};
    category.push(obj);

});

if(context.request.label === “category”)
{
var noresult = $(’.jss2 h1’).text();

for(j=0;j<=category.length();i++){
    
    if(noresult == "No Results Available"){
       return;
   }    
    var get_pageno =  $('.search-results-footer .visible-xs').text();
    var pages = get_pageno.split("/");
     
    for(i=2;i<=pages[1];i++){
            context.enqueuePage({
             url: "https://www.bbb.org/en/ca/ab/calgary/category/"+category_value+"?page="+i,
            });
    }
}

}

if (context.request.label === "details")
{
     var $ = context.jQuery;
     var manufacturer = []; 
     context.skipLinks();   
     $('address').find('br').remove();
     $('address').text().replace(/"/g, '');
     var company   = $('.address').find('h1').text().trim();
     var contact   = $('.address').find('h3').text().trim();
     var address   = $('address').text().trim();
     var emails     = $('.business-buttons').find('a').eq(0).attr('href');
     var email;
     if(emails){
             email = emails.substring(7, 100);
     }else{
             email = " "; 
     }
     var website   = $('.business-buttons').find('a').eq(1).attr('href');
      var obj = {
         company:company,
         contact:contact,
         address:address,
         email:email,
         website:website
     }
    manufacturer.push(obj);    
}else{
    context.skipOutput();
}

return manufacturer;

#5

Can you help me?

Error invoking user-provided ‘pageFunction’: Error: TypeError: undefined is not a function (evaluating ‘$(’.jss92 li’)’).

#6

Hi @mprajapati944,

You probably deleted var $ = context.jQuery; from your pageFunction. Without it, jQuery ($) is undefined.

#7

I am posting the code in parts:

function pageFunction(context) {
var = context.jQuery; var category = []; var category_value; var i,j; (’.jss92 li’).each(function(i){ category_value = (this).text(); var obj = {category_value:category_value}; category.push(obj); }); var noresult = (’.jss2 h1’).text();

for(j=0; j<= 100;i++){
if(noresult == “No Results Available”){
return;
}
var get_pageno = $(’.search-results-footer .visible-xs’).text();
var pages = get_pageno.split("/");
for(i=2;i<=pages[1];i++){
context.enqueuePage({
url: “https://www.bbb.org/en/ca/ab/calgary/category/"+category_value+"?page=”+i,
});
}
}

if (context.request.label === “details”)
{
var manufacturer = [];
context.skipLinks();
('address').find('br').remove(); (‘address’).text().replace(/"/g, ‘’);
var company = ('.address').find('h1').text().trim(); var contact = (’.address’).find(‘h3’).text().trim();
var address = ('address').text().trim(); var emails = (’.business-buttons’).find(‘a’).eq(0).attr(‘href’);
var email;
if(emails){
email = emails.substring(7, 100);
}else{
email = " ";
}
var website = $(’.business-buttons’).find(‘a’).eq(1).attr(‘href’);
var obj = {
company:company,
contact:contact,
address:address,
email:email,
website:website
}
manufacturer.push(obj);
}else{
context.skipOutput();
}
return manufacturer;
}

#8

Hello @mprajapati944,
you need to have this on the second page of the code:

  var $ = context.jQuery;

That is probably the problem that @drobnikj pointed out.

Let me know if that works or if you need some help with that.

Best,
Vaclav

#9

@rut.vaclav I am still not able to solve it.

Also, I do not understand how to write var $ = context.jQuery for the second page.

#10

You are missing $ there.

As Vasek and I mentioned, you have to have
var $ = context.jQuery;
at the top of your page function.

Best,
Jakub D.