How to get the full address for a Google Maps listing

#1

Hello ,

How can I scrape this from Google listings? Could anybody please guide me through it?

I want to get the full address; see the section marked in red in the image.

Please find the link : https://www.google.com/search?safe=off&q=accountants+london&npsic=0&rflfq=1&rlha=0&rllag=51541758,-169706,7089&tbm=lcl&ved=0ahUKEwjM76rt1v3aAhUBLo8KHZYfBUoQjGoIfw&tbs=lrf:!2m1!1e2!2m1!1e3!3sIAE,lf:1,lf_ui:2&rldoc=1#rlfi=hd:;si:;mv:!1m3!1d66792.1966013642!2d-0.1035778!3d51.54175864999999!2m3!1f0!2f0!3f0!3m2!1i306!2i323!4f13.1;tbs:lrf:!2m1!1e2!2m1!1e3!3sIAE,lf:1,lf_ui:2

I have managed to get the left side:

/**
 * Crawler page function: collects one record per business card found in
 * the results list of the loaded page.
 *
 * @param {Object} context - crawler context; only `context.jQuery` is read.
 * @returns {Array<Object>} one object per `.VkpGBb` element, with the keys
 *   `clubname`, `clubdesc1` .. `clubdesc5` and `club_web`.
 */
function pageFunction(context) {
    var $ = context.jQuery;
    var club_details = [];

    $('.VkpGBb').each(function () {
        var card = $(this);
        // Query the detail rows once and index into the result for each field.
        var detailRows = card.find('.rllt__details').find('div');

        club_details.push({
            clubname: card.find('.dbg0pd').eq(0).text(),
            clubdesc1: detailRows.eq(0).text(),
            clubdesc2: detailRows.eq(1).text(),
            clubdesc3: detailRows.eq(2).text(),
            clubdesc4: detailRows.eq(3).text(),
            clubdesc5: detailRows.eq(4).text(),
            // href of the second anchor inside the card.
            club_web: card.find('a').eq(1).attr('href')
        });
    });

    return club_details;
}

#2

Can anybody help?

#3

Hi @virtualqube,

I found one solution. You can enqueue the page with the detail panel opened as the next page in your queue and then scrape it.
To do that, you need to add the parameter si:4568900; to the URL. You can find the value in the element with the data-cid attribute.

Here is a redesigned pageFunction that may help you:

/**
 * Page function shared by the listing page and the per-business detail pages.
 *
 * - Requests labelled 'DETAIL': returns `{ address }`, read from `.LrzXr`.
 * - Any other request: for every `.VkpGBb` card, inserts the card's
 *   `data-cid` value into the `si:;` placeholder of the loaded URL and
 *   enqueues the result as a 'DETAIL' page; the listing page itself
 *   produces no output.
 *
 * @param {Object} context - crawler context (`jQuery`, `request`,
 *   `enqueuePage`, `skipOutput` are used).
 * @returns {Object|undefined} the detail record, or nothing for listings.
 */
function pageFunction(context) {
    var $ = context.jQuery;

    if (context.request.label === 'DETAIL') {
        return { address: $('.LrzXr').text() };
    }

    var listingUrl = context.request.loadedUrl;
    console.log(listingUrl);

    $('.VkpGBb').each(function () {
        var si = $(this).find('[data-cid]').eq(0).attr('data-cid');
        if (!si) {
            // Without a data-cid there is nothing to put into the URL; enqueuing
            // would produce a "si:undefined;" page with an undefined uniqueKey.
            return;
        }

        var url = listingUrl.replace('si:;', 'si:' + si + ';');
        console.log(url);
        context.enqueuePage({
            url: url,
            uniqueKey: si,
            label: 'DETAIL',
            queuePosition: 'FIRST'
        });
    });
    context.skipOutput();
}

I hope it helps.

#4

@drobnikj it worked, thanks a lot. I am learning scraping, and it helped me…

#5

@drobnikj, that solved it, but the output stops even though pages are still being crawled.
Check the number of pages crawled: the output contains only 250 results. Why does it stop outputting?

/**
 * Page function handling both the listing page and the enqueued detail pages.
 *
 * - Requests labelled 'DETAIL': schedules a 2100 ms timer, then calls
 *   `context.willFinishLater()` and returns the (still empty) results array.
 *   When the timer fires, one record is read from the page, pushed into that
 *   same array, and the array is handed to `context.finish()`.
 * - Any other request: enqueues one 'DETAIL' page per `.VkpGBb` card, using a
 *   fixed search URL with the card's `data-cid` spliced in after `si:`, and
 *   skips output for the listing page.
 *
 * @param {Object} context - crawler context (`jQuery`, `request`,
 *   `enqueuePage`, `skipOutput`, `willFinishLater`, `finish` are used).
 * @returns {Array|undefined} the results array for detail pages, nothing otherwise.
 */
function pageFunction(context) {
    var $ = context.jQuery;
    console.log($);

    var Business_listings = [];

    if (context.request.label !== 'DETAIL') {
        // Fixed search URL, split around the position where the card id goes.
        var urlHead = "https://www.google.com/search?safe=off&q=accountants+london&npsic=0&rflfq=1&rlha=0&rllag=51541758,-169706,7089&tbm=lcl&ved=0ahUKEwjM76rt1v3aAhUBLo8KHZYfBUoQjGoIfw&tbs=lrf:!2m1!1e2!2m1!1e3!3sIAE,lf:1,lf_ui:2&rldoc=1#rlfi=hd:;si:";
        var urlTail = ";mv:!1m3!1d66792.1966013642!2d-0.1035778!3d51.54175864999999!2m3!1f0!2f0!3f0!3m2!1i306!2i323!4f13.1;tbs:lrf:!2m1!1e2!2m1!1e3!3sIAE,lf:1,lf_ui:2";

        $('.VkpGBb').each(function () {
            var si = $(this).find('[data-cid]').eq(0).attr('data-cid');

            context.enqueuePage({
                url: urlHead + si + urlTail,
                uniqueKey: si,
                label: 'DETAIL',
                queuePosition: 'FIRST'
            });
        });
        context.skipOutput();
        return;
    }

    // Detail page: the fields are read only after the delay has elapsed.
    setTimeout(function () {
        Business_listings.push({
            Business_name: $('.d1rFIf').text(),
            Business_description: $('.YhemCb').text(),
            Business_address: $('.LrzXr').text(),
            Business_timings: $('.TLou0b').text(),
            Business_contact: $('.kno-fv').text(),
            Business_website: $('.LJOFid').attr('href')
        });

        context.finish(Business_listings);
    }, 2100);
    // The deferred finish is announced after the timer has been scheduled.
    context.willFinishLater();

    return Business_listings;
}

#6

You are calling context.willFinishLater(); in the wrong place. It should be called before the timeout function.

/**
 * Page function handling both the listing page and the enqueued detail pages.
 *
 * - Requests labelled 'DETAIL': calls `context.willFinishLater()`, then after
 *   2100 ms reads one business record from the page and passes it, wrapped in
 *   an array, to `context.finish()`.
 * - Any other request: enqueues one 'DETAIL' page per `.VkpGBb` card that
 *   carries a `data-cid`, using a fixed search URL with that id spliced in
 *   after `si:`, and skips output for the listing page.
 *
 * @param {Object} context - crawler context (`jQuery`, `request`,
 *   `enqueuePage`, `skipOutput`, `willFinishLater`, `finish` are used).
 * @returns {Array|undefined} the results array for detail pages, nothing otherwise.
 */
function pageFunction(context) {
    var $ = context.jQuery;
    var Business_listings = [];

    if (context.request.label === 'DETAIL') {
        // Announce the deferred finish before the timer is scheduled.
        context.willFinishLater();
        // NOTE(review): the 2100 ms wait presumably gives the detail panel
        // time to render - confirm the delay is still sufficient.
        setTimeout(function () {
            Business_listings.push({
                Business_name: $('.d1rFIf').text(),
                Business_description: $('.YhemCb').text(),
                Business_address: $('.LrzXr').text(),
                Business_timings: $('.TLou0b').text(),
                Business_contact: $('.kno-fv').text(),
                Business_website: $('.LJOFid').attr('href')
            });

            context.finish(Business_listings);
        }, 2100);

        // Still empty at this point; the record is delivered via finish().
        return Business_listings;
    }

    // Fixed search URL, split around the position where the card id goes.
    var urlHead = "https://www.google.com/search?safe=off&q=accountants+london&npsic=0&rflfq=1&rlha=0&rllag=51541758,-169706,7089&tbm=lcl&ved=0ahUKEwjM76rt1v3aAhUBLo8KHZYfBUoQjGoIfw&tbs=lrf:!2m1!1e2!2m1!1e3!3sIAE,lf:1,lf_ui:2&rldoc=1#rlfi=hd:;si:";
    var urlTail = ";mv:!1m3!1d66792.1966013642!2d-0.1035778!3d51.54175864999999!2m3!1f0!2f0!3f0!3m2!1i306!2i323!4f13.1;tbs:lrf:!2m1!1e2!2m1!1e3!3sIAE,lf:1,lf_ui:2";

    $('.VkpGBb').each(function () {
        var si = $(this).find('[data-cid]').eq(0).attr('data-cid');
        if (!si) {
            // Without a data-cid the URL would contain "si:undefined;" and the
            // request would get an undefined uniqueKey, so skip the card.
            return;
        }

        context.enqueuePage({
            url: urlHead + si + urlTail,
            uniqueKey: si,
            label: 'DETAIL',
            queuePosition: 'FIRST'
        });
    });
    context.skipOutput();
}
#7

@drobnikj no change, it still stops at the same point…

#8

Hi @virtualqube,

Did you check the screenshot from the crawler? For me it shows that Google is blocking our IPs:

In this case, I recommend using some dedicated proxy IPs. We can offer you some if you want; just let us know in the chat on the Apify page.