Which XPath to use when cleaning up the Google Play store?

I am browsing the google play store to view applications. I can only get 40 reviews. The problem is the xhr path when reaching the scrapy throws error:

Http status code not processed or resolved

code:

import scrapy
from scrapy.exceptions import CloseSpider
from scrapy.spider import BaseSpider
from scrapy.http import Request


class Product(scrapy.Item):
    brand = scrapy.Field()
    title = scrapy.Field()


class aqaqspider(BaseSpider):
    name = "gaana"
    allowed_domains = ["play.google.com"]
    start_urls = [
        "https://play.google.com/store/apps/details?id=com.gaana&hl=en",
    ]
    page = 1


    def parse(self, response):
        products = response.xpath('//div[@class="single-review"]')

        if not products:
            raise CloseSpider("No more products!")

        for product in products:
            item = Product()
            #item['brand'] = product.xpath(".//span[contains(@class, 'qa-brandName')]/text()").extract()[0].strip()
            item['title'] = product.xpath('.//.//div/div/span[@class="author-name"]/a/text()').extract()[0].strip()
            yield item

        self.page += 1
        yield Request(url="https://play.google.com/store/getreviews?authuser=1" ,
                      headers={"Referer": "https://play.google.com/store/apps/details?id=com.gaana&hl=en", "X-Requested-With": "XMLHttpRequest"},
                      callback=self.parse, 
                      dont_filter=True)

      

Please don't say this is against the Terms of Service. I know this, but I need to learn and move on. I am not using anything.

+3


source to share





All Articles