I need Scrapy to take an argument (-a FILE_NAME="stuff") from the command line and apply it to the file created by my CsvWriterPipeline in pipelines.py. (The reason I went with a pipeline was that the built-in exporter was iterating over the data and repeating the header in the output file. Same code, but writing in the pipeline fixed it.)

I tried `from scrapy.utils.project import get_project_settings`, as shown in

How to access scrapy settings from item Pipeline

but I cannot change the filename from the command line.

I also tried to implement @avleske's solution from that page, as it specifically addresses this issue, but I don't know where in my Scrapy project folder the code it describes should go.


# Project settings for the ``internal_links`` crawler.
BOT_NAME = "internal_links"

# The same spider package is listed for discovery and used for new spiders.
NEWSPIDER_MODULE = "internal_links.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Pipelines applied to scraped items.
ITEM_PIPELINES = ["internal_links.pipelines.CsvWriterPipeline"]

# Crawl responsibly: identify yourself (and your website) in the user agent.
USER_AGENT = "internal_links (+"

# Default output file name; a run can override it with `-s FILE_NAME=...`.
FILE_NAME = "mytestfilename"

import csv

class CsvWriterPipeline(object):
    """Item pipeline that writes each item's URL to a CSV file.

    The header row is written once, when the pipeline is created, and one
    row is appended per processed item. The constructor needs the output
    file name passed in explicitly.
    """

    def __init__(self, file_name):
        """Open ``file_name`` for writing and emit the header row.

        Args:
            file_name: Path of the CSV file to create (truncated if present).
        """
        # Local import so this class does not depend on a module-level change.
        import sys

        header = ["URL"]
        self.file_name = file_name
        # The csv module wants a binary file on Python 2, but a text file
        # opened with newline='' on Python 3.
        if sys.version_info[0] >= 3:
            self.csvfile = open(self.file_name, 'w', newline='')
        else:
            self.csvfile = open(self.file_name, 'wb')
        self.csvwriter = csv.writer(self.csvfile)
        self.csvwriter.writerow(header)

    def process_item(self, item, internallinkspider):
        """Append the item's URL as a CSV row and return the item unchanged.

        Args:
            item: Mapping-like item exposing a ``'url'`` key.
            internallinkspider: The spider that produced the item (unused).
        """
        # build your row to export, then export the row
        row = [item['url']]
        self.csvwriter.writerow(row)
        return item

    def close_spider(self, spider):
        """Close the output file so buffered rows are flushed to disk."""
        self.csvfile.close()

from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule
from internal_links.items import MyItem

class MySpider(CrawlSpider):
    """Crawl spider that records the URL of every response it is handed."""

    name = 'internallinkspider'
    allowed_domains = ['']
    start_urls = ['']

    # Single rule: keep following extracted links and pass each response
    # to ``parse_url``.
    rules = (
        Rule(SgmlLinkExtractor(), follow=True, callback='parse_url'),
    )

    def parse_url(self, response):
        """Return a ``MyItem`` whose ``url`` field is the response's URL."""
        link = MyItem()
        link['url'] = response.url
        return link



You can use Scrapy's "settings" together with the -s command-line argument:


scrapy crawl internallinkspider -s FILE_NAME="stuff"


Then in the pipeline:

import csv

class CsvWriterPipeline(object):
    """Item pipeline that writes each item's URL to a CSV file.

    The output file name is read from the ``FILE_NAME`` setting, so it can
    be overridden per run with ``-s FILE_NAME=...``. The header row is
    written once, when the pipeline is created.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline using ``FILE_NAME`` from the crawler settings.

        This must be a classmethod: it is called on the class itself,
        before any instance exists.
        """
        settings = crawler.settings
        file_name = settings.get("FILE_NAME")
        return cls(file_name)

    def __init__(self, file_name):
        """Open ``file_name`` for writing and emit the header row.

        Args:
            file_name: Path of the CSV file to create (truncated if present).
        """
        # Local import so this class does not depend on a module-level change.
        import sys

        header = ["URL"]
        # The csv module wants a binary file on Python 2, but a text file
        # opened with newline='' on Python 3.
        if sys.version_info[0] >= 3:
            self.csvfile = open(file_name, 'w', newline='')
        else:
            self.csvfile = open(file_name, 'wb')
        self.csvwriter = csv.writer(self.csvfile)
        self.csvwriter.writerow(header)

    def process_item(self, item, internallinkspider):
        """Append the item's URL as a CSV row and return the item unchanged.

        Args:
            item: Mapping-like item exposing a ``'url'`` key.
            internallinkspider: The spider that produced the item (unused).
        """
        # build your row to export, then export the row
        row = [item['url']]
        self.csvwriter.writerow(row)
        return item

    def close_spider(self, spider):
        """Close the output file so buffered rows are flushed to disk."""
        self.csvfile.close()




