Question 1

My spider runs without displaying any errors, but the images are not stored in the folder. Here are my Scrapy files:

Spider.py:

import scrapy
import re
import os
import urlparse
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.loader.processors import Join, MapCompose, TakeFirst
from scrapy.pipelines.images import ImagesPipeline
from production.items import ProductionItem, ListResidentialItem


class productionSpider(scrapy.Spider):
    """Follow the "show photos" link of each start page and collect image URLs.

    ``parse`` locates the ``detail_nav_showphotos`` anchor and requests the
    page it points to; ``parseBasicListingInfo`` then returns one
    ``ListResidentialItem`` whose ``image_urls`` field holds the URLs found
    on that page.
    """

    name = "production"
    allowed_domains = ["someurl.com"]
    # NOTE(review): placeholder value. Scrapy requests need an absolute URL
    # including the scheme (e.g. "http://..."); confirm the real start URL.
    start_urls = ["someurl.com"]

    def parse(self, response):
        """Request the photo page linked from ``response``.

        Yields:
            A ``scrapy.Request`` for the photo page, handled by
            ``parseBasicListingInfo``.
        """
        for sel in response.xpath('//html/body'):
            item = ProductionItem()
            # extract_first() returns None instead of raising IndexError
            # when the page has no matching link.
            img_url = sel.xpath(
                '//a[@data-tealium-id="detail_nav_showphotos"]/@href'
            ).extract_first()
            if img_url is None:
                self.logger.warning("No photo link found on %s", response.url)
                continue
            yield scrapy.Request(
                response.urljoin(img_url),
                callback=self.parseBasicListingInfo,
                # Kept for compatibility; the callback does not read it.
                meta={'item': item},
            )

    def parseBasicListingInfo(self, response):
        """Build the item carrying the image URLs found on the photo page.

        Returns:
            A ``ListResidentialItem`` whose ``image_urls`` is always a list
            (possibly empty) of absolute, whitespace-stripped URLs.
        """
        item = ListResidentialItem()
        raw_urls = response.xpath(
            '//a[@itemprop="contentUrl"]/@data-href'
        ).extract()
        # Joining against the response URL makes relative links downloadable
        # and leaves absolute ones unchanged.
        item['image_urls'] = [response.urljoin(url.strip()) for url in raw_urls]
        return item

settings.py:

from scrapy.settings.default_settings import ITEM_PIPELINES
from scrapy.pipelines.images import ImagesPipeline

# Scrapy project settings for the "production" bot.
BOT_NAME = 'production'

SPIDER_MODULES = ['production.spiders']
NEWSPIDER_MODULE = 'production.spiders'
DEFAULT_ITEM_CLASS = 'production.items'

ROBOTSTXT_OBEY = True
DEPTH_PRIORITY = 1

# The images pipeline reads IMAGES_STORE (plural). With the setting missing
# or misspelled the pipeline stays disabled even though it is listed in
# ITEM_PIPELINES, so no images are saved and no error is shown.
# NOTE(review): '/images' is at the filesystem root; confirm it is writable.
IMAGES_STORE = '/images'

CONCURRENT_REQUESTS = 250
DOWNLOAD_DELAY = 2

# Use the same module path as the import above rather than the deprecated
# scrapy.contrib location.
ITEM_PIPELINES = {
    'scrapy.pipelines.images.ImagesPipeline': 300,
}

items.py

# -*- coding: utf-8 -*-
import scrapy


class ProductionItem(scrapy.Item):
    """Item declaring a single ``img_url`` field."""

    img_url = scrapy.Field()


# ScrapingList Residential & Yield Estate for sale
class ListResidentialItem(scrapy.Item):
    """Item declaring the ``image_urls`` and ``images`` fields.

    These are the field names Scrapy's images pipeline uses by default:
    it reads URLs from ``image_urls`` and stores download results in
    ``images``.
    """

    image_urls = scrapy.Field()
    images = scrapy.Field()

My pipeline file is empty; I'm not sure what I am supposed to add to the pipeline.py file.

Any help is greatly appreciated.

Question 2

My Working end result:

spider.py:

import scrapy
import re
import urlparse
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.loader.processors import Join, MapCompose, TakeFirst
from scrapy.pipelines.images import ImagesPipeline
from production.items import ProductionItem
from production.items import ImageItem


class productionSpider(scrapy.Spider):
    """Follow one link from each start page and emit an item per image found.

    ``parse`` requests the page behind the ``followclaslink`` anchor;
    ``parseImages`` yields one ``ImageItem`` for every ``<img>`` on that
    page that has a ``src`` attribute.
    """

    name = "production"
    allowed_domains = ["url"]
    # NOTE(review): placeholder value. Scrapy requests need an absolute URL
    # including the scheme; confirm the real start URL.
    start_urls = ["startingurl.com"]

    def parse(self, response):
        """Request the page that holds the images.

        Yields:
            A ``scrapy.Request`` handled by ``parseImages``.
        """
        for sel in response.xpath('//html/body'):
            item = ProductionItem()
            # extract_first() returns None instead of raising IndexError
            # when the link is absent.
            img_url = sel.xpath('//a[@idd="followclaslink"]/@href').extract_first()
            if img_url is None:
                self.logger.warning("No image-page link found on %s", response.url)
                continue
            yield scrapy.Request(
                response.urljoin(img_url),
                callback=self.parseImages,
                # Kept for compatibility; the callback does not read it.
                meta={'item': item},
            )

    def parseImages(self, response):
        """Yield one ``ImageItem`` per ``<img>`` element with a usable ``src``.

        Yields:
            ``ImageItem`` instances whose ``image_urls`` is a one-element
            list holding an absolute URL.
        """
        for elem in response.xpath("//img"):
            img_url = elem.xpath("@src").extract_first()
            if not img_url:
                # <img> without a src: nothing to download.
                continue
            # The images pipeline needs absolute URLs; relative src values
            # are resolved against the page URL.
            yield ImageItem(image_urls=[response.urljoin(img_url)])

Settings.py

# Scrapy project settings for the "production" bot.
BOT_NAME = 'production'

SPIDER_MODULES = ['production.spiders']
NEWSPIDER_MODULE = 'production.spiders'
DEFAULT_ITEM_CLASS = 'production.items'

ROBOTSTXT_OBEY = True

# Directory where the images pipeline stores downloaded files.
IMAGES_STORE = '/Users/home/images'

DOWNLOAD_DELAY = 2

# Enables Scrapy's stock ImagesPipeline.
# NOTE(review): a custom MyImagesPipeline subclass is not registered here,
# so only the stock pipeline runs; confirm that is intended.
ITEM_PIPELINES = {
    'scrapy.pipelines.images.ImagesPipeline': 1,
}
# Disable cookies (enabled by default)

items.py

# -*- coding: utf-8 -*-
import scrapy


class ProductionItem(scrapy.Item):
    """Item declaring a single ``img_url`` field."""

    img_url = scrapy.Field()


# ScrapingList Residential & Yield Estate for sale
class ListResidentialItem(scrapy.Item):
    """Item declaring the ``image_urls`` and ``images`` fields."""

    image_urls = scrapy.Field()
    images = scrapy.Field()


class ImageItem(scrapy.Item):
    """Item yielded by the spider for each image to download.

    ``image_urls`` and ``images`` are the field names Scrapy's images
    pipeline uses by default.
    """

    image_urls = scrapy.Field()
    images = scrapy.Field()
    # Declared so that a pipeline assigning item['image_paths'] does not hit
    # the KeyError Scrapy raises for undeclared fields.
    image_paths = scrapy.Field()

pipelines.py

import scrapy
from scrapy.pipelines.images import ImagesPipeline
from scrapy.exceptions import DropItem


class MyImagesPipeline(ImagesPipeline):
    """Images pipeline that drops items for which no image was downloaded."""

    def get_media_requests(self, item, info):
        """Yield one download request per URL in ``item['image_urls']``."""
        for url in item['image_urls']:
            yield scrapy.Request(url)

    def item_completed(self, results, item, info):
        """Record the stored paths of successful downloads on the item.

        ``results`` is iterated as ``(ok, value)`` pairs; the ``'path'``
        entry of each successful pair is collected.

        Raises:
            DropItem: if none of the results succeeded.
        """
        stored_paths = []
        for succeeded, result in results:
            if succeeded:
                stored_paths.append(result['path'])

        if not stored_paths:
            raise DropItem("Item contains no images")

        item['image_paths'] = stored_paths
        return item

Scrapy Images Downloading

Related Q&A

A full and minimal example for a class (not method) with Python C Extension?

Python: Grouping into timeslots (minutes) for days of data

signal.alarm not triggering exception on time

Execute Python (selenium) script in crontab

Get post data from ajax post request in python file

How to implement maclaurin series in keras?

Rowwise min() and max() fails for column with NaNs

Convert column suffixes from pandas join into a MultiIndex

sys-package-mgr*: cant create package cache dir when run python script with Jython

Python WWW macro