How to scrape all image URLs and their alt text using Scrapy

Question 1

Below is the code with which I achieved the result, but the crawl depth is still 1:

# Crawl spider named 'imageaggr': starts from a fixed list of news/directory
# home pages and hands pages reached through the rule below to parse_item.
# NOTE(review): the class body lost its indentation when pasted; the lines
# below are meant to be the body of MySpider.
class MySpider(CrawlSpider):
name = 'imageaggr'
start_urls = ['http://www.dmoz.org/','http://timesofindia.indiatimes.com/','http://www.nytimes.com','http://www.washingtonpost.com/','http://www.jpost.com','http://www.rediff.com/']

rules = (
    # Active rule: allow pattern '' (presumably matches every link - confirm
    # against the link extractor docs), deny links matching 'defghi\.txt',
    # and pass each fetched page to parse_item.
    # NOTE(review): this rule sets a callback. Following links by default
    # applies only when no callback is given, so links found on the pages
    # handled by parse_item are presumably not followed - the likely reason
    # the crawl depth stays at 1. Confirm, and consider passing follow=True.

    Rule(SgmlLinkExtractor(allow=('', ), deny=('defghi\.txt')), callback='parse_item'),

    # Disabled alternative rules kept from earlier experiments:
   # Rule(SgmlLinkExtractor(allow=('\.cms','\.html' )), deny=('parse_item\.html'))),


    #Rule(SgmlLinkExtractor(allow=('news', )), callback='parse_item'),
)

def parse_item(self, response):
 """Build an imageItem describing an image found on the fetched page.

 The item records the page URL, the image's alt text and src, and the
 crawl time and date.

 As written, the ``return`` sits inside the ``while`` body, so the method
 returns during the first iteration: at most one item (for the first
 image) is produced. ``None`` is returned when the page has no ``<img>``
 elements or when the first alt value is empty.
 """
 sel = Selector(response)
 images = sel.xpath('//img')
 image_count = len(images)
 count = 0
 # Index-driven walk over the images; count is the position of the current one.
 while(count < image_count):
    item = imageItem()
    item['url'] = response.url
    # Re-runs the page-wide query on every pass and picks the count-th value.
    # NOTE(review): '//img/@alt' presumably yields only the alt attributes
    # that exist, so this list can be shorter than `images` (IndexError) or
    # out of step with the src list below - confirm.
    title = sel.xpath('//img/@alt').extract()[count] or ''
    if title == '':
     # An empty alt text ends the whole loop, not just this image.
     break
    item['title'] = title
    iurl = sel.xpath('//img/@src').extract()[count] or ''
    item['iurl'] = iurl
    # Human-readable local timestamp plus a compact YYYYMMDD date.
    item['crawl_time'] = time.asctime( time.localtime(time.time()))
    crawl_date = time.strftime("%Y%m%d")
    item['crawl_date'] = crawl_date
    count = count + 1
    # Inside the loop body: exits the method after the first image.
    return item

Question 2

Some issues there:

- You already have the sel selector, but you use hxs inside the loop.
- Inside the loop, you are using node instead of img.
- Wouldn't it make more sense for each loop iteration to yield one image item?

This is my tested and working code:

def parse_item(self, response):
    """Yield one imageItem for every <img> element in the response.

    Each item carries:
      url   -- the URL of the page the image was found on
      title -- the image's alt attribute, or '' when it has none
      iurl  -- the image's src attribute, or '' when it has none
    """
    for node in Selector(response).xpath('//img'):
        record = imageItem()
        record['url'] = response.url

        # Query relative to this <img> node so alt and src always belong
        # to the same image; a missing attribute gives an empty list.
        alt_values = node.xpath('./@alt').extract()
        record['title'] = alt_values[0] if alt_values else ''

        src_values = node.xpath('./@src').extract()
        record['iurl'] = src_values[0] if src_values else ''

        yield record