سؤال

هذا هو مثال BaseSpider من دليل Scrapy التعليمي:

from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector

from dmoz.items import DmozItem

class DmozSpider(BaseSpider):
   """Spider that turns list entries on two dmoz.org Python pages into items.

   Every ``<li>`` matched on a fetched page yields one ``DmozItem`` carrying
   the entry's link text, link target and surrounding description text.
   """

   # NOTE(review): presumably the identifier Scrapy uses for this spider --
   # confirm against the Scrapy version in use.
   domain_name = "dmoz.org"
   # Seed URLs: the "Books" and "Resources" pages of the dmoz Python category.
   start_urls = [
       "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
       "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/"
   ]

   def parse(self, response):
       """Extract one ``DmozItem`` per ``<li>`` matched by ``//ul[2]/li``.

       Args:
           response: the fetched page, wrapped in an ``HtmlXPathSelector``.

       Returns:
           A list of ``DmozItem`` objects with ``title``, ``link`` and
           ``desc`` set to whatever ``extract()`` returns for each XPath
           (presumably a list of strings -- confirm).
       """
       hxs = HtmlXPathSelector(response)
       # One selector per list entry.
       sites = hxs.select('//ul[2]/li')
       items = []
       for site in sites:
           # DmozItem() is *called*: a fresh item instance per entry.
           item = DmozItem()
           # XPaths are relative to the current <li>, not to the whole page.
           item['title'] = site.select('a/text()').extract()
           item['link'] = site.select('a/@href').extract()
           item['desc'] = site.select('text()').extract()
           items.append(item)
       return items

# Module-level spider instance.
# NOTE(review): presumably how this Scrapy version discovers the spider -- confirm.
SPIDER = DmozSpider()

وقد نسختُه مع بعض التعديلات ليناسب مشروعي:

from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector
from scrapy.item import Item
from firm.items import FirmItem

class Spider1(CrawlSpider):
    """Spider for the whitecase.com attorney list page (``LastName=A``).

    NOTE(review): as written, ``parse()`` fails with
    ``TypeError: 'ItemMeta' object does not support item assignment``
    because it assigns into the ``FirmItem`` class instead of an instance;
    see the BUG comment inside ``parse()``.
    """

    # NOTE(review): presumably the identifier Scrapy uses for this spider --
    # confirm against the Scrapy version in use.
    domain_name = 'wc2'
    start_urls = ['http://www.whitecase.com/Attorneys/List.aspx?LastName=A']

    # NOTE(review): Scrapy's CrawlSpider documentation warns against
    # overriding parse(); no rules are defined here, so BaseSpider may have
    # been intended -- confirm.
    def parse(self, response):
        """Build a list of items with a ``school`` field from the page.

        Args:
            response: the fetched page, wrapped in an ``HtmlXPathSelector``.

        Returns:
            The list ``items``; one entry is appended per element of ``sites``.
        """
        hxs = HtmlXPathSelector(response)
        # href values of links in the first td.altRow cell, reduced to the
        # parts matching: "/", any one character, "a", then word characters.
        sites = hxs.select('//td[@class="altRow"][1]/a/@href').re('/.a\w+')
        items = []
        # NOTE(review): `site` is never used in the loop body; the XPath below
        # runs against the whole page (hxs), so every iteration computes the
        # same value.
        for site in sites:
            # BUG: this binds the FirmItem class itself (no call parentheses),
            # not an instance, so the item assignment on the next line raises
            # TypeError: 'ItemMeta' object does not support item assignment.
            # It should read FirmItem().
            item = FirmItem
            # Regex groups: literal "JD", a lazy run of any characters, digits.
            item['school'] = hxs.select('//td[@class="mainColumnTDa"]').re('(JD)(.*?)(\d+)')
            items.append(item)
        return items

# Module-level spider instance.
# NOTE(review): presumably how this Scrapy version discovers the spider -- confirm.
SPIDER = Spider1()    

وأحصل على الخطأ التالي:

[wc2] ERROR: Spider exception caught while processing   
<http://www.whitecase.com/Attorneys/List.aspx?LastName=A> (referer: <None>): 
[Failure instance: Traceback: <type 'exceptions.TypeError'>: 
'ItemMeta' object does not support item assignment

سأكون ممتنًا لو ألقى الخبراء هنا نظرة على الشيفرة وأعطوني فكرة عن موضع الخطأ.

وشكرا

هل كانت مفيدة؟

الحل

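ربما كنت تقصد item = FirmItem() بدلًا من item = FirmItem؟ فبدون القوسين يشير item إلى الصنف FirmItem نفسه (وهو كائن من النوع ItemMeta) لا إلى نسخة منه، ولهذا يظهر الخطأ عند محاولة الإسناد إلى item['school'].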

مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top