質問

I'm trying to subclass Scrapy's XPathSelector and patch in support for CSS3 selectors.

XPathSelector is defined like this:

# Excerpt of Scrapy's XPathSelector as quoted in the question; the rest of
# __init__ is elided.
class XPathSelector(object_ref):
    # Slot names declared by this class. Subclasses inherit these slots and
    # should list only their own additional names in their __slots__.
    __slots__ = ['doc', 'xmlNode', 'expr', '__weakref__']

    def __init__(self, response=None, text=None, node=None, parent=None, expr=None):
        # A selector created from a parent selector reuses the parent's doc.
        if parent is not None:
            self.doc = parent.doc
        ...

I subclass XPathSelector and override __init__:

class CSSSelector(XPathSelector):
    def __init__(self, *args, **kwargs):
        # Read the optional 'translator' keyword (default 'html'); the value
        # is not used any further in this snippet.
        translator = kwargs.get('translator', 'html').lower()

        # Strip the keyword so it is not forwarded: XPathSelector.__init__
        # has no 'translator' parameter.
        if 'translator' in kwargs:
            del kwargs['translator']

        # NOTE(review): super(XPathSelector, self) starts the method lookup
        # *after* XPathSelector in the MRO, so XPathSelector.__init__ is
        # skipped and doc / xmlNode / expr are never assigned. The first
        # argument should be the current class: super(CSSSelector, self).
        super(XPathSelector, self).__init__(*args, **kwargs)

When I try to use CSSSelector, I get AttributeErrors for doc, xmlNode and expr. Manually adding those slots to CSSSelector doesn't help either.

What is the proper way to subclass a class that defines __slots__?


My complete code is here:

"""
Extends `XPathSelector` to allow CSS3 selectors via the `cssselect` library.
"""

import libxml2
from cssselect import HTMLTranslator, GenericTranslator
from scrapy.selector import XPathSelector, XPathSelectorList
from scrapy.utils.python import unicode_to_str

__all__ = ['CSSSelector', 'CSSSelectorList']

class CSSSelector(XPathSelector):
    """XPathSelector variant that is queried with CSS3 selectors.

    CSS selectors are translated to XPath by a ``cssselect`` translator and
    then evaluated through the document's XPath context.

    Accepts the same arguments as ``XPathSelector`` plus one optional
    keyword, ``translator``: ``'html'`` (default), ``'xhtml'`` or ``'xml'``
    (case-insensitive).
    """

    # Only the *additional* slot is listed. 'doc', 'xmlNode' and 'expr' are
    # already declared by XPathSelector and are inherited; re-declaring them
    # would allocate a second, redundant set of slots on every instance.
    __slots__ = ['translator']

    def __init__(self, *args, **kwargs):
        """Build the selector and pick the CSS-to-XPath translator.

        Raises:
            ValueError: if ``translator`` is not 'html', 'xhtml' or 'xml'.
        """
        # Remove our own keyword before delegating: the base __init__ has no
        # 'translator' parameter.
        translator = kwargs.pop('translator', 'html').lower()

        super(CSSSelector, self).__init__(*args, **kwargs)

        if translator == 'html':
            self.translator = HTMLTranslator()
        elif translator == 'xhtml':
            self.translator = HTMLTranslator(xhtml=True)
        elif translator == 'xml':
            self.translator = GenericTranslator()
        else:
            raise ValueError("Invalid translator: %s. Valid translators are 'html' (default), 'xhtml' and 'xml'." % translator)

    def _child(self, node, xpath):
        """Wrap ``node`` in a new selector that shares this one's translator."""
        child = self.__class__(node=node, parent=self, expr=xpath)
        # The constructor falls back to the default 'html' translator; hand
        # down ours so nested select() calls keep translating the same way
        # (e.g. for a selector created with translator='xml').
        child.translator = self.translator
        return child

    def _select_xpath(self, xpath):
        """Evaluate ``xpath`` with this selector's node as the context node.

        Returns:
            A CSSSelectorList with one selector per result; an empty list
            when the wrapped node has no ``xpathEval`` method.

        Raises:
            ValueError: if libxml2 rejects ``xpath``.
        """
        if not hasattr(self.xmlNode, 'xpathEval'):
            return CSSSelectorList([])

        self.doc.xpathContext.setContextNode(self.xmlNode)
        xpath = unicode_to_str(xpath, 'utf-8')

        try:
            xpath_result = self.doc.xpathContext.xpathEval(xpath)
        except libxml2.xpathError:
            raise ValueError("Invalid XPath: %s" % xpath)

        # A result without __iter__ is a single value; wrap it so both cases
        # are handled by the same code path.
        if not hasattr(xpath_result, '__iter__'):
            xpath_result = [xpath_result]

        return CSSSelectorList([self._child(node, xpath) for node in xpath_result])

    def select(self, selector):
        """Translate the CSS ``selector`` to XPath and evaluate it."""
        xpath = self.translator.css_to_xpath(selector)

        return self._select_xpath(xpath)

    def attribute(self, name):
        """Select the attribute ``name`` of this node.

        Uses the abbreviated form ``@name``. The previous ``self::@name`` is
        not valid XPath, because ``@`` is itself an axis specifier and a step
        may carry only one.
        """
        return self._select_xpath('@' + name)

    def text(self):
        """Select the text-node children of this node.

        ``self::text()`` would match only when the context node is itself a
        text node, so it was always empty for element selectors.
        """
        return self._select_xpath('text()')

class CSSSelectorList(XPathSelectorList):
    """List of CSSSelector objects with element-wise convenience methods."""

    def attribute(self, name):
        """Return ``x.attribute(name)`` for every selector in the list."""
        return [x.attribute(name) for x in self]

    def text(self, name=None):
        """Return ``x.text()`` for every selector in the list.

        ``name`` is ignored. It used to be a required positional argument
        even though it was never used; it is now optional so that ``text()``
        can be called without arguments while existing callers that pass a
        value keep working.
        """
        return [x.text() for x in self]

I can initialize the class just fine:

>>> css_selector = CSSSelector(response)

But I get AttributeErrors all over the place:

>>> css_selector.select('title')
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-150-d21b0f17d4cc> in <module>()
----> 1 css_selector.select('title')

<ipython-input-147-c855c7eaf9fa> in select(self, selector)
     57 
     58 
---> 59         return self._select_xpath(xpath)
     60 
     61 

<ipython-input-147-c855c7eaf9fa> in _select_xpath(self, xpath)
     34 
     35     def _select_xpath(self, xpath):
---> 36         if hasattr(self.xmlNode, 'xpathEval'):
     37             self.doc.xpathContext.setContextNode(self.xmlNode)
     38             xpath = unicode_to_str(xpath, 'utf-8')

AttributeError: xmlNode
役に立ちましたか?

解決

There is nothing wrong with using __slots__. The problem is that your subclass never calls XPathSelector's __init__.

Instead of super(XPathSelector, self) there should be super(CSSSelector, self):

class CSSSelector(XPathSelector):
    def __init__(self, *args, **kwargs):
        # ...
        # Pass the *current* class to super(): the lookup then starts at the
        # next class in the MRO, so XPathSelector.__init__ actually runs.
        super(CSSSelector, self).__init__(*args, **kwargs)

See a good topic on super in Python: Understanding Python super() with __init__() methods.

UPD.

For what it's worth, if you extend a class with __slots__, generally you should add __slots__ in a subclass too, at least an empty one. Otherwise, a per-instance dictionary will be created anyway, making __slots__ of base class effectively useless. From Python Reference:

The action of a __slots__ declaration is limited to the class where it is defined. As a result, subclasses will have a __dict__ unless they also define __slots__ (which must only contain names of any additional slots).

ライセンス: CC-BY-SA帰属
所属していません StackOverflow
scroll top