Python eTree Parser isn't appending an element

https://stackoverflow.com/questions/16467640

21-04-2022
|

Frage

Look at my log: the row I'm getting back from Postgres has been turned from a string into an element (I print the string, print the element, and print the result of ET.iselement!), and yet when I try to append it, the error says it's not an element. Huff, puff.

import sys
from HTMLParser import HTMLParser
from xml.etree import cElementTree as etree
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement, tostring
import psycopg2
import psycopg2.extras

def main():
    """Build one HTML page per row of the landingpagedata table.

    Each row's fields are written into a freshly parsed copy of
    template.html, and the result is saved to the file named by the row's
    page_name column.
    """
    # Connect to an existing database
    # NOTE(review): host is a filesystem path, presumably the directory of
    # the server's Unix socket -- confirm.
    conn = psycopg2.connect(dbname="**", user="**", password="**", host="/tmp/", port="**")

    # Open a cursor to perform database operations
    # RealDictCursor is what lets the loop below index rows by column name
    # (row['title'], row['page_name'], ...).
    cur = conn.cursor(cursor_factory = psycopg2.extras.RealDictCursor)

    # Load every landing-page row up front; one output file is written per row.
    cur.execute("SELECT * FROM landingpagedata;")
    rows = cur.fetchall()

    class LinksParser(HTMLParser):
      """HTML parser that replays tag and text events into an etree TreeBuilder.

      Feed it markup with feed(), then call close() to obtain the root
      element of the tree that was built.
      """

      def __init__(self):
          HTMLParser.__init__(self)
          # Receives the start/data/end events and assembles the element tree.
          self.tb = etree.TreeBuilder()

      def handle_starttag(self, tag, attributes):
          # HTMLParser hands attributes over as (name, value) pairs; the
          # builder expects a mapping.
          attrib = {}
          for name, value in attributes:
              attrib[name] = value
          self.tb.start(tag, attrib)

      def handle_data(self, data):
          self.tb.data(data)

      def handle_endtag(self, tag):
          self.tb.end(tag)

      def close(self):
          """Finish parsing and return the root element of the built tree."""
          HTMLParser.close(self)
          built_root = self.tb.close()
          return built_root

    # HTML skeleton that every generated page starts from.
    template = 'template.html'



    # parser.feed(open('landingIndex.html').read()) #for testing
    # root = parser.close()

    for row in rows:
        # A new parser (and therefore a new TreeBuilder) per row, so each
        # page starts from an unmodified copy of the template.
        parser = LinksParser()

        # NOTE(review): the template file is re-read on every iteration and
        # the file object is never closed explicitly.
        parser.feed(open(template).read())
        root = parser.close()




        # NOTE(review): find() returns None when nothing matches, so each
        # .text assignment below assumes the template contains the element.

        #title
        title = root.find(".//title")
        title.text = row['title']

        #headline
        h1_id_headline = root.find(".//h1")
        h1_id_headline.text = row['h1_id_headline']
        # print row['h1_id_headline']

        #intro
        p_class_intro = root.find(".//p[@class='intro']")
        p_class_intro.text = row['p_class_intro']
        # print row['p_class_intro']

Here is where the problems occur!

        #recommended
        # Locate the placeholder div and empty it so the row's markup can
        # take its place.
        p_class_recommendedbackground = root.find(".//div[@class='recommended_background_div']")
        print p_class_recommendedbackground
        p_class_recommendedbackground.clear()
        # NOTE(review): ET is xml.etree.ElementTree, whereas root was built by
        # LinksParser through cElementTree's TreeBuilder. The two modules have
        # distinct Element types, which is why the append() below fails with
        # "TypeError: must be Element, not Element".
        newElement = ET.fromstring(row['p_class_recommendedbackground'])
        print row['p_class_recommendedbackground']
        # Prints True: the value is an Element, just not cElementTree's.
        print ET.iselement(newElement)
        p_class_recommendedbackground.append(newElement)

        # Serialize the filled-in tree and write it to the row's output file.
        html = tostring(root)
        # NOTE(review): this open/close only truncates the file and leaves
        # f bound to None; opening with 'w' on the next line truncates
        # anyway, so this line is redundant.
        f = open(row['page_name'], 'w').close()
        f = open(row['page_name'], 'w')
        f.write(html)
        f.close()
        # f = ''
        # html = ''
        # NOTE(review): parser and root are both re-created at the top of the
        # next iteration, so the two resets below are not needed.
        parser.reset()
        root = ''

    # Close communication with the database
    # NOTE(review): not reached if the loop above raises; there is no
    # try/finally around it.
    cur.close()
    conn.close()

# Run the page builder only when executed as a script, not on import.
if __name__ == "__main__":
  main()

My log is this:

{background: url(/images/courses/azRealEstate.png) center no-repeat;}
<Element 'div' at 0x10a999720>
<p class="recommended_background">Materials are are aimed to all aspiring real estate sales associates who wish to obtain the Arizona Real Estate Salesperson license, which is provided by the <a href="http://www.re.state.az.us/" style="text-decoration: underline;">Arizona Department of Real Estate</a>.</p>
True
Traceback (most recent call last):
  File "/Users/Morgan13/Programming/LandingPageBuilder/landingPages/landingBuilderTest.py", line 108, in <module>
    main()
  File "/Users/Morgan13/Programming/LandingPageBuilder/landingPages/landingBuilderTest.py", line 84, in main
    p_class_recommendedbackground.append(newElement)
TypeError: must be Element, not Element
[Finished in 0.1s with exit code 1]

Lösung

I can reproduce the error message this way:

from xml.etree import cElementTree as etree
import xml.etree.ElementTree as ET

# Minimal reproduction: the parent comes from cElementTree (imported as
# etree), the child from xml.etree.ElementTree (imported as ET).
croot = etree.Element('root')
child = ET.Element('child')
# Appending an element from one implementation to an element from the
# other is what raises the error.
croot.append(child)
# TypeError: must be Element, not Element

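The root cause of the problem is that we are mixing the cElementTree implementation of ElementTree with the xml.etree.ElementTree implementation of ElementTree. Each implementation defines its own Element type, so an element created by one cannot be appended to an element created by the other, hence the confusing "must be Element, not Element". In the question's code, root is built by etree.TreeBuilder() (cElementTree), while newElement comes from ET.fromstring() (xml.etree.ElementTree). Never the twain should meet.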

So the fix is simply to pick one, say etree, and replace all occurrences of the other (e.g. replace ET with etree).

Lizenziert unter: CC-BY-SA mit Zuschreibung

Nicht verbunden mit StackOverflow