XML أدوات الكتابة عن الثعبان

https://stackoverflow.com/questions/56229

09-06-2019
|

سؤال

أنا حاليا أحاول ElementTree ويبدو على ما يرام ، فإنه يهرب HTML الكيانات وهلم جرا وهكذا دواليك.أنا في عداد المفقودين شيء رائع حقا لم أسمع ؟

هذا هو على غرار ما أقوم به فعلا:

import xml.etree.ElementTree as ET
root = ET.Element('html')
head = ET.SubElement(root,'head')
script = ET.SubElement(head,'script')
script.set('type','text/javascript')
script.text = "var a = 'I love &aacute; letters'"
body = ET.SubElement(root,'body')
h1 = ET.SubElement(body,'h1')
h1.text = "And I like the fact that 3 > 1"
tree = ET.ElementTree(root)
tree.write('foo.xhtml')

# more foo.xhtml
<html><head><script type="text/javascript">var a = 'I love &amp;aacute;
letters'</script></head><body><h1>And I like the fact that 3 &gt; 1</h1>
</body></html>

المحلول

أفترض أن هذا هو في الواقع خلق XML DOM شجرة ، لأنك ترغب في التحقق من أن ما يدور في هذا الملف XML صالحة لأن خلاف ذلك كنت مجرد كتابة ثابت سلسلة إلى ملف.إذا كان التحقق من صحة الإخراج الخاص بك هو في الواقع الهدف الخاص بك ، ثم أقترح

from xml.dom.minidom import parseString

doc = parseString("""<html>
    <head>
        <script type="text/javascript">
            var a = 'I love &amp;aacute; letters'
        </script>
    </head>
    <body>
        <h1>And I like the fact that 3 &gt; 1</h1>
    </body>
    </html>""")

with open("foo.xhtml", "w") as f:
    f.write( doc.toxml() )

هذا يتيح لك مجرد كتابة XML تريد إخراج التحقق من أنه من الصحيح (منذ parseString رفع استثناء إذا كان غير صالح) و الكود الخاص بك تبدو أجمل بكثير.

ويفترض أن كنت لا مجرد كتابة نفس ثابت XML في كل مرة تريد بعض الاستبدال.في هذه الحالة يجب خطوط مثل

var a = '%(message)s'

ومن ثم استخدام % مشغل للقيام الاستبدال ، مثل

</html>""" % {"message": "I love &amp;aacute; letters"})

نصائح أخرى

طريقة أخرى هي استخدام هـ مصنع باني من lxml (متوفر في Elementtree جدا)

>>> from lxml import etree

>>> from lxml.builder import E

>>> def CLASS(*args): # class is a reserved word in Python
...     return {"class":' '.join(args)}

>>> html = page = (
...   E.html(       # create an Element called "html"
...     E.head(
...       E.title("This is a sample document")
...     ),
...     E.body(
...       E.h1("Hello!", CLASS("title")),
...       E.p("This is a paragraph with ", E.b("bold"), " text in it!"),
...       E.p("This is another paragraph, with a", "\n      ",
...         E.a("link", href="http://www.python.org"), "."),
...       E.p("Here are some reserved characters: <spam&egg>."),
...       etree.XML("<p>And finally an embedded XHTML fragment.</p>"),
...     )
...   )
... )

>>> print(etree.tostring(page, pretty_print=True))
<html>
  <head>
    <title>This is a sample document</title>
  </head>
  <body>
    <h1 class="title">Hello!</h1>
    <p>This is a paragraph with <b>bold</b> text in it!</p>
    <p>This is another paragraph, with a
      <a href="http://www.python.org">link</a>.</p>
    <p>Here are some reservered characters: &lt;spam&amp;egg&gt;.</p>
    <p>And finally an embedded XHTML fragment.</p>
  </body>
</html>

هناك دائما SimpleXMLWriter, جزء من ElementTree أدوات.واجهة بسيطة القتلى.

هنا مثال:

from elementtree.SimpleXMLWriter import XMLWriter
import sys

w = XMLWriter(sys.stdout)
html = w.start("html")

w.start("head")
w.element("title", "my document")
w.element("meta", name="generator", value="my application 1.0")
w.end()

w.start("body")
w.element("h1", "this is a heading")
w.element("p", "this is a paragraph")

w.start("p")
w.data("this is ")
w.element("b", "bold")
w.data(" and ")
w.element("i", "italic")
w.data(".")
w.end("p")

w.close(html)

https://github.com/galvez/xmlwitch:

import xmlwitch
xml = xmlwitch.Builder(version='1.0', encoding='utf-8')
with xml.feed(xmlns='http://www.w3.org/2005/Atom'):
    xml.title('Example Feed')
    xml.updated('2003-12-13T18:30:02Z')
    with xml.author:
        xml.name('John Doe')
    xml.id('urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6')
    with xml.entry:
        xml.title('Atom-Powered Robots Run Amok')
        xml.id('urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a')
        xml.updated('2003-12-13T18:30:02Z')
        xml.summary('Some text.')
print(xml)

لا كنت فعلا تريد شيئا مثل:

html(head(script(type='text/javascript', content='var a = ...')),
body(h1('And I like the fact that 3 < 1'), p('just some paragraph'))

أعتقد أنني رأيت شيئا من هذا القبيل في مكان ما.هذا سيكون رائعا.

تحرير: في الواقع ، لقد ذهبت كتب المكتبة اليوم أن تفعل فقط: magictree

يمكنك استخدامه مثل هذا:

from magictree import html, head, script, body, h1, p
root = html(
         head(
           script('''var a = 'I love &amp;aacute; letters''', 
                  type='text/javascript')),
         body(
           h1('And I like the fact that 3 > 1')))

# root is a plain Element object, like those created with ET.Element...
# so you can write it out using ElementTree :)
tree = ET.ElementTree(root)
tree.write('foo.xhtml')

السحر في magictree يكمن في كيفية استيراد يعمل:على Element المصانع التي تم إنشاؤها عند الحاجة.لديك انظر المصدر, هو استنادا إلى إجابة أخرى ستاكوفيرفلوو السؤال.

انتهى بي الأمر باستخدام saxutils.الهروب(str) لتوليد صالح XML سلاسل ثم التحقق من ذلك مع ايلي نهج متأكد أنني لم تفوت أي الوسم

from xml.sax import saxutils
from xml.dom.minidom import parseString
from xml.parsers.expat import ExpatError

xml = '''<?xml version="1.0" encoding="%s"?>\n
<contents title="%s" crawl_date="%s" in_text_date="%s" 
url="%s">\n<main_post>%s</main_post>\n</contents>''' %
(self.encoding, saxutils.escape(title), saxutils.escape(time), 
saxutils.escape(date), saxutils.escape(url), saxutils.escape(contents))
try:
    minidoc = parseString(xml)
catch ExpatError:
    print "Invalid xml"

لأي شخص يواجه هذا الآن, في الواقع هناك طريقة للقيام بذلك مخبأة بعيدا في بيثون المكتبة القياسية في xml.ساكس.utils.XMLGenerator.وهنا مثال على ذلك في العمل:

>>> from xml.sax.saxutils import XMLGenerator
>>> import StringIO
>>> w = XMLGenerator(out, 'utf-8')
>>> w.startDocument()
>>> w.startElement("test", {'bar': 'baz'})
>>> w.characters("Foo")
>>> w.endElement("test")
>>> w.endDocument()
>>> print out.getvalue()
<?xml version="1.0" encoding="utf-8"?>
<test bar="baz">Foo</test>

محاولة http://uche.ogbuji.net/tech/4suite/amara.انها كاملة تماما ويحتوي على التوالي إلى الأمام مجموعة من أدوات الوصول.عادي دعم يونيكود ، إلخ.

#
#Output the XML entry
#
def genFileOLD(out,label,term,idval):
    filename=entryTime() + ".html"
    writer=MarkupWriter(out, indent=u"yes")
    writer.startDocument()
    #Test element and attribute writing
    ans=namespace=u'http://www.w3.org/2005/Atom'
    xns=namespace=u'http://www.w3.org/1999/xhtml'
    writer.startElement(u'entry',
       ans,
       extraNss={u'x':u'http://www.w3.org/1999/xhtml' ,
                 u'dc':u'http://purl.org/dc/elements/1.1'})
    #u'a':u'http://www.w3.org/2005/Atom',
    #writer.attribute(u'xml:lang',unicode("en-UK"))

    writer.simpleElement(u'title',ans,content=unicode(label))
    #writer.simpleElement(u'a:subtitle',ans,content=u' ')
    id=unicode("http://www.dpawson.co.uk/nodesets/"+afn.split(".")[0])
    writer.simpleElement(u'id',ans,content=id)
    writer.simpleElement(u'updated',ans,content=unicode(dtime()))
    writer.startElement(u'author',ans)
    writer.simpleElement(u'name',ans,content=u'Dave ')
    writer.simpleElement(u'uri',ans,
      content=u'http://www.dpawson.co.uk/nodesets/'+afn+".xml")
    writer.endElement(u'author')
    writer.startElement(u'category', ans)
    if (prompt):
        label=unicode(raw_input("Enter label "))
    writer.attribute(u'label',unicode(label))
    if (prompt):
        term = unicode(raw_input("Enter term to use "))
    writer.attribute(u'term', unicode(term))
    writer.endElement(u'category')
    writer.simpleElement(u'rights',ans,content=u'\u00A9 Dave 2005-2008')
    writer.startElement(u'link',ans)
    writer.attribute(u'href',
         unicode("http://www.dpawson.co.uk/nodesets/entries/"+afn+".html"))
    writer.attribute(u'rel',unicode("alternate"))
    writer.endElement(u'link')
    writer.startElement(u'published', ans)
    dt=dtime()
    dtu=unicode(dt)
    writer.text(dtu)
    writer.endElement(u'published')
    writer.simpleElement(u'summary',ans,content=unicode(label))
    writer.startElement(u'content',ans)
    writer.attribute(u'type',unicode("xhtml"))
    writer.startElement(u'div',xns)
    writer.simpleElement(u'h3',xns,content=unicode(label))
    writer.endElement(u'div')
    writer.endElement(u'content')
    writer.endElement(u'entry')

مرخصة بموجب: CC-BY-SA مع الإسناد

لا تنتمي إلى StackOverflow