XML 쓰기 위한 도구를 Python

https://stackoverflow.com/questions/56229

09-06-2019
|

문제

내가 하려는 현재 ElementTree 고 그것은,그것을 탈출 HTML 엔터티 및 등등.나는 뭔가가 정말 멋진 나 들리지 않았는가?

이것은 내가 실제로 일:

import xml.etree.ElementTree as ET
root = ET.Element('html')
head = ET.SubElement(root,'head')
script = ET.SubElement(head,'script')
script.set('type','text/javascript')
script.text = "var a = 'I love &aacute; letters'"
body = ET.SubElement(root,'body')
h1 = ET.SubElement(body,'h1')
h1.text = "And I like the fact that 3 > 1"
tree = ET.ElementTree(root)
tree.write('foo.xhtml')

# more foo.xhtml
<html><head><script type="text/javascript">var a = 'I love &amp;aacute;
letters'</script></head><body><h1>And I like the fact that 3 &gt; 1</h1>
</body></html>

해결책

내가 상황을 가정하여 설명하고 있습니다 실제로 만드는 XML DOM 트리기 때문에,당신은 유효성을 검증하려는 무엇이 들어가 이 파일은 유효한 XML 때문에,그렇지 않으면 당신은 단지 작성 정적 문자열하는 파일입니다.는 경우를 검증하는 출력이 실제로 당신의 목표는 다음요

from xml.dom.minidom import parseString

doc = parseString("""<html>
    <head>
        <script type="text/javascript">
            var a = 'I love &amp;aacute; letters'
        </script>
    </head>
    <body>
        <h1>And I like the fact that 3 &gt; 1</h1>
    </body>
    </html>""")

with open("foo.xhtml", "w") as f:
    f.write( doc.toxml() )

이것은 당신이 쓸 원하는 XML 을 출력하고,검증하는 그것의 올바른(이후 parseString 예외가 발생하면 그것은 잘못된)그리고 당신의 코드는 모습이 훨씬 더 좋습니다.

아마도 당신은 단지 쓰는 동일한 정적 XML 매 시간과 원하는 변경하십시오.이 경우에서 나는 라인이 같은

var a = '%(message)s'

다음 사용하%자 하는 대체처럼,

</html>""" % {"message": "I love &amp;aacute; letters"})

다른 팁

또 다른 방법은 사용 E Factory 빌더에서 lxml(에서 사용 가능 Elementtree 도)

>>> from lxml import etree

>>> from lxml.builder import E

>>> def CLASS(*args): # class is a reserved word in Python
...     return {"class":' '.join(args)}

>>> html = page = (
...   E.html(       # create an Element called "html"
...     E.head(
...       E.title("This is a sample document")
...     ),
...     E.body(
...       E.h1("Hello!", CLASS("title")),
...       E.p("This is a paragraph with ", E.b("bold"), " text in it!"),
...       E.p("This is another paragraph, with a", "\n      ",
...         E.a("link", href="http://www.python.org"), "."),
...       E.p("Here are some reserved characters: <spam&egg>."),
...       etree.XML("<p>And finally an embedded XHTML fragment.</p>"),
...     )
...   )
... )

>>> print(etree.tostring(page, pretty_print=True))
<html>
  <head>
    <title>This is a sample document</title>
  </head>
  <body>
    <h1 class="title">Hello!</h1>
    <p>This is a paragraph with <b>bold</b> text in it!</p>
    <p>This is another paragraph, with a
      <a href="http://www.python.org">link</a>.</p>
    <p>Here are some reservered characters: &lt;spam&amp;egg&gt;.</p>
    <p>And finally an embedded XHTML fragment.</p>
  </body>
</html>

항상 거기에 SimpleXMLWriter, 의 일부 ElementTree 구하지 않습니다.인터페이스가 죽은 간단합니다.

예를 들어 다음과 같습니다.

from elementtree.SimpleXMLWriter import XMLWriter
import sys

w = XMLWriter(sys.stdout)
html = w.start("html")

w.start("head")
w.element("title", "my document")
w.element("meta", name="generator", value="my application 1.0")
w.end()

w.start("body")
w.element("h1", "this is a heading")
w.element("p", "this is a paragraph")

w.start("p")
w.data("this is ")
w.element("b", "bold")
w.data(" and ")
w.element("i", "italic")
w.data(".")
w.end("p")

w.close(html)

https://github.com/galvez/xmlwitch:

import xmlwitch
xml = xmlwitch.Builder(version='1.0', encoding='utf-8')
with xml.feed(xmlns='http://www.w3.org/2005/Atom'):
    xml.title('Example Feed')
    xml.updated('2003-12-13T18:30:02Z')
    with xml.author:
        xml.name('John Doe')
    xml.id('urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6')
    with xml.entry:
        xml.title('Atom-Powered Robots Run Amok')
        xml.id('urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a')
        xml.updated('2003-12-13T18:30:02Z')
        xml.summary('Some text.')
print(xml)

지 않는 실제로 원하는 무언가 다음과 같:

html(head(script(type='text/javascript', content='var a = ...')),
body(h1('And I like the fact that 3 < 1'), p('just some paragraph'))

내 생각엔 무언가를 좋아하는 곳이다.이 할 수 있습니다.

편집: 실제로 갔고 썼 라이브러리를 하는 오늘 그: magictree

당신처럼 사용할 수 있습니다:

from magictree import html, head, script, body, h1, p
root = html(
         head(
           script('''var a = 'I love &amp;aacute; letters''', 
                  type='text/javascript')),
         body(
           h1('And I like the fact that 3 > 1')))

# root is a plain Element object, like those created with ET.Element...
# so you can write it out using ElementTree :)
tree = ET.ElementTree(root)
tree.write('foo.xhtml')

직 magictree 거짓말하는 방법에 가져오기 작동:이 Element 공장할 때 만들어 필요합니다.가 보 원,그것은 답변에 따라 다른 유래 질문.

내가 사용하여 종료 saxutils.탈출(str)를 생성하는 유효한 XML 문자열한 후 유효성을 검사하 엘리와 함께 접근해야하는 없었 태그

from xml.sax import saxutils
from xml.dom.minidom import parseString
from xml.parsers.expat import ExpatError

xml = '''<?xml version="1.0" encoding="%s"?>\n
<contents title="%s" crawl_date="%s" in_text_date="%s" 
url="%s">\n<main_post>%s</main_post>\n</contents>''' %
(self.encoding, saxutils.escape(title), saxutils.escape(time), 
saxutils.escape(date), saxutils.escape(url), saxutils.escape(contents))
try:
    minidoc = parseString(xml)
catch ExpatError:
    print "Invalid xml"

누군가를 위해가 발생이 있었습니다 실제로 이렇게 하는 방법에 숨겨져 있는 파이썬의 표준 라이브러리 xml.sax.utils.XMLGenerator.여기에서 그것의 예에서 작업:

>>> from xml.sax.saxutils import XMLGenerator
>>> import StringIO
>>> w = XMLGenerator(out, 'utf-8')
>>> w.startDocument()
>>> w.startElement("test", {'bar': 'baz'})
>>> w.characters("Foo")
>>> w.endElement("test")
>>> w.endDocument()
>>> print out.getvalue()
<?xml version="1.0" encoding="utf-8"?>
<test bar="baz">Foo</test>

도 http://uche.ogbuji.net/tech/4suite/amara.그것은 아주 전체가 바로 앞으로의 설정에 액세스 도구입니다.정상적인 유니코드 지원,등등.

#
#Output the XML entry
#
def genFileOLD(out,label,term,idval):
    filename=entryTime() + ".html"
    writer=MarkupWriter(out, indent=u"yes")
    writer.startDocument()
    #Test element and attribute writing
    ans=namespace=u'http://www.w3.org/2005/Atom'
    xns=namespace=u'http://www.w3.org/1999/xhtml'
    writer.startElement(u'entry',
       ans,
       extraNss={u'x':u'http://www.w3.org/1999/xhtml' ,
                 u'dc':u'http://purl.org/dc/elements/1.1'})
    #u'a':u'http://www.w3.org/2005/Atom',
    #writer.attribute(u'xml:lang',unicode("en-UK"))

    writer.simpleElement(u'title',ans,content=unicode(label))
    #writer.simpleElement(u'a:subtitle',ans,content=u' ')
    id=unicode("http://www.dpawson.co.uk/nodesets/"+afn.split(".")[0])
    writer.simpleElement(u'id',ans,content=id)
    writer.simpleElement(u'updated',ans,content=unicode(dtime()))
    writer.startElement(u'author',ans)
    writer.simpleElement(u'name',ans,content=u'Dave ')
    writer.simpleElement(u'uri',ans,
      content=u'http://www.dpawson.co.uk/nodesets/'+afn+".xml")
    writer.endElement(u'author')
    writer.startElement(u'category', ans)
    if (prompt):
        label=unicode(raw_input("Enter label "))
    writer.attribute(u'label',unicode(label))
    if (prompt):
        term = unicode(raw_input("Enter term to use "))
    writer.attribute(u'term', unicode(term))
    writer.endElement(u'category')
    writer.simpleElement(u'rights',ans,content=u'\u00A9 Dave 2005-2008')
    writer.startElement(u'link',ans)
    writer.attribute(u'href',
         unicode("http://www.dpawson.co.uk/nodesets/entries/"+afn+".html"))
    writer.attribute(u'rel',unicode("alternate"))
    writer.endElement(u'link')
    writer.startElement(u'published', ans)
    dt=dtime()
    dtu=unicode(dt)
    writer.text(dtu)
    writer.endElement(u'published')
    writer.simpleElement(u'summary',ans,content=unicode(label))
    writer.startElement(u'content',ans)
    writer.attribute(u'type',unicode("xhtml"))
    writer.startElement(u'div',xns)
    writer.simpleElement(u'h3',xns,content=unicode(label))
    writer.endElement(u'div')
    writer.endElement(u'content')
    writer.endElement(u'entry')

라이센스 : CC-BY-SA ~와 함께 속성

제휴하지 않습니다 StackOverflow