Encode named HTML entities with Python
http://beckism.com/2009/03/named_entities_python/
There is also a django app for both decoding and encoding:
https://github.com/cobrateam/python-htmlentities
The code below targets Python 2.x (in Python 3.x, import codepoint2name
from html.entities instead of htmlentitydefs):
'''
Registers a special handler for named HTML entities
Usage:
import named_entities
text = u'Some string with Unicode characters'
text = text.encode('ascii', 'named_entities')
'''
import codecs
from htmlentitydefs import codepoint2name
def named_entities(text):
    """Codec error handler that replaces unencodable characters with HTML entities.

    Intended to be used through the codec machinery, e.g.
    ``u'caf\\xe9'.encode('ascii', 'named_entities')``.

    Args:
        text: The ``UnicodeEncodeError`` or ``UnicodeTranslateError`` raised
            by the codec. Its ``object``, ``start`` and ``end`` attributes
            identify the offending slice of the input.

    Returns:
        A ``(replacement, position)`` tuple: the replacement text for the
        offending slice and the index at which processing should resume.

    Raises:
        TypeError: If ``text`` is not one of the supported error types.
    """
    if not isinstance(text, (UnicodeEncodeError, UnicodeTranslateError)):
        # Exception instances have no ``__name__``; the class does.
        raise TypeError("Can't handle %s" % type(text).__name__)

    pieces = []
    for char in text.object[text.start:text.end]:
        code = ord(char)
        name = codepoint2name.get(code)
        if name is not None:
            # A named entity exists for this code point, e.g. &eacute;
            pieces.append(u'&%s;' % name)
        else:
            # No named entity: fall back to a decimal numeric reference.
            pieces.append(u'&#%s;' % code)
    # Join with a text separator so the result is always text, even if empty.
    return u''.join(pieces), text.end


# Make the handler available as errors='named_entities' for encode().
codecs.register_error('named_entities', named_entities)