Revision: 17583
Updated Code
at September 10, 2009 11:54 by manatlan
Updated Code
import re

try:
    from htmlentitydefs import name2codepoint as n2cp  # Python 2
except ImportError:
    from html.entities import name2codepoint as n2cp  # Python 3

try:
    unichr
except NameError:
    # Python 3 has no unichr(); chr() plays the same role there.
    unichr = chr

# group(1): "#" for a numeric reference, "" for a named one.
# group(2): an optional lowercase "x" directly after the "&" or "&#".
# group(3): the remaining word characters up to the ";".
_ENTITY_RE = re.compile(r'&(#?)(x?)(\w+);')


def substitute_entity(match):
    """Return the replacement text for one matched character reference.

    ``match`` must come from the ``&(#?)(x?)(\\w+);`` pattern used by
    ``decode_htmlentities``.  Decimal (``&#233;``), hexadecimal
    (``&#xE9;`` / ``&#XE9;``) and named (``&eacute;``) references are
    decoded.  Anything that cannot be decoded (unknown name, non-numeric
    digits, code point out of range) is returned unchanged.
    """
    # The regex may have split a leading "x" off into group(2); glue it
    # back so named entities such as "&xi;" are looked up by their full
    # name instead of by "i".
    body = match.group(2) + match.group(3)

    if match.group(1) == "#":
        if body[:1] in ("x", "X"):
            digits, base = body[1:], 16
        else:
            digits, base = body, 10
        try:
            return unichr(int(digits, base))
        except (ValueError, OverflowError):
            # Malformed digits or a code point unichr() rejects:
            # leave the reference as written rather than crash.
            return match.group()

    cp = n2cp.get(body)
    if cp is None:
        return match.group()
    return unichr(cp)


def decode_htmlentities(string):
    """Return ``string`` with its HTML character references decoded.

    References that cannot be decoded are left in place verbatim.
    """
    return _ENTITY_RE.sub(substitute_entity, string)
Revision: 17582
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at September 10, 2009 03:11 by manatlan
Initial Code
import re

try:
    from htmlentitydefs import name2codepoint as n2cp  # Python 2
except ImportError:
    from html.entities import name2codepoint as n2cp  # Python 3

try:
    unichr
except NameError:
    # Python 3 has no unichr(); chr() plays the same role there.
    unichr = chr


def _numeric_reference_to_text(digits):
    """Return the character for a numeric reference body, or None if invalid.

    ``digits`` is the text between ``&#`` and ``;``.  A leading ``x`` or
    ``X`` selects hexadecimal; otherwise the value is read as decimal.
    """
    base = 10
    if digits[:1] in ("x", "X"):
        digits, base = digits[1:], 16
    try:
        return unichr(int(digits, base))
    except (ValueError, OverflowError):
        return None


def substitute_entity(match):
    """Return the replacement text for one matched character reference.

    ``match`` must come from the pattern compiled in
    ``decode_htmlentities``: group(1) is ``"#"`` for numeric references
    and group(2) is the reference body.  References that cannot be
    decoded are returned unchanged.
    """
    ent = match.group(2)
    if match.group(1) == "#":
        # The pattern also admits non-decimal bodies after "&#" (e.g.
        # "xE9" or "abc"), so the conversion must be allowed to fail.
        decoded = _numeric_reference_to_text(ent)
    else:
        cp = n2cp.get(ent)
        decoded = unichr(cp) if cp else None
    return match.group() if decoded is None else decoded


def decode_htmlentities(string):
    """Return ``string`` with its HTML character references decoded."""
    # Raw string: same pattern text as before, without relying on "\d"
    # and "\w" surviving as unrecognised string escapes.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")
    return entity_re.sub(substitute_entity, string)
Initial URL
Initial Description
Example: decode_htmlentities("l&#39;eau") returns u"l'eau"
Initial Title
decode html entities
Initial Tags
html, python, text
Initial Language
Python