#
# ElementTree
# $Id: SimpleXMLTreeBuilder.py 3225 2007-08-27 21:32:08Z fredrik $
#
# A simple XML tree builder, based on Python's xmllib
#
# Note that due to bugs in xmllib, this builder does not fully support
# namespaces (unqualified attributes are put in the default namespace,
# instead of being left as is). Run this module as a script to find
# out if this affects your Python version.
#
# history:
# 2001-10-20 fl created
# 2002-05-01 fl added namespace support for xmllib
# 2002-08-17 fl added xmllib sanity test
#
# Copyright (c) 1999-2004 by Fredrik Lundh. All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2007 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
##
# Tools to build element trees from XML files, using <b>xmllib</b>.
# This module can be used instead of the standard tree builder, for
# Python versions where "expat" is not available (such as 1.5.2).
# <p>
# Note that due to bugs in <b>xmllib</b>, the namespace support is
# not reliable (you can run the module as a script to find out exactly
# how unreliable it is on your Python version).
##
import xmllib, string
import ElementTree
##
# ElementTree builder for XML source data, based on <b>xmllib</b>.
# Parser events are forwarded to an <b>ElementTree.TreeBuilder</b>
# instance, which assembles the element structure.
#
# @param html If true, the entities defined by the standard
#     <b>htmlentitydefs</b> module are recognized by this parser, in
#     addition to the ones xmllib already knows about.
# @param target Accepted, but not used by this builder.
# @param encoding Accepted, but not used by this builder.
# @see elementtree.ElementTree

class TreeBuilder(xmllib.XMLParser):

    def __init__(self, html=0, target=None, encoding=None):
        self.__builder = ElementTree.TreeBuilder()
        if html:
            import htmlentitydefs
            # entitydefs is inherited from the XMLParser class, so the
            # dictionary is shared by every parser.  Extend a private
            # copy; updating it in place would leak the HTML entities
            # into all other xmllib-based parsers in the process.
            entitydefs = self.entitydefs.copy()
            entitydefs.update(htmlentitydefs.entitydefs)
            self.entitydefs = entitydefs
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        xmllib.XMLParser.feed(self, data)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        # flush the xmllib parser first, then ask the tree builder for
        # whatever it has assembled
        xmllib.XMLParser.close(self)
        return self.__builder.close()

    # -- xmllib callbacks; each one forwards to the tree builder

    def handle_data(self, data):
        self.__builder.data(data)

    # CDATA sections are handled exactly like ordinary character data
    handle_cdata = handle_data

    def unknown_starttag(self, tag, attrs):
        # run the tag and every attribute name through fixname, so that
        # "uri localname" pairs become "{uri}localname"
        attrib = {}
        for key, value in attrs.items():
            attrib[fixname(key)] = value
        self.__builder.start(fixname(tag), attrib)

    def unknown_endtag(self, tag):
        self.__builder.end(fixname(tag))
def fixname(name, split=string.split):
# xmllib in 2.0 and later provides limited (and slightly broken)
# support for XML namespaces.
if " " not in name:
return name
return "{%s}%s" % tuple(split(name, " ", 1))
if __name__ == "__main__":

    # sanity check: parse a tiny document that uses a default namespace
    # and look for known namespace bugs in xmllib

    p = TreeBuilder()

    text = """\
<root xmlns='default'>
<tag attribute='value' />
</root>
"""

    p.feed(text)
    tree = p.close()

    status = []

    # check for bugs in the xmllib implementation
    tag = tree.find("{default}tag")
    if tag is None:
        # the child element was not placed in the default namespace
        status.append("namespaces not supported")
    elif tag.get("{default}attribute"):
        # the unqualified attribute should have been left as is
        status.append("default namespace applied to unqualified attribute")

    # report bugs (single-argument print calls produce the same output
    # as the print statement, and parse on every Python version)
    if status:
        print("xmllib doesn't work properly in this Python version:")
        for bug in status:
            print("- " + bug)
    else:
        print("congratulations; no problems found in xmllib")