Added an SP formatter function to output mere fragments of documents.
[wrw.git] / wrw / sp / util.py
CommitLineData
ff79cdbf
FT
1import cons
2
def findnsnames(el):
    """Choose an output prefix for every namespace used in the tree at `el`.

    The tree is walked in document order (an element before its children,
    children left to right).  Each namespace is given the prefix ``n1``,
    ``n2``, ... in the order it is first encountered.  Afterwards one
    namespace is mapped to None instead of a prefix: the None namespace if
    any element uses it, otherwise the namespace of `el` itself.

    Returns a dict mapping namespace -> prefix (or None).
    """
    names = {}
    # Explicit stack instead of recursion; children are pushed reversed so
    # that they are popped, and therefore numbered, in document order.
    pending = [el]
    while pending:
        node = pending.pop()
        if not isinstance(node, cons.element):
            continue
        if node.ns not in names:
            # One prefix is handed out per distinct namespace, so the next
            # free number is always one more than the count so far.
            names[node.ns] = u"n%d" % (len(names) + 1)
        pending.extend(reversed(list(node.children)))
    default = None if None in names else el.ns
    names[default] = None
    return names
19
class formatter(object):
    """Write a cons element tree to a byte stream as XML markup.

    `out` needs a ``write`` method; every piece of text is encoded with
    `charset` before being handed to it.  `nsnames` maps each namespace
    used in the tree to the prefix written for it, a value of None meaning
    that elements of that namespace are written without a prefix; when it
    is omitted it is computed with ``findnsnames(root)``.  `doctype`, when
    given, is a two-item sequence whose items are placed, in order, in the
    ``<!DOCTYPE name PUBLIC "..." "...">`` declaration emitted by `start`.
    """

    # Characters that must never appear literally in character data or in
    # attribute values, and their replacements.
    _escapes = {u'&': u"&amp;", u'<': u"&lt;", u'>': u"&gt;"}

    def __init__(self, out, root, nsnames=None, charset="utf-8", doctype=None):
        self.root = root
        if nsnames is None:
            nsnames = findnsnames(root)
        self.nsnames = nsnames
        self.out = out
        self.charset = charset
        self.doctype = doctype

    def write(self, text):
        """Encode `text` with the configured charset and write it out."""
        self.out.write(text.encode(self.charset))

    def quotewrite(self, buf):
        """Write `buf` as character data, escaping ``&``, ``<`` and ``>``."""
        for ch in buf:
            # "&" has to become "&amp;"; writing it bare yields malformed XML.
            self.write(self._escapes.get(ch, ch))

    def text(self, el):
        """Write a text node, with markup characters escaped."""
        self.quotewrite(el)

    def rawcode(self, el):
        """Write a raw node verbatim, without any escaping."""
        self.write(el)

    def attrval(self, buf):
        """Write `buf` as a quoted, escaped attribute value.

        Double quotes delimit the value unless it contains a double quote
        itself, in which case single quotes are used.  Occurrences of the
        chosen delimiter inside the value are written as an entity.
        """
        if u'"' in buf:
            qc, qt = u"'", u"&apos;"
        else:
            qc, qt = u'"', u"&quot;"
        self.write(qc)
        for ch in buf:
            if ch == qc:
                self.write(qt)
            else:
                self.write(self._escapes.get(ch, ch))
        self.write(qc)

    def attr(self, k, v):
        """Write a single ``name=value`` attribute (no leading space)."""
        self.write(k)
        self.write(u'=')
        self.attrval(v)

    def _writeattrs(self, el, extra):
        """Write the attributes of `el`, then those in `extra`, each preceded by a space."""
        # items() iterates the same pairs as iteritems() and is available
        # on both Python 2 and Python 3.
        for k, v in el.attrs.items():
            self.write(u' ')
            self.attr(k, v)
        for k, v in extra.items():
            self.write(u' ')
            self.attr(k, v)

    def shorttag(self, el, **extra):
        """Write `el` as a self-closing tag; `extra` holds additional attributes."""
        self.write(u'<' + self.elname(el))
        self._writeattrs(el, extra)
        self.write(u" />")

    def elname(self, el):
        """Return the tag name of `el`, prefixed according to `nsnames`."""
        ns = self.nsnames[el.ns]
        if ns is None:
            return el.name
        return ns + u':' + el.name

    def starttag(self, el, **extra):
        """Write the opening tag of `el`; `extra` holds additional attributes."""
        self.write(u'<' + self.elname(el))
        self._writeattrs(el, extra)
        self.write(u'>')

    def endtag(self, el):
        """Write the closing tag of `el`."""
        self.write(u'</' + self.elname(el) + u'>')

    def longtag(self, el, **extra):
        """Write `el` as opening tag, child nodes and closing tag.

        `extra` holds additional attributes for the opening tag.  It has to
        be accepted here because `element` forwards it; without the
        parameter the reference to `extra` below raised NameError.
        """
        self.starttag(el, **extra)
        for ch in el.children:
            self.node(ch)
        self.endtag(el)

    def element(self, el, **extra):
        """Write `el`, self-closing when it has no children."""
        if len(el.children) == 0:
            self.shorttag(el, **extra)
        else:
            self.longtag(el, **extra)

    def node(self, el):
        """Write one tree node, dispatching on its cons type.

        Raises Exception for objects that are not an element, text or raw
        node.
        """
        if isinstance(el, cons.element):
            self.element(el)
        elif isinstance(el, cons.text):
            self.text(el)
        elif isinstance(el, cons.raw):
            self.rawcode(el)
        else:
            # %r rather than string concatenation: concatenating an
            # arbitrary object raised TypeError and hid the real problem.
            raise Exception("Unknown object in element tree: %r" % (el,))

    def start(self):
        """Write a complete document for the root element.

        Emits the XML declaration, the DOCTYPE declaration if a doctype was
        given, and then the root element carrying an ``xmlns`` /
        ``xmlns:prefix`` attribute for every entry of `nsnames` except the
        None namespace.
        """
        self.write(u'<?xml version="1.0" encoding="' + self.charset + u'" ?>\n')
        if self.doctype:
            self.write(u'<!DOCTYPE %s PUBLIC "%s" "%s">\n' % (self.root.name,
                                                              self.doctype[0],
                                                              self.doctype[1]))
        extra = {}
        for uri, nm in self.nsnames.items():
            if uri is None:
                continue
            if nm is None:
                extra[u"xmlns"] = uri
            else:
                extra[u"xmlns:" + nm] = uri
        self.element(self.root, **extra)

    @classmethod
    def output(cls, out, el, *args, **kw):
        """Format `el` as a complete document onto `out`."""
        cls(out=out, root=el, *args, **kw).start()

    @classmethod
    def fragment(cls, out, el, *args, **kw):
        """Format only the element `el` onto `out`, without the declarations written by `start`."""
        cls(out=out, root=el, *args, **kw).element(el)

    def update(self, **ch):
        """Return a shallow copy of this formatter with the attributes in `ch` replaced."""
        # __new__ is used so that __init__ is not run again on the copy.
        ret = type(self).__new__(type(self))
        ret.__dict__.update(self.__dict__)
        ret.__dict__.update(ch)
        return ret
152
class iwriter(object):
    """Wrapper around an output object that keeps track of line position.

    `col` is the number of characters written since the last newline, and
    `atbol` is true as long as nothing has been written since the object
    was created or since the last effective call to `indent`.
    """

    def __init__(self, out):
        self.out = out
        self.atbol = True
        self.col = 0

    def write(self, buf):
        """Pass `buf` on to the wrapped output one character at a time, updating `col`."""
        emit = self.out.write
        for ch in buf:
            self.col = 0 if ch == '\n' else self.col + 1
            emit(ch)
        self.atbol = False

    def indent(self, indent):
        """Begin a fresh line starting with the string `indent`.

        Does nothing when nothing has been written since the previous
        indentation.  A newline is only written if the current line is not
        empty.
        """
        if not self.atbol:
            if self.col != 0:
                self.write('\n')
            self.write(indent)
            # Set after the writes above, since write() always clears the flag.
            self.atbol = True
175
class indenter(formatter):
    """Formatter that puts nested elements on indented lines of their own.

    `indent` is the string added to the current indentation for every
    nesting level; all other arguments are passed on to `formatter`.
    """

    def __init__(self, indent=u" ", *args, **kw):
        super(indenter, self).__init__(*args, **kw)
        self.indent = indent
        self.curind = u""
        # All output goes through an iwriter so that the current column and
        # the start-of-line state are known.
        self.out = iwriter(self.out)

    def simple(self, el):
        """Return True if every child of `el` is a text node (or it has none)."""
        return all(isinstance(ch, cons.text) for ch in el.children)

    def longtag(self, el, **extra):
        """Write `el` with its children.

        An element with only text children is written exactly as by the
        base class.  Otherwise the children are written by a copy of this
        formatter with one more level of indentation, with a re-indent
        before the first child and another, at this formatter's own level,
        before the closing tag.
        """
        self.starttag(el, **extra)
        if self.simple(el):
            for child in el.children:
                self.node(child)
        else:
            inner = self.update(curind=self.curind + self.indent)
            inner.reindent()
            for child in el.children:
                inner.node(child)
            self.reindent()
        self.endtag(el)

    def element(self, el, **extra):
        """Write `el`, then break the line if it has run past column 80 and `el` is simple."""
        super(indenter, self).element(el, **extra)
        if self.out.col > 80 and self.simple(el):
            self.reindent()

    def reindent(self):
        """Ask the writer for a new line indented to the current level."""
        self.out.indent(self.curind.encode(self.charset))

    def start(self):
        """Write the complete document followed by a newline."""
        super(indenter, self).start()
        self.write('\n')