Commit | Line | Data |
---|---|---|
a5e6bd24 FT |
1 | package dolda.jsvc.next; |
2 | ||
3 | import java.io.*; | |
4 | import java.util.*; | |
5 | import org.w3c.dom.*; | |
6 | import org.w3c.dom.bootstrap.*; | |
7 | ||
8 | public class Parser { | |
9 | private static final DOMImplementation domimp; | |
10 | ||
11 | static { | |
12 | DOMImplementationRegistry reg; | |
13 | try { | |
14 | reg = DOMImplementationRegistry.newInstance(); | |
15 | } catch(Exception e) { | |
16 | throw(new Error(e)); | |
17 | } | |
18 | DOMImplementation di = reg.getDOMImplementation(""); | |
19 | if(di == null) | |
20 | throw(new RuntimeException("Could not get a DOM implemenation")); | |
21 | domimp = di; | |
22 | } | |
23 | ||
24 | private static boolean namechar(char c) { | |
25 | return((c == ':') || (c == '_') || (c == '$') || (c == '.') || (c == '-') || ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z'))); | |
26 | } | |
27 | ||
28 | protected String entity(String name) { | |
29 | if(name.equals("amp")) | |
30 | return("&"); | |
31 | if(name.equals("lt")) | |
32 | return("<"); | |
33 | if(name.equals("gt")) | |
34 | return(">"); | |
35 | if(name.equals("apos")) | |
36 | return("'"); | |
37 | if(name.equals("quot")) | |
38 | return("\""); | |
39 | return(null); | |
40 | } | |
41 | ||
42 | protected Element makenode(Document doc, String name) { | |
43 | return(doc.createElementNS(null, name)); | |
44 | } | |
45 | ||
46 | protected Attr makeattr(Document doc, Element el, String name) { | |
47 | return(doc.createAttributeNS(el.getNamespaceURI(), name)); | |
48 | } | |
49 | ||
50 | public DocumentFragment parse(Reader in) throws IOException { | |
51 | Stack<Node> stack = new Stack<Node>(); | |
52 | Document doc = domimp.createDocument(null, "dummy", null); | |
53 | DocumentFragment frag = doc.createDocumentFragment(); | |
54 | stack.push(frag); | |
55 | String st = "content"; | |
56 | int c = in.read(); | |
57 | StringBuilder buf = new StringBuilder(); | |
58 | StringBuilder ebuf = new StringBuilder(); | |
59 | char atype = 0; | |
60 | int cdashcnt = 0; | |
61 | while(true) { | |
62 | if(st == "content") { | |
63 | if(c == '<') { | |
64 | st = "tag"; | |
65 | c = in.read(); | |
66 | } else if(c < 0) { | |
67 | if(stack.peek() == frag) | |
68 | return(frag); | |
69 | else | |
70 | throw(new ParseException("Unexpected end-of-file while parsing non-root element")); | |
71 | } else { | |
72 | st = "text"; | |
73 | } | |
74 | } else if(st == "tag") { | |
75 | if(Character.isWhitespace((char)c)) { | |
76 | c = in.read(); | |
77 | } else if(c == '!') { | |
78 | cdashcnt = 0; | |
79 | c = in.read(); | |
80 | st = "comment"; | |
81 | } else if(namechar((char)c)) { | |
82 | st = "stag"; | |
83 | } else if(c == '/') { | |
84 | c = in.read(); | |
85 | st = "etag"; | |
86 | } else if(c < 0) { | |
87 | throw(new ParseException("Unexpected end-of-file while parsing tag")); | |
88 | } else { | |
89 | throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in tag name")); | |
90 | } | |
91 | } else if(st == "stag") { | |
92 | boolean flush = false; | |
93 | if(namechar((char)c)) { | |
94 | buf.append((char)c); | |
95 | c = in.read(); | |
96 | } else if(c == '>') { | |
97 | flush = true; | |
98 | } else if(Character.isWhitespace((char)c)) { | |
99 | flush = true; | |
100 | c = in.read(); | |
101 | } else if(c < 0) { | |
102 | throw(new ParseException("Unexpected end-of-file while parsing tag name")); | |
103 | } else { | |
104 | throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in tag name")); | |
105 | } | |
106 | if(flush) { | |
107 | Element n = makenode(doc, buf.toString()); | |
108 | buf = new StringBuilder(); | |
109 | stack.peek().appendChild(n); | |
110 | stack.push(n); | |
111 | st = "attr"; | |
112 | } | |
113 | } else if(st == "comment") { | |
114 | if(c == '-') { | |
115 | cdashcnt++; | |
116 | c = in.read(); | |
117 | } else if((c == '>') && (cdashcnt == 4)) { | |
118 | stack.peek().appendChild(doc.createComment(buf.toString())); | |
119 | buf = new StringBuilder(); | |
120 | st = "content"; | |
121 | c = in.read(); | |
122 | } else if(cdashcnt >= 2) { | |
123 | if(cdashcnt > 2) | |
124 | cdashcnt = 2; | |
125 | buf.append((char)c); | |
126 | c = in.read(); | |
127 | } else if(c < 0) { | |
128 | throw(new ParseException("Unexpected end-of-file while parsing comment")); | |
129 | } else { | |
130 | throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in comment")); | |
131 | } | |
132 | } else if(st == "attr") { | |
133 | if(namechar((char)c)) { | |
134 | st = "aname"; | |
135 | } else if(c == '>') { | |
136 | st = "content"; | |
137 | c = in.read(); | |
138 | } else if(c == '/') { | |
139 | st = "stagend"; | |
140 | c = in.read(); | |
141 | } else if(Character.isWhitespace((char)c)) { | |
142 | c = in.read(); | |
143 | } else if(c < 0) { | |
144 | throw(new ParseException("Unexpected end-of-file while parsing attributes")); | |
145 | } else { | |
146 | throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered inside tag")); | |
147 | } | |
148 | } else if(st == "stagend") { | |
149 | if(c == '>') { | |
150 | stack.pop(); | |
151 | c = in.read(); | |
152 | st = "content"; | |
153 | } else if(Character.isWhitespace((char)c)) { | |
154 | c = in.read(); | |
155 | } else if(c < 0) { | |
156 | throw(new ParseException("Unexpected end-of-file at end of empty tag")); | |
157 | } else { | |
158 | throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered at and of empty tag")); | |
159 | } | |
160 | } else if(st == "aname") { | |
161 | if(namechar((char)c)) { | |
162 | buf.append((char)c); | |
163 | c = in.read(); | |
164 | } else if(Character.isWhitespace((char)c)) { | |
165 | c = in.read(); | |
166 | } else if(c == '=') { | |
167 | Element el = (Element)stack.peek(); | |
168 | Attr attr = makeattr(doc, el, buf.toString()); | |
169 | el.setAttributeNodeNS(attr); | |
170 | buf = new StringBuilder(); | |
171 | stack.push(attr); | |
172 | st = "avalstart"; | |
173 | c = in.read(); | |
174 | } else if(c < 0) { | |
175 | throw(new ParseException("Unexpected end-of-file while parsing attribute name")); | |
176 | } else { | |
177 | throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in attribute name")); | |
178 | } | |
179 | } else if(st == "avalstart") { | |
180 | if((c == '\'') || (c == '"')) { | |
181 | atype = (char)c; | |
182 | c = in.read(); | |
183 | st = "aval"; | |
184 | } else if(Character.isWhitespace((char)c)) { | |
185 | c = in.read(); | |
186 | } else if(c < 0) { | |
187 | throw(new ParseException("Unexpected end-of-file while parsing attribute value")); | |
188 | } else { | |
189 | throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in attribute value")); | |
190 | } | |
191 | } else if(st == "aval") { | |
192 | if(c == atype) { | |
193 | c = in.read(); | |
194 | Attr a = (Attr)stack.pop(); | |
195 | a.setValue(buf.toString()); | |
196 | buf = new StringBuilder(); | |
197 | st = "attr"; | |
198 | } else if(c == '&') { | |
199 | c = in.read(); | |
200 | st = "aent"; | |
201 | } else if(c < 0) { | |
202 | throw(new ParseException("Unexpected end-of-file while parsing attribute value")); | |
203 | } else { | |
204 | buf.append((char)c); | |
205 | c = in.read(); | |
206 | } | |
207 | } else if(st == "etag") { | |
208 | if(namechar((char)c)) { | |
209 | buf.append((char)c); | |
210 | c = in.read(); | |
211 | } else if(c == '>') { | |
212 | String nm = buf.toString(); | |
213 | buf = new StringBuilder(); | |
214 | Node n = stack.pop(); | |
215 | if(n instanceof DocumentFragment) | |
216 | throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing root content")); | |
217 | Element el = (Element)n; | |
218 | if(!nm.equals(el.getTagName())) | |
219 | throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing `" + el.getTagName() + "'")); | |
220 | c = in.read(); | |
221 | st = "content"; | |
222 | } else if(c < 0) { | |
223 | throw(new ParseException("Unexpected end-of-file while parsing end tag")); | |
224 | } else { | |
225 | throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in end tag")); | |
226 | } | |
227 | } else if(st == "text") { | |
228 | boolean flush = false; | |
229 | if(c == '&') { | |
230 | st = "ent"; | |
231 | c = in.read(); | |
232 | } else if(c == '<') { | |
233 | flush = true; | |
234 | st = "content"; | |
235 | } else if(c < 0) { | |
236 | flush = true; | |
237 | st = "content"; | |
238 | } else { | |
239 | buf.append((char)c); | |
240 | c = in.read(); | |
241 | } | |
242 | if(flush) { | |
243 | Text n = doc.createTextNode(buf.toString()); | |
244 | buf = new StringBuilder(); | |
245 | stack.peek().appendChild(n); | |
246 | } | |
247 | } else if(st == "ent") { | |
248 | if(c == ';') { | |
249 | String ename = ebuf.toString(); | |
250 | ebuf = new StringBuilder(); | |
251 | String rep = entity(ename); | |
252 | if(rep == null) | |
253 | throw(new ParseException("Unknown entity `" + ename + "' encountered")); | |
254 | buf.append(rep); | |
255 | st = "text"; | |
256 | c = in.read(); | |
257 | } else if(c < 0) { | |
258 | throw(new ParseException("Unexpected end-of-file while parsing entity name")); | |
259 | } else if(namechar((char)c)) { | |
260 | ebuf.append((char)c); | |
261 | c = in.read(); | |
262 | } else { | |
263 | throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in entity name")); | |
264 | } | |
265 | } else if(st == "aent") { | |
266 | if(c == ';') { | |
267 | String ename = ebuf.toString(); | |
268 | ebuf = new StringBuilder(); | |
269 | String rep = entity(ename); | |
270 | if(rep == null) | |
271 | throw(new ParseException("Unknown entity `" + ename + "' encountered")); | |
272 | buf.append(rep); | |
273 | st = "aval"; | |
274 | c = in.read(); | |
275 | } else if(c < 0) { | |
276 | throw(new ParseException("Unexpected end-of-file while parsing entity name")); | |
277 | } else if(namechar((char)c)) { | |
278 | ebuf.append((char)c); | |
279 | c = in.read(); | |
280 | } else { | |
281 | throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in entity name")); | |
282 | } | |
283 | } else { | |
284 | throw(new Error("BUG: Typoed state " + st)); | |
285 | } | |
286 | } | |
287 | } | |
288 | ||
289 | private static String printable(char c) { | |
290 | if(c < 32) | |
291 | return(String.format("\\%03o", (int)c)); | |
292 | return(Character.toString(c)); | |
293 | } | |
294 | ||
295 | public static void main(String[] args) throws Exception { | |
296 | Parser p = new Parser(); | |
297 | DocumentFragment f = p.parse(new FileReader(args[0])); | |
298 | javax.xml.transform.TransformerFactory fac = javax.xml.transform.TransformerFactory.newInstance(); | |
299 | fac.setAttribute("indent-number", 2); | |
300 | javax.xml.transform.Transformer t = fac.newTransformer(); | |
301 | t.setOutputProperty(javax.xml.transform.OutputKeys.INDENT, "yes"); | |
302 | t.transform(new javax.xml.transform.dom.DOMSource(f), new javax.xml.transform.stream.StreamResult(System.out)); | |
303 | System.out.println(t.getClass()); | |
304 | } | |
305 | } |