Commit | Line | Data |
---|---|---|
a5e6bd24 FT |
1 | package dolda.jsvc.next; |
2 | ||
3 | import java.io.*; | |
4 | import java.util.*; | |
5 | import org.w3c.dom.*; | |
a5e6bd24 FT |
6 | |
7 | public class Parser { | |
7c0e72ac | 8 | public class State { |
816cbb00 | 9 | public final Document doc = DomUtil.document(null, "dummy"); |
7c0e72ac FT |
10 | public final PeekReader in; |
11 | ||
12 | private State(Reader in) { | |
13 | this.in = new PeekReader(in); | |
14 | } | |
15 | } | |
16 | ||
a5e6bd24 FT |
17 | private static boolean namechar(char c) { |
18 | return((c == ':') || (c == '_') || (c == '$') || (c == '.') || (c == '-') || ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z'))); | |
19 | } | |
20 | ||
21 | protected String entity(String name) { | |
22 | if(name.equals("amp")) | |
23 | return("&"); | |
24 | if(name.equals("lt")) | |
25 | return("<"); | |
26 | if(name.equals("gt")) | |
27 | return(">"); | |
28 | if(name.equals("apos")) | |
29 | return("'"); | |
30 | if(name.equals("quot")) | |
31 | return("\""); | |
32 | return(null); | |
33 | } | |
34 | ||
35 | protected Element makenode(Document doc, String name) { | |
36 | return(doc.createElementNS(null, name)); | |
37 | } | |
38 | ||
7c0e72ac | 39 | protected String name(State s) throws IOException { |
a5e6bd24 | 40 | StringBuilder buf = new StringBuilder(); |
a5e6bd24 | 41 | while(true) { |
7c0e72ac FT |
42 | int c = s.in.peek(); |
43 | if(c < 0) { | |
44 | break; | |
45 | } else if(namechar((char)c)) { | |
46 | buf.append((char)s.in.read()); | |
47 | } else { | |
48 | break; | |
49 | } | |
50 | } | |
51 | if(buf.length() == 0) | |
52 | throw(new ParseException("Expected name, got `" + printable(s.in.peek()) + "'")); | |
53 | return(buf.toString()); | |
54 | } | |
55 | ||
56 | protected String entity(State s) throws IOException { | |
57 | int c = s.in.read(); | |
58 | if(c != '&') | |
59 | throw(new ParseException("Expected `&' while reading entity, got `" + printable(c) + "'")); | |
60 | String nm = name(s); | |
61 | c = s.in.read(); | |
62 | if(c != ';') | |
63 | throw(new ParseException("Expected `;' while reading entity, got `" + printable(c) + "'")); | |
64 | return(entity(nm)); | |
65 | } | |
66 | ||
67 | protected Attr attribute(State s, Element el) throws IOException { | |
3de0fa23 | 68 | Attr a = s.doc.createAttributeNS(null, name(s)); |
7c0e72ac FT |
69 | s.in.peek(true); |
70 | int c = s.in.read(); | |
71 | if(c != '=') | |
72 | throw(new ParseException("Expected `=' while reading attribute, got `" + printable(c) + "'")); | |
73 | s.in.peek(true); | |
74 | int qt = s.in.read(); | |
75 | if((qt != '"') && (qt != '\'')) | |
76 | throw(new ParseException("Expected double or single quote while reading attribute, got `" + printable(qt) + "'")); | |
77 | StringBuilder buf = new StringBuilder(); | |
78 | while(true) { | |
79 | c = s.in.peek(); | |
80 | if(c < 0) { | |
81 | throw(new ParseException("Unexpected end-of-file while reading attribute value")); | |
82 | } else if(c == qt) { | |
83 | s.in.read(); | |
84 | break; | |
85 | } else if(c == '&') { | |
86 | buf.append(entity(s)); | |
87 | } else { | |
88 | buf.append((char)s.in.read()); | |
89 | } | |
90 | } | |
3de0fa23 FT |
91 | a.setValue(buf.toString()); |
92 | return(a); | |
7c0e72ac FT |
93 | } |
94 | ||
95 | protected Element element(State s) throws IOException { | |
96 | Element n = makenode(s.doc, name(s)); | |
97 | while(true) { | |
98 | int c = s.in.peek(true); | |
99 | if(c < 0) { | |
100 | throw(new ParseException("Unexpected end-of-file while parsing start tag")); | |
101 | } else if(c == '>') { | |
102 | s.in.read(); | |
103 | break; | |
104 | } else if(c == '/') { | |
105 | s.in.read(); | |
106 | s.in.peek(true); | |
107 | c = s.in.read(); | |
108 | if(c != '>') | |
109 | throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in end of empty tag")); | |
110 | return(n); | |
111 | } else if(namechar((char)c)) { | |
112 | n.setAttributeNodeNS(attribute(s, n)); | |
113 | } else { | |
114 | throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in start tag")); | |
115 | } | |
116 | } | |
117 | while(true) { | |
118 | int c = s.in.peek(); | |
119 | if(c < 0) { | |
120 | break; | |
121 | } else if(c == '<') { | |
122 | s.in.read(); | |
123 | c = s.in.peek(true); | |
124 | if(c == '/') { | |
125 | s.in.read(); | |
126 | s.in.peek(true); | |
127 | String nm = name(s); | |
128 | if(!nm.equals(n.getTagName())) | |
129 | throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing `" + n.getTagName() + "'")); | |
130 | if(s.in.peek(true) != '>') | |
131 | throw(new ParseException("Expected `>' while reading end tag, got `" + printable(c) + "'")); | |
132 | s.in.read(); | |
133 | break; | |
a5e6bd24 | 134 | } else { |
7c0e72ac | 135 | n.appendChild(stag(s)); |
a5e6bd24 | 136 | } |
7c0e72ac FT |
137 | } else { |
138 | n.appendChild(text(s)); | |
139 | } | |
140 | } | |
141 | return(n); | |
142 | } | |
143 | ||
144 | protected Comment comment(State s) throws IOException { | |
145 | if((s.in.read() != '!') || | |
146 | (s.in.read() != '-') || | |
147 | (s.in.read() != '-')) | |
148 | throw(new ParseException("Illegal start of comment")); | |
149 | StringBuilder buf = new StringBuilder(); | |
150 | while(true) { | |
151 | int c = s.in.peek(); | |
152 | if(c < 0) { | |
153 | throw(new ParseException("Unexpected end-of-file while parsing comment")); | |
154 | } else if(c == '-') { | |
155 | s.in.read(); | |
156 | if(s.in.peek() == '-') { | |
157 | s.in.read(); | |
158 | if(s.in.peek() == '>') { | |
159 | s.in.read(); | |
160 | break; | |
161 | } else { | |
162 | buf.append("--"); | |
163 | } | |
a5e6bd24 | 164 | } else { |
7c0e72ac | 165 | buf.append("-"); |
a5e6bd24 FT |
166 | } |
167 | } else { | |
7c0e72ac | 168 | buf.append((char)s.in.read()); |
a5e6bd24 FT |
169 | } |
170 | } | |
7c0e72ac | 171 | return(s.doc.createComment(buf.toString())); |
a5e6bd24 | 172 | } |
7c0e72ac FT |
173 | |
174 | protected Node stag(State s) throws IOException { | |
175 | int c = s.in.peek(true); | |
176 | if(c < 0) { | |
177 | throw(new ParseException("Unexpected end-of-file while parsing tag type")); | |
178 | } else if(c == '!') { | |
179 | return(comment(s)); | |
180 | } else { | |
181 | return(element(s)); | |
182 | } | |
183 | } | |
184 | ||
185 | protected Text text(State s) throws IOException { | |
186 | StringBuilder buf = new StringBuilder(); | |
187 | while(true) { | |
188 | int c = s.in.peek(); | |
189 | if(c < 0) { | |
190 | break; | |
191 | } else if(c == '<') { | |
192 | break; | |
193 | } else if(c == '&') { | |
194 | buf.append(entity(s)); | |
195 | } else { | |
196 | buf.append((char)s.in.read()); | |
197 | } | |
198 | } | |
199 | return(s.doc.createTextNode(buf.toString())); | |
200 | } | |
201 | ||
202 | public DocumentFragment parse(Reader in) throws IOException { | |
203 | State s = new State(in); | |
204 | DocumentFragment frag = s.doc.createDocumentFragment(); | |
205 | while(true) { | |
206 | int c = s.in.peek(); | |
207 | if(c < 0) { | |
208 | return(frag); | |
209 | } else if(c == '<') { | |
210 | s.in.read(); | |
211 | frag.appendChild(stag(s)); | |
212 | } else { | |
213 | frag.appendChild(text(s)); | |
214 | } | |
215 | } | |
216 | } | |
217 | ||
218 | private static String printable(int c) { | |
219 | if(c < 0) | |
220 | return("EOF"); | |
a5e6bd24 FT |
221 | if(c < 32) |
222 | return(String.format("\\%03o", (int)c)); | |
7c0e72ac | 223 | return(Character.toString((char)c)); |
a5e6bd24 | 224 | } |
a5e6bd24 | 225 | } |