| 1 | package dolda.jsvc.next; |
| 2 | |
| 3 | import java.io.*; |
| 4 | import java.util.*; |
| 5 | import org.w3c.dom.*; |
| 6 | import org.w3c.dom.bootstrap.*; |
| 7 | |
| 8 | public class Parser { |
/** DOM implementation used to create the per-parse documents; resolved once when the class is initialized. */
private static final DOMImplementation domimp;

static {
    final DOMImplementationRegistry registry;
    try {
        registry = DOMImplementationRegistry.newInstance();
    } catch (Exception cause) {
        // Nothing sensible can be done without a registry; abort class initialization.
        throw new Error(cause);
    }
    // An empty feature string places no requirements on the implementation returned.
    final DOMImplementation candidate = registry.getDOMImplementation("");
    if (candidate == null) {
        throw new RuntimeException("Could not get a DOM implemenation");
    }
    domimp = candidate;
}
| 23 | |
| 24 | public class State { |
| 25 | public final Document doc = domimp.createDocument(null, "dummy", null); |
| 26 | public final PeekReader in; |
| 27 | |
| 28 | private State(Reader in) { |
| 29 | this.in = new PeekReader(in); |
| 30 | } |
| 31 | } |
| 32 | |
| 33 | private static boolean namechar(char c) { |
| 34 | return((c == ':') || (c == '_') || (c == '$') || (c == '.') || (c == '-') || ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z'))); |
| 35 | } |
| 36 | |
| 37 | protected String entity(String name) { |
| 38 | if(name.equals("amp")) |
| 39 | return("&"); |
| 40 | if(name.equals("lt")) |
| 41 | return("<"); |
| 42 | if(name.equals("gt")) |
| 43 | return(">"); |
| 44 | if(name.equals("apos")) |
| 45 | return("'"); |
| 46 | if(name.equals("quot")) |
| 47 | return("\""); |
| 48 | return(null); |
| 49 | } |
| 50 | |
| 51 | protected Element makenode(Document doc, String name) { |
| 52 | return(doc.createElementNS(null, name)); |
| 53 | } |
| 54 | |
| 55 | protected Attr makeattr(Document doc, Element el, String name, String val) { |
| 56 | Attr a = doc.createAttributeNS(el.getNamespaceURI(), name); |
| 57 | a.setValue(val); |
| 58 | return(a); |
| 59 | } |
| 60 | |
| 61 | protected Attr makeattr(Document doc, Element el, String name) { |
| 62 | return(doc.createAttributeNS(el.getNamespaceURI(), name)); |
| 63 | } |
| 64 | |
| 65 | protected String name(State s) throws IOException { |
| 66 | StringBuilder buf = new StringBuilder(); |
| 67 | while(true) { |
| 68 | int c = s.in.peek(); |
| 69 | if(c < 0) { |
| 70 | break; |
| 71 | } else if(namechar((char)c)) { |
| 72 | buf.append((char)s.in.read()); |
| 73 | } else { |
| 74 | break; |
| 75 | } |
| 76 | } |
| 77 | if(buf.length() == 0) |
| 78 | throw(new ParseException("Expected name, got `" + printable(s.in.peek()) + "'")); |
| 79 | return(buf.toString()); |
| 80 | } |
| 81 | |
| 82 | protected String entity(State s) throws IOException { |
| 83 | int c = s.in.read(); |
| 84 | if(c != '&') |
| 85 | throw(new ParseException("Expected `&' while reading entity, got `" + printable(c) + "'")); |
| 86 | String nm = name(s); |
| 87 | c = s.in.read(); |
| 88 | if(c != ';') |
| 89 | throw(new ParseException("Expected `;' while reading entity, got `" + printable(c) + "'")); |
| 90 | return(entity(nm)); |
| 91 | } |
| 92 | |
| 93 | protected Attr attribute(State s, Element el) throws IOException { |
| 94 | String nm = name(s); |
| 95 | s.in.peek(true); |
| 96 | int c = s.in.read(); |
| 97 | if(c != '=') |
| 98 | throw(new ParseException("Expected `=' while reading attribute, got `" + printable(c) + "'")); |
| 99 | s.in.peek(true); |
| 100 | int qt = s.in.read(); |
| 101 | if((qt != '"') && (qt != '\'')) |
| 102 | throw(new ParseException("Expected double or single quote while reading attribute, got `" + printable(qt) + "'")); |
| 103 | StringBuilder buf = new StringBuilder(); |
| 104 | while(true) { |
| 105 | c = s.in.peek(); |
| 106 | if(c < 0) { |
| 107 | throw(new ParseException("Unexpected end-of-file while reading attribute value")); |
| 108 | } else if(c == qt) { |
| 109 | s.in.read(); |
| 110 | break; |
| 111 | } else if(c == '&') { |
| 112 | buf.append(entity(s)); |
| 113 | } else { |
| 114 | buf.append((char)s.in.read()); |
| 115 | } |
| 116 | } |
| 117 | return(makeattr(s.doc, el, nm, buf.toString())); |
| 118 | } |
| 119 | |
| 120 | protected Element element(State s) throws IOException { |
| 121 | Element n = makenode(s.doc, name(s)); |
| 122 | while(true) { |
| 123 | int c = s.in.peek(true); |
| 124 | if(c < 0) { |
| 125 | throw(new ParseException("Unexpected end-of-file while parsing start tag")); |
| 126 | } else if(c == '>') { |
| 127 | s.in.read(); |
| 128 | break; |
| 129 | } else if(c == '/') { |
| 130 | s.in.read(); |
| 131 | s.in.peek(true); |
| 132 | c = s.in.read(); |
| 133 | if(c != '>') |
| 134 | throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in end of empty tag")); |
| 135 | return(n); |
| 136 | } else if(namechar((char)c)) { |
| 137 | n.setAttributeNodeNS(attribute(s, n)); |
| 138 | } else { |
| 139 | throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in start tag")); |
| 140 | } |
| 141 | } |
| 142 | while(true) { |
| 143 | int c = s.in.peek(); |
| 144 | if(c < 0) { |
| 145 | break; |
| 146 | } else if(c == '<') { |
| 147 | s.in.read(); |
| 148 | c = s.in.peek(true); |
| 149 | if(c == '/') { |
| 150 | s.in.read(); |
| 151 | s.in.peek(true); |
| 152 | String nm = name(s); |
| 153 | if(!nm.equals(n.getTagName())) |
| 154 | throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing `" + n.getTagName() + "'")); |
| 155 | if(s.in.peek(true) != '>') |
| 156 | throw(new ParseException("Expected `>' while reading end tag, got `" + printable(c) + "'")); |
| 157 | s.in.read(); |
| 158 | break; |
| 159 | } else { |
| 160 | n.appendChild(stag(s)); |
| 161 | } |
| 162 | } else { |
| 163 | n.appendChild(text(s)); |
| 164 | } |
| 165 | } |
| 166 | return(n); |
| 167 | } |
| 168 | |
| 169 | protected Comment comment(State s) throws IOException { |
| 170 | if((s.in.read() != '!') || |
| 171 | (s.in.read() != '-') || |
| 172 | (s.in.read() != '-')) |
| 173 | throw(new ParseException("Illegal start of comment")); |
| 174 | StringBuilder buf = new StringBuilder(); |
| 175 | while(true) { |
| 176 | int c = s.in.peek(); |
| 177 | if(c < 0) { |
| 178 | throw(new ParseException("Unexpected end-of-file while parsing comment")); |
| 179 | } else if(c == '-') { |
| 180 | s.in.read(); |
| 181 | if(s.in.peek() == '-') { |
| 182 | s.in.read(); |
| 183 | if(s.in.peek() == '>') { |
| 184 | s.in.read(); |
| 185 | break; |
| 186 | } else { |
| 187 | buf.append("--"); |
| 188 | } |
| 189 | } else { |
| 190 | buf.append("-"); |
| 191 | } |
| 192 | } else { |
| 193 | buf.append((char)s.in.read()); |
| 194 | } |
| 195 | } |
| 196 | return(s.doc.createComment(buf.toString())); |
| 197 | } |
| 198 | |
| 199 | protected Node stag(State s) throws IOException { |
| 200 | int c = s.in.peek(true); |
| 201 | if(c < 0) { |
| 202 | throw(new ParseException("Unexpected end-of-file while parsing tag type")); |
| 203 | } else if(c == '!') { |
| 204 | return(comment(s)); |
| 205 | } else { |
| 206 | return(element(s)); |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | protected Text text(State s) throws IOException { |
| 211 | StringBuilder buf = new StringBuilder(); |
| 212 | while(true) { |
| 213 | int c = s.in.peek(); |
| 214 | if(c < 0) { |
| 215 | break; |
| 216 | } else if(c == '<') { |
| 217 | break; |
| 218 | } else if(c == '&') { |
| 219 | buf.append(entity(s)); |
| 220 | } else { |
| 221 | buf.append((char)s.in.read()); |
| 222 | } |
| 223 | } |
| 224 | return(s.doc.createTextNode(buf.toString())); |
| 225 | } |
| 226 | |
| 227 | public DocumentFragment parse(Reader in) throws IOException { |
| 228 | State s = new State(in); |
| 229 | DocumentFragment frag = s.doc.createDocumentFragment(); |
| 230 | while(true) { |
| 231 | int c = s.in.peek(); |
| 232 | if(c < 0) { |
| 233 | return(frag); |
| 234 | } else if(c == '<') { |
| 235 | s.in.read(); |
| 236 | frag.appendChild(stag(s)); |
| 237 | } else { |
| 238 | frag.appendChild(text(s)); |
| 239 | } |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | private static String printable(int c) { |
| 244 | if(c < 0) |
| 245 | return("EOF"); |
| 246 | if(c < 32) |
| 247 | return(String.format("\\%03o", (int)c)); |
| 248 | return(Character.toString((char)c)); |
| 249 | } |
| 250 | |
| 251 | public static void main(String[] args) throws Exception { |
| 252 | Parser p = new Parser(); |
| 253 | DocumentFragment f = p.parse(new FileReader(args[0])); |
| 254 | javax.xml.transform.TransformerFactory fac = javax.xml.transform.TransformerFactory.newInstance(); |
| 255 | fac.setAttribute("indent-number", 2); |
| 256 | javax.xml.transform.Transformer t = fac.newTransformer(); |
| 257 | t.setOutputProperty(javax.xml.transform.OutputKeys.INDENT, "yes"); |
| 258 | t.transform(new javax.xml.transform.dom.DOMSource(f), new javax.xml.transform.stream.StreamResult(System.out)); |
| 259 | System.out.println(t.getClass()); |
| 260 | } |
| 261 | } |