// Issue with Java version 6u41: using HTMLEditorKit to parse pre-existing HTML.
//
// If we parse this document, it works as expected:
//
//   <body>/ at start inside body is okay</body>
//
// But if the document has no <body> tag, we get only a handleEndOfLineString
// callback and never receive the actual text. This is the example document:
//
//   / at start is bad
//
// Testcase:

import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.DTD;
import javax.swing.text.html.parser.DocumentParser;
import javax.swing.text.html.parser.Entity;
import javax.swing.text.html.parser.ParserDelegator;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

/**
 * Bug in Parser when the document starts with a slash.
 *
 * <p>Feeds two small documents through the parser obtained from an
 * {@link HTMLDocument} and prints every callback that the parser fires, so the
 * two traces can be compared side by side.
 */
public class ParserTest {

    /**
     * Runs the reproduction for both sample documents, in order: first the
     * bare text starting with a slash, then the same kind of text wrapped in
     * a {@code <body>} element.
     *
     * @param args ignored
     * @throws IOException if the parser fails while reading a sample
     */
    public static void main(String[] args) throws IOException {
        String[] samples = {
            "/ at start is bad",
            "<body>/ at start inside body is okay</body>"
        };
        for (String sample : samples) {
            doTest(sample);
        }
    }

    /**
     * Parses one document and prints the callback trace, preceded by a
     * {@code doTest:} header line and followed by an empty line.
     *
     * @param text the HTML source to parse
     * @throws IOException if the parser fails while reading {@code text}
     */
    private static void doTest(String text) throws IOException {
        System.out.println("doTest: " + text);

        ParserCB tracer = new ParserCB();
        HTMLEditorKit kit = new HTMLEditorKit();
        HTMLDocument document = (HTMLDocument) kit.createDefaultDocument();
        HTMLEditorKit.Parser parser = document.getParser();
        Reader source = new StringReader(text);

        // Third argument is passed as true, exactly as in the original report.
        parser.parse(source, tracer, true);

        System.out.println();
    }

    /**
     * Parser callback that writes one line to standard output for every
     * event it receives, in the form {@code <callback name>: <detail>}.
     */
    private static class ParserCB extends HTMLEditorKit.ParserCallback {

        /** Prints a single trace line for the named callback. */
        private static void report(String callback, Object detail) {
            System.out.println(callback + ": " + detail);
        }

        @Override
        public void handleComment(char[] data, int pos) {
            report("handleComment", new String(data));
        }

        @Override
        public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
            report("handleStartTag", t);
        }

        @Override
        public void handleEndTag(HTML.Tag t, int pos) {
            report("handleEndTag", t);
        }

        @Override
        public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
            report("handleSimpleTag", t);
        }

        @Override
        public void handleError(String errorMsg, int pos) {
            report("handleError", errorMsg);
        }

        @Override
        public void handleEndOfLineString(String eol) {
            report("handleEndOfLineString", eol);
        }

        @Override
        public void handleText(char[] data, int pos) {
            report("handleText", new String(data));
        }
    }
}

// Output from the test case:
//
//   doTest: / at start is bad
//   handleEndOfLineString:
//
//   doTest: <body>/ at start inside body is okay</body>
//   handleStartTag: html
//   handleStartTag: head
//   handleEndTag: head
//   handleStartTag: body
//   handleText: / at start inside body is okay
//   handleEndTag: body
//   handleEndTag: html
//   handleEndOfLineString: