JDK-7165725 : JAVA6 HTML PARSER CANNOT PARSE MULTIPLE SCRIPT TAGS IN A LINE CORRECTLY
  • Type: Bug
  • Component: client-libs
  • Sub-Component: javax.swing
  • Affected Version: 6u27
  • Priority: P2
  • Status: Resolved
  • Resolution: Fixed
  • OS: windows_xp
  • CPU: x86
  • Submitted: 2012-05-02
  • Updated: 2013-09-12
  • Resolved: 2012-05-08
The Version table provides details related to the release in which this issue/RFE will be addressed.

Unresolved : Release in which this issue/RFE will be addressed.
Resolved: Release in which this issue/RFE has been resolved.
Fixed : Release in which this issue/RFE has been fixed. The release containing this fix may be available for download as an Early Access Release or a General Availability Release.

To download the current JDK release, click here.
JDK 6 | JDK 7 | JDK 8
6u32 b32: Fixed | 7u40: Fixed | 8: Fixed
Description
Issue Clarification 
  =================== 
  Other 
  General description of the problem 
    Swing HTML Parser does not parse HTML files which have multiple script tags 
  on a line. 
    Compile attached Main.java and run it as: 
      java Main sample1.html 
   
    Sun bug 7011777 notes that Parser.parseScript is new in Java 6 and may have 
  issues. 
   
  How often has the customer seen the problem? 
    Always. 
   
  How reproducible is the problem? 
    Parse an HTML file like the one below with a ParserDelegator object that has an 
  HTMLEditorKit.ParserCallback for events such as start tags, end tags, etc. 
   
  Does the customer see the problem in development/staging/production? 
    They see the problem on the production systems. 
   
  system configuration 
  ==================== 
  Full output from java -version 
  java version "1.6.0_27" 
  Java(TM) SE Runtime Environment (build 1.6.0_27-b07) 
  Java HotSpot(TM) Client VM (build 20.2-b06, mixed mode, sharing) 
   
  The symptom was seen on 6u31 and 7u3 too. 
   
  OS and patch level 
    Windows XP SP3.  Problem happens on Linux too 
  CPU architecture 
    X86 
  other

Comments
EVALUATION patch. --- old/src/share/classes/javax/swing/text/html/parser/Parser.java Tue May 1 21:54:55 2012 +++ new/src/share/classes/javax/swing/text/html/parser/Parser.java Tue May 1 21:54:49 2012 @@ -1981,8 +1981,6 @@ if (i == SCRIPT_END_TAG.length) { /* '</script>' tag detected */ - /* Here, ch == '>' */ - ch = readCh(); /* Here, ch == the first character after </script> */ return; } else { @@ -2055,6 +2053,8 @@ handleComment(str.toCharArray()); endTag(false); lastBlockStartPos = currentPosition; + + continue; } else { switch (c) { case '<': --- /dev/null Tue May 1 21:55:10 2012 +++ new/test/javax/swing/text/html/parser/Parser/7165725/bug7165725.java Tue May 1 21:55:05 2012 @@ -0,0 +1,296 @@ +/* @test + @bug 7165725 + @summary Tests if HTML parser can handle successive script tags in a line + and it does not call false text callback after script tags. + @library ../../../../../regtesthelpers + @build Util + @run main bug7165725 +*/ + +import java.awt.BorderLayout; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import javax.swing.*; +import javax.swing.text.AbstractDocument.AbstractElement; +import javax.swing.text.AbstractDocument; +import javax.swing.text.Document; +import javax.swing.text.MutableAttributeSet; +import javax.swing.text.html.HTML; +import javax.swing.text.html.HTMLDocument; +import javax.swing.text.html.HTMLEditorKit; +import javax.swing.text.html.parser.ParserDelegator; + +public class bug7165725 extends JFrame { + private static class GoldenElement { + + private String goldenName; + private List<GoldenElement> goldenChildren; + + GoldenElement(String goldenName, GoldenElement... 
goldenChildren){ + this.goldenName = goldenName; + if (goldenChildren != null) { + this.goldenChildren = Arrays.asList(goldenChildren); + } else { + this.goldenChildren = new ArrayList<GoldenElement>(); + } + } + + // throws RuntimeException if not ok + public void checkStructureEquivalence(AbstractDocument.AbstractElement elem) { + String name = elem.getName(); + if (!goldenName.equals(name)) { + throw new RuntimeException("Bad structure: expected element name is '" + goldenName + "' but the actual name was '" + name + "'."); + } + int goldenChildCount = goldenChildren.size(); + int childCount = elem.getChildCount(); + if (childCount != goldenChildCount) { + System.out.print("D: children: "); + for (int i = 0; i < childCount; i++) { + System.out.print(" " + elem.getElement(i).getName()); + } + System.out.println(""); + System.out.print("D: goldenChildren: "); + for (GoldenElement ge : goldenChildren) { + System.out.print(" " + ge.goldenName); + } + System.out.println(""); + + throw new RuntimeException("Bad structure: expected child count of element '" + goldenName + "' is '" + goldenChildCount + "' but the actual count was '" + childCount + "'."); + } + for (int i = 0; i < childCount; i++) { + AbstractDocument.AbstractElement nextElem = (AbstractDocument.AbstractElement) elem.getElement(i); + GoldenElement goldenElement = goldenChildren.get(i); + goldenElement.checkStructureEquivalence(nextElem); + } + } + } + + private JEditorPane editorPane; + public void execute(final String urlStr, final GoldenElement goldenElement) throws Exception { + System.out.println(); + System.out.println("***** TEST: " + urlStr + " *****"); + System.out.println(); + + SwingUtilities.invokeAndWait(new Runnable() { + public void run() { + try { + editorPane = new JEditorPane(); + editorPane.setEditorKit(new HTMLEditorKit() { + public Document createDefaultDocument() { + AbstractDocument doc = + (AbstractDocument) super.createDefaultDocument(); + doc.setAsynchronousLoadPriority(-1); + 
return doc; + } + }); + editorPane.setPage(new URL(urlStr)); + } catch (IOException ex) { + throw new RuntimeException("Test failed", ex); + } + editorPane.setEditable(false); + JScrollPane scroller = new JScrollPane(); + JViewport vp = scroller.getViewport(); + vp.add(editorPane); + add(scroller, BorderLayout.CENTER); + setDefaultCloseOperation(EXIT_ON_CLOSE); + setSize(400, 400); + setLocationRelativeTo(null); + setVisible(true); + } + }); + + Util.blockTillDisplayed(this); + + SwingUtilities.invokeAndWait(new Runnable() { + public void run() { + HTMLDocument doc = (HTMLDocument) editorPane.getDocument(); + doc.dump(System.out); + goldenElement.checkStructureEquivalence((AbstractElement) doc.getDefaultRootElement()); + dispose(); + } + }); + + System.out.println(); + System.out.println("*********************************"); + System.out.println(); + } + + public static void main(String[] args) throws Exception { + + String dirURL = getDirURL(); + + System.out.println("dirURL = " + dirURL); + + new bug7165725().execute(dirURL + "successive-script-tag.html", createSuccessiveScriptTags()); + new bug7165725().execute(dirURL + "false-text-after-script.html", createFalseTextAfterScript()); + + checkByCallbackForSuccessiveScript(); + checkByCallbackForFalseTextAfterScript(); + + System.out.println(); + System.out.println(); + System.out.println("Test passed."); + } + + static String getDirURL() { + return "file:///" + + new File(System.getProperty("test.src", ".")).getAbsolutePath() + + File.separator; + } + + static String getParsedContentOneLine(String path) throws Exception { + File f = new File(path); + FileReader fr = new FileReader(f); + ParserDelegator pd = new ParserDelegator(); + SBParserCallback sbcallback = new SBParserCallback(); + pd.parse(fr, sbcallback, true); + fr.close(); + return sbcallback.getStringOneLine(); + } + + static String getParsedContentOneLine(URL url) throws Exception { + return getParsedContentOneLine(url.getPath()); + } + + static String 
getParsedContent(String path) throws Exception { + File f = new File(path); + FileReader fr = new FileReader(f); + ParserDelegator pd = new ParserDelegator(); + SBParserCallback sbcallback = new SBParserCallback(); + pd.parse(fr, sbcallback, true); + fr.close(); + return sbcallback.toString(); + } + + static void checkByCallbackForSuccessiveScript() throws Exception { + String content = getParsedContentOneLine(new URL(getDirURL() + "successive-script-tag.html")); + if (!content.matches(".*<script .*/js/js1\\.js.*<script .*/js/js2\\.js.*<script .*/js/js3\\.js.*")) + throw new RuntimeException("Failed to lookup script tags/attributes."); + if (!content.matches(".*<style .*stylesheets/base\\.css.*<style .*stylesheets/adv\\.css.*")) + throw new RuntimeException("Failed to lookup style tags."); + } + + static void checkByCallbackForFalseTextAfterScript() throws Exception { + String content = getParsedContentOneLine(new URL(getDirURL() + "false-text-after-script.html")); + final int bodyIdx = content.indexOf("<body "); + if (bodyIdx > 0) { + String sbody = content.substring(bodyIdx); + // There should be no Text(...) 
in this html + if (sbody.indexOf("Text(") >= 0) + throw new RuntimeException("Unexpected text found."); + } else { + throw new RuntimeException("Failed to find body tag."); + } + } + + private static GoldenElement createSuccessiveScriptTags() { + return new GoldenElement("html", + new GoldenElement("head", + new GoldenElement("p-implied", + new GoldenElement("title"), + new GoldenElement("title"), + new GoldenElement("script"), + new GoldenElement("comment"), + new GoldenElement("script"), + new GoldenElement("script"), + new GoldenElement("comment"), + new GoldenElement("script"), + new GoldenElement("script"), + new GoldenElement("comment"), + new GoldenElement("script"), + new GoldenElement("content"))), + new GoldenElement("body", + new GoldenElement("p-implied", + new GoldenElement("content")))); + } + + private static GoldenElement createFalseTextAfterScript() { + return new GoldenElement("html", + new GoldenElement("head", + new GoldenElement("p-implied", + new GoldenElement("title"), + new GoldenElement("title"), + new GoldenElement("content"))), + new GoldenElement("body", + new GoldenElement("form", + new GoldenElement("p-implied", + new GoldenElement("input"), + new GoldenElement("input"), + new GoldenElement("content"))), + new GoldenElement("p-implied", + new GoldenElement("script"), + new GoldenElement("comment"), + new GoldenElement("script"), + new GoldenElement("script"), + new GoldenElement("comment"), + new GoldenElement("script"), + new GoldenElement("content")))); + } + + static class SBParserCallback extends HTMLEditorKit.ParserCallback + { + private int indentSize = 0; + private ArrayList<String> elist = new ArrayList<String>(); + + public String getStringOneLine() { + StringBuilder sb = new StringBuilder(); + for (String s : elist) sb.append(s); + return sb.toString(); + } + + public String toString() { + StringBuffer sb = new StringBuffer(); + for (String s : elist) sb.append(s + "\n"); + return sb.toString(); + } + + public void reset() { 
+ elist = new ArrayList<String>(); + } + + protected void indent() { + indentSize += 3; + } + protected void unIndent() { + indentSize -= 3; if (indentSize < 0) indentSize = 0; + } + + protected String pIndent() { + StringBuilder sb = new StringBuilder(); + for(int i = 0; i < indentSize; i++) sb.append(" "); + return sb.toString(); + } + + public void handleText(char[] data, int pos) { + elist.add(pIndent() + "Text(" + data.length + " chars) \"" + new String(data) + "\""); + } + + public void handleComment(char[] data, int pos) { + elist.add(pIndent() + "Comment(" + data.length + " chars)"); + } + + public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) { + elist.add(pIndent() + "Tag start(<" + t.toString() + " " + a + ">, " + + a.getAttributeCount() + " attrs)"); + indent(); + } + + public void handleEndTag(HTML.Tag t, int pos) { + unIndent(); + elist.add(pIndent() + "Tag end(</" + t.toString() + ">)"); + } + + public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) { + elist.add(pIndent() + "Tag(<" + t.toString() + ">, " + + a.getAttributeCount() + " attrs)"); + } + + public void handleError(String errorMsg, int pos){ + } + } +} --- /dev/null Tue May 1 21:55:23 2012 +++ new/test/javax/swing/text/html/parser/Parser/7165725/false-text-after-script.html Tue May 1 21:55:18 2012 @@ -0,0 +1,20 @@ +<html> +<head> <title> Testing </title> </head> +<body> +<form> + + <input type="text" name="text1" > + <input type="button" name="button1" value="button" onclick="test1(this.form)"> + +</form> + +<SCRIPT LANGUAGE="JavaScript"> + function test1(form) { + alert(form.text1.value); + } +</SCRIPT> +<SCRIPT> + history.forward(); +</SCRIPT> +</body> +</html> --- /dev/null Tue May 1 21:55:36 2012 +++ new/test/javax/swing/text/html/parser/Parser/7165725/successive-script-tag.html Tue May 1 21:55:31 2012 @@ -0,0 +1,8 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<head><title>my title</title> + <script src="../../js/js1.js" 
language="JavaScript"></script><script src="../../js/js2.js" language="JavaScript"></script><script src="../../js/js3.js" language="JavaScript"></script><style type="text/css" media="screen">@import "stylesheets/base.css";</style><style type="text/css" media="screen">@import "stylesheets/adv.css";</style> +</head> +<body> +</body> +</html> --- old/test/javax/swing/text/html/parser/Parser/6325159/bug6325159.java Tue May 1 21:55:48 2012 +++ new/test/javax/swing/text/html/parser/Parser/6325159/bug6325159.java Tue May 1 21:55:43 2012 @@ -122,7 +122,6 @@ new bug6325159().execute(dirURL + "simple_script.html", commonEE); new bug6325159().execute(dirURL + "script.html", commonEE); new bug6325159().execute(dirURL + "comment_in_script.html", commonEE); - new bug6325159().execute(dirURL + "commented_end_script_tag.html", commonEE); new bug6325159().execute(dirURL + "js_comment_in_script.html", commonEE); new bug6325159().execute(dirURL + "no_end_script_tag.html", create_no_end_script_tag_Golden()); --- old/test/javax/swing/text/html/parser/Parser/6325159/commented_end_script_tag.html Tue May 1 21:56:01 2012 +++ /dev/null Tue May 1 21:56:01 2012 @@ -1,9 +0,0 @@ -<html> - <script language=javascript> - document.write('') - document.write('<frameset cols="200,*">') - <!--</script>--> - <body> - <input type="text" value="text"/> - </body> -</html>
02-05-2012