Duplicate :
|
FULL PRODUCT VERSION : java version "1.7.0" Java(TM) SE Runtime Environment (build 1.7.0-b147) Java HotSpot(TM) Client VM (build 21.0-b17, mixed mode, sharing) java version "1.6.0_25" Java(TM) SE Runtime Environment (build 1.6.0_25-b06) Java HotSpot(TM) Client VM (build 20.0-b11, mixed mode, sharing) ADDITIONAL OS VERSION INFORMATION : Microsoft Windows XP Professional Version 2002 Service Pack 3 Also occur under Linux, likely not a OS dependent bug A DESCRIPTION OF THE PROBLEM : The pattern (<[^>]+> *)+$ searching for white-space separated HTML tags at the end of a document throws a StackOverFlow error if the input document has too many matches (eg, 500 tags in a row). Same bug can be triggered with a pattern anchored at the beginning ^(<[^>]+> *)+ STEPS TO FOLLOW TO REPRODUCE THE PROBLEM : Compile and run the following code: import java.util.regex.Pattern; public class RegexBug3 { public static final Pattern trailingTags = Pattern.compile("(<[^>]+> *)+$"); public static final Pattern leadingTags = Pattern.compile("^( *<[^>]+>)+"); public static void main(String[] args) { StringBuilder longDocument = new StringBuilder("<b>"); for (int i = 0; i < 16; i++) { if (trailingTags.matcher(longDocument).find()) { // if (leadingTags.matcher(longDocument).find()) { System.err.println("OK for document with " + (long) (Math.pow(2, i) + 0.5) + " tags"); } longDocument.append(longDocument); } } } EXPECTED VERSUS ACTUAL BEHAVIOR : EXPECTED - OK for document with 1 tags: "" OK for document with 2 tags: "" OK for document with 4 tags: "" OK for document with 8 tags: "" OK for document with 16 tags: "" OK for document with 32 tags: "" OK for document with 64 tags: "" OK for document with 128 tags: "" OK for document with 256 tags: "" OK for document with 512 tags: "" OK for document with 1024 tags: "" OK for document with 2048 tags: "" OK for document with 4096 tags: "" OK for document with 8192 tags: "" OK for document with 16384 tags: "" OK for document with 32768 tags: "" ACTUAL - OK for document with 1 tags: "" OK for document with 2 tags: "" OK for document with 4 tags: "" OK for document with 8 tags: "" OK for document with 16 tags: "" OK for document with 32 tags: "" OK for document with 64 tags: "" OK for document with 128 tags: "" OK for document with 256 tags: "" Exception in thread "main" java.lang.StackOverflowError at java.util.regex.Pattern$CharProperty.match(Pattern.java:3693) at java.util.regex.Pattern$Curly.match(Pattern.java:4125) ... ERROR MESSAGES/STACK TRACES THAT OCCUR : Exception in thread "main" java.lang.StackOverflowError at java.util.regex.Pattern$GroupHead.match(Pattern.java:4554) at java.util.regex.Pattern$Loop.match(Pattern.java:4683) at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615) at java.util.regex.Pattern$Curly.match0(Pattern.java:4177) at java.util.regex.Pattern$Curly.match(Pattern.java:4132) at java.util.regex.Pattern$BmpCharProperty.match(Pattern.java:3715) at java.util.regex.Pattern$Curly.match0(Pattern.java:4177) at java.util.regex.Pattern$Curly.match(Pattern.java:4132) at java.util.regex.Pattern$BmpCharProperty.match(Pattern.java:3715) at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556) at java.util.regex.Pattern$Loop.match(Pattern.java:4683) at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615) at java.util.regex.Pattern$Curly.match0(Pattern.java:4177) at java.util.regex.Pattern$Curly.match(Pattern.java:4132) at java.util.regex.Pattern$BmpCharProperty.match(Pattern.java:3715) etc, a total of about 1000 lines, ending on: at java.util.regex.Pattern$BmpCharProperty.match(Pattern.java:3715) at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556) at java.util.regex.Pattern$Loop.match(Pattern.java:4683) at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615) at java.util.regex.Pattern$Curly.match0(Pattern.java:4177) at java.util.regex.Pattern$Curly.match(Pattern.java:4132) at java.util.regex.Pattern$BmpCharProperty.match(Pattern.java:3715) at java.util.regex.Pattern$Curly.match0(Pattern.java:4177) REPRODUCIBILITY : This bug can be reproduced always. ---------- BEGIN SOURCE ---------- import java.util.regex.Matcher; import java.util.regex.Pattern; public class RegexBug3 { public static final Pattern trailingTags = Pattern.compile("(<[^>]+> *)+$"); public static void main(String[] args) { StringBuilder longDocument = new StringBuilder("<b>"); Matcher m; for (int i = 0; i < 16; i++) { // next line triggers the bug if ((m = trailingTags.matcher(longDocument)).find()) { String prunedString = longDocument.toString().substring(0, m.start()); System.err.println("OK for document with " + (long) (Math.pow(2, i) + 0.5) + " tags: \"" + prunedString + "\""); } longDocument.append(longDocument); } } } ---------- END SOURCE ---------- CUSTOMER SUBMITTED WORKAROUND : import java.util.regex.Matcher; import java.util.regex.Pattern; public class RegexBug4 { // approach 1: apply pattern in a loop, limiting the number of matches // very slow for long inputs, but does not crash public static final Pattern trailingTags = Pattern .compile("(<[^>]+> *){1,100}$"); public static void slow() { StringBuilder longDocument = new StringBuilder("<b>"); Matcher m; for (int i = 0; i < 16; i++) { String prunedString = longDocument.toString(); while ((m = trailingTags.matcher(prunedString)).find()) { prunedString = prunedString.substring(0, m.start()); } if (prunedString.length() < longDocument.length()) { System.err.println("OK for document with " + (long) (Math.pow(2, i) + 0.5) + " tags: \"" + prunedString + "\""); } longDocument.append(longDocument); } } // approach 2: reverse pattern and match from the beginning instead of the // end and apply pattern in a loop, limiting the number of matches: // does not crash, and much faster than matching from the end public static final Pattern trailingTagsReversed = Pattern .compile("^( *>[^<]+<){1,100}"); public static void fast() { StringBuilder longDocument = new StringBuilder("<b>"); Matcher m; for (int i = 0; i < 16; i++) { String prunedStringReversed = (new StringBuilder(longDocument)).reverse().toString(); while ((m = trailingTagsReversed.matcher(prunedStringReversed)) .find()) { prunedStringReversed = prunedStringReversed.substring(m.end()); } if (prunedStringReversed.length() < longDocument.length()) { System.err.println("OK for document with " + (long) (Math.pow(2, i) + 0.5) + " tags: \"" + (new StringBuilder(prunedStringReversed)).reverse() + "\""); } longDocument.append(longDocument); } } // run with optional argument "fast", everything else runs the slow version public static void main(String[] args) { if (args.length > 0 && "fast".equals(args[0])) { fast(); } else { slow(); } } }