JDK-6451717 : Regex: Nodes (Not)?Single[A|U] report wrong hitEnd status when failed to match/find (5.0)
  • Type: Bug
  • Component: core-libs
  • Sub-Component: java.util.regex
  • Affected Version: 5.0
  • Priority: P3
  • Status: Resolved
  • Resolution: Fixed
  • OS: generic
  • CPU: generic
  • Submitted: 2006-07-21
  • Updated: 2010-04-02
  • Resolved: 2006-08-03
The Version table provides details related to the release that this issue/RFE will be addressed.

Unresolved : Release in which this issue/RFE will be addressed.
Resolved: Release in which this issue/RFE has been resolved.
Fixed : Release in which this issue/RFE has been fixed. The release containing this fix may be available for download as an Early Access Release or a General Availability Release.

To download the current JDK release, click here.
Other
5.0u10 b01 — Fixed
Description
The regex nodes SingleA, NotSingleA, SingleU and NotSingleU always cause
hitEnd() to report true when a match/find fails, even if the end of the input has not been reached.

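The cases below show the problem. In each case find() fails and hitEnd() should return false, so failCount should not be incremented.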

        // SingleA
	p = Pattern.compile("^a", Pattern.CASE_INSENSITIVE);
	m = p.matcher("bcd");
	if (m.find() || m.hitEnd()) {
	    failCount++;
	}

        // NotSingleA
	p = Pattern.compile("^[^\u4e00]", Pattern.CASE_INSENSITIVE);
	m = p.matcher("\u4e00cd");
	if (m.find() || m.hitEnd()) {
	    failCount++;
	}

        // SingleU
	p = Pattern.compile("^\u4e00", Pattern.CASE_INSENSITIVE |Pattern.UNICODE_CASE);
	m = p.matcher("abc");
	if (m.find() || m.hitEnd()) {
	    failCount++;
	}

        // NotSingleU
	p = Pattern.compile("^[^\u03b2]", Pattern.CASE_INSENSITIVE |Pattern.UNICODE_CASE);
	m = p.matcher("\u0392bc");
	if (m.find() || m.hitEnd()) {
	    failCount++;
	}

Comments
EVALUATION Removed the "mustang" entry from SRs, since this issue has already been addressed in the 6.0 release as a side effect of other changes.
18-01-2008

EVALUATION For 5.0u9. This problem does not exist in Mustang (it has been fixed there as a side effect of other changes).
21-07-2006

SUGGESTED FIX

--- Pattern.java Fri Jul 21 15:29:06 2006
***************
*** 3366,3378 ****
              return new SingleA(ch);
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
!             if (i < matcher.to) {
                  int c = seq.charAt(i);
                  if (c == ch || ASCII.toLower(c) == ch) {
                      return next.match(matcher, i+1, seq);
                  }
              }
-             matcher.hitEnd = true;
              return false;
          }
--- 3366,3379 ----
              return new SingleA(ch);
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
!             if (i >= matcher.to) {
!                 matcher.hitEnd = true;
!             } else {
                  int c = seq.charAt(i);
                  if (c == ch || ASCII.toLower(c) == ch) {
                      return next.match(matcher, i+1, seq);
                  }
              }
              return false;
          }
***************
*** 3395,3407 ****
              return new NotSingleA(ch);
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
!             if (i < matcher.to) {
                  int c = Character.codePointAt(seq, i);
                  if (c != ch && ASCII.toLower(c) != ch) {
                      return next.match(matcher, i+Character.charCount(c), seq);
                  }
              }
-             matcher.hitEnd = true;
              return false;
          }
--- 3396,3409 ----
              return new NotSingleA(ch);
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
!             if (i >= matcher.to) {
!                 matcher.hitEnd = true;
!             } else {
                  int c = Character.codePointAt(seq, i);
                  if (c != ch && ASCII.toLower(c) != ch) {
                      return next.match(matcher, i+Character.charCount(c), seq);
                  }
              }
              return false;
          }
***************
*** 3429,3435 ****
              return new SingleU(ch);
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
!             if (i < matcher.to) {
                  int c = Character.codePointAt(seq, i);
                  if (c == ch)
                      return next.match(matcher, i+len, seq);
--- 3431,3439 ----
              return new SingleU(ch);
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
!             if (i >= matcher.to) {
!                 matcher.hitEnd = true;
!             } else {
                  int c = Character.codePointAt(seq, i);
                  if (c == ch)
                      return next.match(matcher, i+len, seq);
***************
*** 3438,3444 ****
                  if (cc == ch)
                      return next.match(matcher, i+Character.charCount(c), seq);
              }
-             matcher.hitEnd = true;
              return false;
          }
          boolean study(TreeInfo info) {
--- 3442,3447 ----
***************
*** 3463,3478 ****
              return new NotSingleU(ch);
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
!             if (i < matcher.to) {
                  int c = Character.codePointAt(seq, i);
                  if (c == ch)
                      return false;
                  int cc = Character.toUpperCase(c);
                  cc = Character.toLowerCase(cc);
                  if (cc != ch)
                      return next.match(matcher, i+Character.charCount(c), seq);
              }
-             matcher.hitEnd = true;
              return false;
          }
          boolean study(TreeInfo info) {
--- 3466,3483 ----
              return new NotSingleU(ch);
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
!             if (i >= matcher.to) {
!                 matcher.hitEnd = true;
!             } else {
                  int c = Character.codePointAt(seq, i);
                  if (c == ch)
                      return false;
                  int cc = Character.toUpperCase(c);
                  cc = Character.toLowerCase(cc);
+
                  if (cc != ch)
                      return next.match(matcher, i+Character.charCount(c), seq);
              }
              return false;
          }
          boolean study(TreeInfo info) {
21-07-2006