JDK-6402819 : String(a, charset) slower than String(a, charsetname)
  • Type: Bug
  • Component: core-libs
  • Sub-Component: java.lang
  • Affected Version: 6
  • Priority: P2
  • Status: Resolved
  • Resolution: Fixed
  • OS: generic
  • CPU: generic
  • Submitted: 2006-03-23
  • Updated: 2012-01-11
  • Resolved: 2006-06-04
The Version table provides details about the release in which this issue/RFE will be addressed.

Unresolved : Release in which this issue/RFE will be addressed.
Resolved: Release in which this issue/RFE has been resolved.
Fixed : Release in which this issue/RFE has been fixed. The release containing this fix may be available for download as an Early Access Release or a General Availability Release.

To download the current JDK release, click here.
JDK 6
6 b87 Fixed
Related Reports
Relates :  
Relates :  
Description
Bug 5005831 ("String constructors and method which take Charset rather than String as argument")
introduced new constructors for String that take a Charset.
One would expect that these would be
uniformly faster than the equivalent constructors that take a String,
since the Charset lookup can be elided.

However, it appears that special String-based name optimizations in StringCoding foil that.

For the particularly important case of ASCII or Latin-1 text,
we want to discourage the use of the deprecated constructors,
but the benchmark numbers cannot support such a recommendation.

The slowdown is only for small strings, of course.

Here's a microbenchmark, and a sample run:
----------------------------------------------------
import java.nio.*;
import java.nio.charset.*;
import java.util.*;
import java.util.concurrent.*;

/**
 * Microbenchmark comparing several ways of turning a byte array holding
 * ISO-8859-1 (Latin-1) data into a {@code String}.
 *
 * <p>Usage: {@code java Latin1StringMicroBenchmark [length [iterations]]},
 * where {@code length} is the size of the random byte array to decode and
 * {@code iterations} is the number of decodes performed per call to
 * {@link Job#work()}.
 */
public class Latin1StringMicroBenchmark {
    /** A named unit of work whose execution time is measured. */
    abstract static class Job {
        private final String name;

        public Job(String name) {
            this.name = name;
        }

        /** Returns the label printed for this job in the results table. */
        public String name() {
            return name;
        }

        /** Performs one measured run of the job. */
        public abstract void work() throws Throwable;
    }

    private static final long SECOND = 1000L * 1000L * 1000L;

    /** Minimum wall-clock time, in nanoseconds, each job is run for per timing pass. */
    private static final long MIN_RUN_NANOS = 10L * SECOND;

    private static final long NANOS_PER_MILLI = 1000L * 1000L;

    /**
     * Requests garbage collection and finalization twice, pausing briefly
     * after each request, so that each job starts from a similar heap state.
     *
     * @throws Error if the thread is interrupted while pausing; the thread's
     *         interrupt status is restored before the error is thrown
     */
    private static void collectAllGarbage() {
        try {
            for (int i = 0; i < 2; i++) {
                System.gc();
                Thread.sleep(10);
                System.runFinalization();
                Thread.sleep(10);
            }
        } catch (InterruptedException e) {
            // Thread.sleep cleared the flag; put it back so callers can still see it.
            Thread.currentThread().interrupt();
            throw new Error(e);
        }
    }

    /**
     * Runs each job for at least 10 seconds.
     * Returns array of average times per job per run, in nanoseconds.
     */
    private static long[] time0(Job... jobs) throws Throwable {
        long[] nanoss = new long[jobs.length];
        for (int i = 0; i < jobs.length; i++) {
            collectAllGarbage();
            long t0 = System.nanoTime();
            long t;
            int j = 0;
            do {
                jobs[i].work();
                j++;
            } while ((t = System.nanoTime() - t0) < MIN_RUN_NANOS);
            nanoss[i] = t / j;
        }
        return nanoss;
    }

    /**
     * Times all jobs twice (the first pass is a warm-up whose results are
     * discarded) and prints a table of the average milliseconds per run and
     * the ratio of each job's time to that of the first job.
     */
    private static void time(Job... jobs) throws Throwable {
        time0(jobs);                 // Warm up run, results intentionally discarded
        long[] nanoss = time0(jobs); // Real timing run

        final String nameHeader = "Method";
        int nameWidth = nameHeader.length();
        for (Job job : jobs) {
            nameWidth = Math.max(nameWidth, job.name().length());
        }

        final String millisHeader = "Millis";
        int millisWidth = millisHeader.length();
        for (long nanos : nanoss) {
            millisWidth = Math.max(
                millisWidth,
                String.format("%d", nanos / NANOS_PER_MILLI).length());
        }

        final String ratioHeader = "Ratio";
        int ratioWidth = ratioHeader.length();

        // Build the row/heading formats from the computed column widths.
        String format = String.format("%%-%ds %%%dd %%.3f%%n",
                                      nameWidth, millisWidth);
        String headerFormat = String.format("%%-%ds %%-%ds %%-%ds%%n",
                                            nameWidth, millisWidth, ratioWidth);
        System.out.printf(headerFormat, nameHeader, millisHeader, ratioHeader);

        // Print out absolute and relative times, calibrated against first job
        for (int i = 0; i < jobs.length; i++) {
            long millis = nanoss[i] / NANOS_PER_MILLI;
            double ratio = (double) nanoss[i] / (double) nanoss[0];
            System.out.printf(format, jobs[i].name(), millis, ratio);
        }
    }

    /**
     * Returns {@code args[i]} parsed as an int, or {@code defaultValue} when
     * fewer than {@code i + 1} arguments were supplied.
     *
     * @throws NumberFormatException if the argument is present but not an int
     */
    private static int intArg(String[] args, int i, int defaultValue) {
        return args.length > i ? Integer.parseInt(args[i]) : defaultValue;
    }

    /**
     * Fails the benchmark if a job produced something other than the
     * reference decoding.
     *
     * @throws Error naming the offending job when the strings differ
     */
    private static void checkDecoded(String jobName, String actual, String expected) {
        if (!actual.equals(expected)) {
            throw new Error(jobName + ": decoded string does not match the expected value");
        }
    }

    /**
     * Entry point.
     *
     * @param args optional {@code length} (default 1000) and
     *        {@code iterations} (default scaled inversely with length)
     * @throws IllegalArgumentException if {@code length} or
     *         {@code iterations} is less than 1
     */
    public static void main(String[] args) throws Throwable {
        final int length = intArg(args, 0, 1000);
        if (length < 1) {
            // Also guards the division below against length == 0.
            throw new IllegalArgumentException("length must be at least 1: " + length);
        }
        // Keep total bytes decoded per work() call roughly constant, but never drop to 0.
        final int iterations =
            intArg(args, 1, (int) Math.max(1L, 100000L * 1000L / length));
        if (iterations < 1) {
            throw new IllegalArgumentException("iterations must be at least 1: " + iterations);
        }

        final byte[] latin1Bytes = new byte[length];
        new Random().nextBytes(latin1Bytes);
        final String expected = new String(latin1Bytes, "ISO-8859-1");
        // Single-element holder so the anonymous jobs can publish their last result.
        final String[] out = new String[1];
        out[0] = "poopie";

        time(
            new Job("String(byte[], int hibyte)") {
                @Override
                @SuppressWarnings("deprecation")
                public void work() throws Throwable {
                    for (int i = 0; i < iterations; i++) {
                        out[0] = new String(latin1Bytes, 0);
                    }
                    checkDecoded(name(), out[0], expected);
                }},
            new Job("String(char[], int offset, int length)") {
                @Override
                public void work() throws Throwable {
                    char[] chars = new char[2*length];
                    for (int i = 0; i < iterations; i++) {
                        // Widen each byte to its unsigned value by hand.
                        for (int j = 0; j < latin1Bytes.length; j++)
                            chars[j] = (char) (latin1Bytes[j] & 0xff);
                        out[0] = new String(chars, 0, latin1Bytes.length);
                    }
                    checkDecoded(name(), out[0], expected);
                }},
            new Job("String(byte[], Charset cs)") {
                @Override
                public void work() throws Throwable {
                    Charset cs = Charset.forName("ISO-8859-1");
                    for (int i = 0; i < iterations; i++) {
                        out[0] = new String(latin1Bytes, cs);
                    }
                    checkDecoded(name(), out[0], expected);
                }},
            new Job("String(byte[], String csn)") {
                @Override
                public void work() throws Throwable {
                    for (int i = 0; i < iterations; i++) {
                        out[0] = new String(latin1Bytes, "ISO-8859-1");
                    }
                    checkDecoded(name(), out[0], expected);
                }},
            new Job("CharsetDecoder.decode(ByteBuffer, CharBuffer, true)") {
                @Override
                public void work() throws Throwable {
                    CharBuffer cb = CharBuffer.allocate(2*length);
                    CharsetDecoder coder =
                        Charset.forName("ISO-8859-1").newDecoder();
                    // NOTE(review): the decoder is reused across iterations without
                    // reset()/flush(); left as-is so the timed loop is unchanged.
                    for (int i = 0; i < iterations; i++) {
                        ByteBuffer bb = ByteBuffer.wrap(latin1Bytes);
                        cb.clear();
                        coder.decode(bb, cb, true);
                        cb.flip();
                        out[0] = cb.toString();
                    }
                    checkDecoded(name(), out[0], expected);
                }}
            );
    }
}
-------------------------------------------------------
 ~/src/toy $ for size in 1 10 100 1000; do echo $size -----; jver mustang jr Latin1StringMicroBenchmark $size; done
1 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 1
Method                                              Millis Ratio
String(byte[], int hibyte)                           12413 1.000
String(char[], int offset, int length)               12360 0.996
String(byte[], Charset cs)                          111204 8.959
String(byte[], String csn)                           63524 5.118
CharsetDecoder.decode(ByteBuffer, CharBuffer, true)  47278 3.809
10 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 10
Method                                              Millis Ratio
String(byte[], int hibyte)                            1805 1.000
String(char[], int offset, int length)                2622 1.452
String(byte[], Charset cs)                           11342 6.282
String(byte[], String csn)                            6688 3.704
CharsetDecoder.decode(ByteBuffer, CharBuffer, true)   5118 2.834
100 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 100
Method                                              Millis Ratio
String(byte[], int hibyte)                            1061 1.000
String(char[], int offset, int length)                1183 1.114
String(byte[], Charset cs)                            1964 1.850
String(byte[], String csn)                            1471 1.386
CharsetDecoder.decode(ByteBuffer, CharBuffer, true)   1461 1.377
1000 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 1000
Method                                              Millis Ratio
String(byte[], int hibyte)                            1066 1.000
String(char[], int offset, int length)                1000 0.938
String(byte[], Charset cs)                            1044 0.979
String(byte[], String csn)                             963 0.903
CharsetDecoder.decode(ByteBuffer, CharBuffer, true)   1058 0.992

Comments
EVALUATION We should investigate this performance problem and provide a solution as soon as possible.
08-05-2006