Name: yyT116575 Date: 11/22/2000
java version "1.3.0"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.3.0-C)
Java HotSpot(TM) Client VM (build 1.3.0-C, mixed mode)
The problem with the UTF16 decoder is more readily noticeable
in Java 1.1.x. It appears that some fixes were done in
version 1.2.x but it's not completely gone. I'll explain...
Despite requesting a single character from the reader,
the UTF16 decoder automatically buffers 8K of bytes! (This
part of the problem has NOT changed from JDK version to
version.) And what's worse is that the decoder throws an
internal Error if not supplied with "enough" bytes to do
its decoding. Not an Exception but an Error.
In other words, it assumes that it can always access a set
number of bytes in a block read and is unable to handle
insufficient bytes returned from a block read of the
underlying input stream. A simple example that illustrates
this problem is an input stream (such as a socket
connection) that returns only 1 byte at a time, even in
block reads, because more data is not available at that
time.
In JDK 1.1.x the decoder always needs both bytes of every
character in order to perform the decoding operation,
otherwise it throws the Error. This has been partially
fixed in JDK 1.2.x so that it *can* handle block reads of
only a single character. However, this bug *still* remains
for the UTF16 BOM. The entire BOM has to be returned within
a single block read or the decoder fails.
/* Test case. */
import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.FilterReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;
/**
 * Test case for the UTF-16 decoder: reads a single character through an
 * InputStreamReader ("UnicodeLittle" encoding) layered over byte streams
 * whose block reads are capped at a given size, and prints every read
 * that passes through the reader and stream wrappers.
 */
public class BrokenUTF16 {

    /**
     * Runs the three scenarios in order: an unrestricted single-character
     * read, capped block reads over the full byte array (BOM included),
     * and capped block reads over the bytes that follow the BOM.
     *
     * @param argv ignored
     * @throws Exception if building or using any of the readers fails
     */
    public static void main(String[] argv) throws Exception {
        printBanner("# Byte array");
        final byte[] bytes = {
            (byte)0xFF, (byte)0xFE, // BOM
            (byte)0xE5, (byte)0x65, (byte)0x2C, (byte)0x67
        };
        for (int index = 0; index < bytes.length; index++) {
            // Mask so the byte is shown as an unsigned value.
            String hex = Integer.toHexString(bytes[index] & 0x00FF);
            System.out.println("byte["+index+"]: 0x"+hex);
        }

        printBanner("# Reading single byte: new InputStreamReader(bytes,\"UnicodeLittle\")");
        Reader unrestricted = openReporter(new ByteArrayInputStream(bytes));
        unrestricted.read();
        unrestricted.close();

        printBanner("# Limited block reads");
        readWithGrowingLimit(bytes, 0, bytes.length);

        printBanner("# Limited block reads, skipping BOM");
        readWithGrowingLimit(bytes, 2, bytes.length - 2);

        printBanner("# Done.");
    }

    /** Prints a section title framed by two "#" lines. */
    private static void printBanner(String title) {
        System.out.println("#");
        System.out.println(title);
        System.out.println("#");
    }

    /**
     * Builds the reporting chain on top of the given byte source:
     * InputStreamReporter, then an InputStreamReader using the
     * "UnicodeLittle" encoding, then ReaderReporter.
     */
    private static Reader openReporter(InputStream source) throws IOException {
        InputStream tracedBytes = new InputStreamReporter(source);
        Reader decoder = new InputStreamReader(tracedBytes, "UnicodeLittle");
        return new ReaderReporter(decoder);
    }

    /**
     * Tries to read one character from data[offset, offset+length) with
     * block reads capped at 1, 2, ... length bytes. An Error raised by the
     * read is printed and the next larger cap is tried; the first read
     * that completes without an Error ends the sequence. The reader is
     * closed after every attempt.
     */
    private static void readWithGrowingLimit(byte[] data, int offset, int length)
            throws IOException {
        for (int limit = 1; limit <= length; limit++) {
            String plural = "";
            if (limit != 1) {
                plural = "s";
            }
            System.out.println("# block reads limited to "+limit+" byte"+plural);

            InputStream raw = new ByteArrayInputStream(data, offset, length);
            Reader reporter = openReporter(new LimitedInputStream(raw, limit));

            boolean failed = false;
            try {
                reporter.read();
            }
            catch (Error error) {
                error.printStackTrace(System.out);
                failed = true;
            }
            finally {
                reporter.close();
            }
            if (!failed) {
                return;
            }
        }
    }

    /** Reader wrapper that prints the result of every read it forwards. */
    static class ReaderReporter extends FilterReader {

        public ReaderReporter(Reader reader) {
            super(reader);
        }

        /** Forwards a single-character read and prints the value in hex, or "EOF". */
        public int read() throws IOException {
            final int value = in.read();
            String shown;
            if (value == -1) {
                shown = "EOF";
            }
            else {
                shown = Integer.toHexString(value);
            }
            System.out.println("Reader.read(): 0x" + shown);
            return value;
        }

        /** Forwards a block read and prints the requested range and the returned count. */
        public int read(char[] buffer, int offset, int length) throws IOException {
            final int charsRead = in.read(buffer, offset, length);
            System.out.println("Reader.read(char[],"+offset+','+length+"): "+charsRead);
            return charsRead;
        }

    } // class ReaderReporter

    /** InputStream wrapper that prints the result of every read it forwards. */
    static class InputStreamReporter extends FilterInputStream {

        public InputStreamReporter(InputStream stream) {
            super(stream);
        }

        /** Forwards a single-byte read and prints the value in hex, or "EOF". */
        public int read() throws IOException {
            final int value = in.read();
            String shown;
            if (value == -1) {
                shown = "EOF";
            }
            else {
                shown = Integer.toHexString(value);
            }
            System.out.println("InputStream.read(): 0x" + shown);
            return value;
        }

        /** Forwards a block read and prints the requested range and the returned count. */
        public int read(byte[] buffer, int offset, int length) throws IOException {
            final int bytesRead = in.read(buffer, offset, length);
            System.out.println("InputStream.read(byte[],"+offset+','+length+"): "+bytesRead);
            return bytesRead;
        }

    } // class InputStreamReporter

    /**
     * InputStream wrapper whose block reads never request more than a
     * fixed number of bytes from the wrapped stream.
     */
    static class LimitedInputStream extends FilterInputStream {

        /** Largest length passed on to the wrapped stream in one block read. */
        private final int limit;

        /** Wraps the stream with a block-read cap of 2 bytes. */
        public LimitedInputStream(InputStream stream) {
            this(stream, 2);
        }

        /** Wraps the stream with the given block-read cap. */
        public LimitedInputStream(InputStream stream, int limit) {
            super(stream);
            this.limit = limit;
        }

        /**
         * Forwards a block read with the length reduced to the cap when it
         * exceeds it, and prints the length actually requested together
         * with the returned count.
         */
        public int read(byte[] buffer, int offset, int length) throws IOException {
            final int capped = (length > limit) ? limit : length;
            final int bytesRead = in.read(buffer, offset, capped);
            System.out.println("LimitedInputStream.read(byte[],"+offset+','+capped+"): "+bytesRead);
            return bytesRead;
        }

    } // class LimitedInputStream

} // class BrokenUTF16
(Review ID: 112650)
======================================================================