FULL PRODUCT VERSION : 1.6.0_10-rc2-b32 ADDITIONAL OS VERSION INFORMATION : Windows XP SR-2 A DESCRIPTION OF THE PROBLEM : I have discovered 4 charsets, which don't completely contain the charsets determined by method contains(): - ISO-8859-15 should contain ISO-8859-1 - UTF-8 should contain ISO-8859-1 - EUC-JP should contain JIS-X-0208 - ISO-2022-JP should contain US-ASCII I assume, there are more which don't match. STEPS TO FOLLOW TO REPRODUCE THE PROBLEM : Run JUnit test below. REPRODUCIBILITY : This bug can be reproduced always. ---------- BEGIN SOURCE ----------
import java.nio.*;
import java.nio.charset.*;
import java.util.*;
import org.junit.*;
import static org.junit.Assert.*;

/**
 * Reproducer for the report above. For each charset pair it first asserts
 * that the "outer" charset reports {@code contains(inner)}, then decodes the
 * same byte sequence with both charsets, prints the decoded code units in hex
 * and asserts that both decodings are identical.
 *
 * <p>NOTE(review): {@link Charset#contains(Charset)} is specified in terms of
 * representable characters, not identical byte encodings. The second half of
 * each test therefore checks a stronger, byte-level property than the one
 * {@code contains} documents; confirm that this is what the report intends.
 *
 * @author Ulf.Zibis @ CoSoCo.de
 */
public class CharsetContainsTest {

    // test parameters:
    private static final int BYTE_RANGE = 1 << Byte.SIZE;

    /** Every possible byte value once, in ascending unsigned order (0x00..0xFF). */
    private static final byte[] IN_BYTES = new byte[BYTE_RANGE];
    static {
        for (int b = 0; b < BYTE_RANGE; b++) {
            IN_BYTES[b] = (byte) b;
        }
    }

    public CharsetContainsTest() {}

    /** Prints a blank line so the output of consecutive tests is visually separated. */
    @Before
    public void setUp() throws Exception {
        System.out.println();
    }

    /** ISO-8859-15 versus ISO-8859-1 over all 256 byte values. */
    @Test
    public void testISO8859_15_ISO8859_1() throws CharacterCodingException {
        assertContainsAndDecodesAlike(
                "ISO-8859-1", "ISO8859_1Chars",
                "ISO-8859-15", "ISO8859_15Chars",
                ByteBuffer.wrap(IN_BYTES));
    }

    /** UTF-8 versus ISO-8859-1 over all 256 byte values. */
    @Test
    public void testUTF_8_ISO8859_1() throws CharacterCodingException {
        assertContainsAndDecodesAlike(
                "ISO-8859-1", "ISO8859_1Chars",
                "UTF-8", "UTF_8Chars",
                ByteBuffer.wrap(IN_BYTES));
    }

    /** EUC-JP versus x-JIS0208 over all 256 byte values. */
    @Test
    public void testEUC_JP_JIS_X0208() throws CharacterCodingException {
        assertContainsAndDecodesAlike(
                "x-JIS0208", "JIS_X0208",
                "EUC-JP", "EUC-JP",
                ByteBuffer.wrap(IN_BYTES));
    }

    /** ISO-2022-JP versus US-ASCII, restricted to the first 0x80 byte values. */
    @Test
    public void testISO2022_JP_US_ASCII() throws CharacterCodingException {
        assertContainsAndDecodesAlike(
                "US-ASCII", "US_ASCIIChars",
                "ISO-2022-JP", "ISO2022_JPChars",
                ByteBuffer.wrap(Arrays.copyOf(IN_BYTES, 0x80)));
    }

    /**
     * Asserts that {@code outerName} reports containing {@code innerName} and
     * that both charsets decode {@code bytes} to the same characters.
     *
     * <p>The decoded buffers are compared directly instead of through
     * {@code CharBuffer.array()}: the backing array is as long as the buffer's
     * capacity, which the decoder may have over-allocated, so comparing arrays
     * would also compare slots that were never written.
     *
     * @param innerName  name of the charset expected to be contained
     * @param innerLabel label printed in front of the inner charset's dump
     * @param outerName  name of the charset expected to contain the inner one
     * @param outerLabel label printed in front of the outer charset's dump
     * @param bytes      input to decode; read from its current position and
     *                   rewound between the two decode calls
     */
    private static void assertContainsAndDecodesAlike(
            String innerName, String innerLabel,
            String outerName, String outerLabel,
            ByteBuffer bytes) {
        Charset inner = Charset.forName(innerName);
        Charset outer = Charset.forName(outerName);
        assertTrue(outer.contains(inner));

        CharBuffer innerChars = inner.decode(bytes);
        // The first decode consumed the buffer; reset the position before reusing it.
        bytes.rewind();
        CharBuffer outerChars = outer.decode(bytes);

        dump(innerLabel, innerChars);
        dump(outerLabel, outerChars);

        assertEquals(innerChars, outerChars);
    }

    /**
     * Prints {@code label} followed by every remaining char of {@code chars}
     * as a four-digit hex escape, all on one line.
     *
     * <p>Uses absolute reads so the buffer's position is left untouched for
     * the comparison that follows.
     */
    private static void dump(String label, CharBuffer chars) {
        System.out.print(label + ": ");
        for (int i = chars.position(); i < chars.limit(); i++) {
            System.out.printf("\\u%04X,", (int) chars.get(i));
        }
        System.out.println();
    }
}
---------- END SOURCE ----------
|