JDK-6761481 : Charset#contains() should be examined
  • Type: Bug
  • Component: core-libs
  • Sub-Component: java.nio.charsets
  • Affected Version: 6
  • Priority: P4
  • Status: Open
  • Resolution: Unresolved
  • OS: windows_xp
  • CPU: x86
  • Submitted: 2008-10-20
  • Updated: 2011-02-16
Description
FULL PRODUCT VERSION :
1.6.0_10-rc2-b32

ADDITIONAL OS VERSION INFORMATION :
Windows XP SR-2

A DESCRIPTION OF THE PROBLEM :
I have discovered 4 charsets that do not completely contain the charsets which, according to method contains(), they are supposed to contain:

- ISO-8859-15 should contain ISO-8859-1
- UTF-8 should contain ISO-8859-1
- EUC-JP should contain JIS-X-0208
- ISO-2022-JP should contain US-ASCII

I assume there are more that don't match.


STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Run JUnit test below.


REPRODUCIBILITY :
This bug can be reproduced always.

---------- BEGIN SOURCE ----------
import java.nio.*;
import java.nio.charset.*;
import java.util.*;
import org.junit.*;
import static org.junit.Assert.*;

/**
 * Reproducer for charset pairs where {@link Charset#contains(Charset)} reports
 * containment although both charsets decode the same bytes differently.
 *
 * <p>Each test asserts that the "container" charset claims to contain the
 * other charset, decodes one byte sequence with both charsets, prints the
 * decoded characters as four-digit hexadecimal values and finally asserts
 * that both decodings are equal.
 *
 * <p>NOTE(review): the {@code Charset.contains} Javadoc defines containment by
 * the characters a charset can represent, not by its byte encoding. Identical
 * decoding is a different property from the one documented there; confirm
 * which of the two this reproducer is meant to demonstrate.
 *
 * @author Ulf.Zibis @ CoSoCo.de
 */
public class CharsetContainsTest {

    /** Number of distinct values a byte can take. */
    private static final int BYTE_RANGE = 1 << Byte.SIZE;

    /** Every byte value exactly once, in ascending unsigned order. */
    private static final byte[] IN_BYTES = new byte[BYTE_RANGE];
    static {
        for (int b = 0; b < BYTE_RANGE; b++) {
            IN_BYTES[b] = (byte) b;
        }
    }

    /** Full-range input buffer, re-created before every test by {@link #setUp()}. */
    private ByteBuffer inBytes;

    public CharsetContainsTest() {}

    /**
     * Wraps {@link #IN_BYTES} in a fresh buffer and prints a blank line to
     * separate the output of consecutive tests.
     */
    @Before
    public void setUp() throws Exception {
        inBytes = ByteBuffer.wrap(IN_BYTES);
        System.out.println();
    }

    /** ISO-8859-15 versus ISO-8859-1 over all 256 byte values. */
    @Test
    public void testISO8859_15_ISO8859_1() throws CharacterCodingException {
        assertContainsAndDecodesAlike(
                Charset.forName("ISO-8859-1"), "ISO8859_1Chars:  ",
                Charset.forName("ISO-8859-15"), "ISO8859_15Chars: ",
                inBytes);
    }

    /** UTF-8 versus ISO-8859-1 over all 256 byte values. */
    @Test
    public void testUTF_8_ISO8859_1() throws CharacterCodingException {
        assertContainsAndDecodesAlike(
                Charset.forName("ISO-8859-1"), "ISO8859_1Chars:  ",
                Charset.forName("UTF-8"), "UTF_8Chars:      ",
                inBytes);
    }

    /** EUC-JP versus x-JIS0208 over all 256 byte values. */
    @Test
    public void testEUC_JP_JIS_X0208() throws CharacterCodingException {
        assertContainsAndDecodesAlike(
                Charset.forName("x-JIS0208"), "JIS_X0208:       ",
                Charset.forName("EUC-JP"), "EUC-JP:          ",
                inBytes);
    }

    /** ISO-2022-JP versus US-ASCII, restricted to the byte values 0x00..0x7F. */
    @Test
    public void testISO2022_JP_US_ASCII() throws CharacterCodingException {
        // Only the lower half of the byte range is fed to the decoders here.
        ByteBuffer lowerHalf = ByteBuffer.wrap(Arrays.copyOf(IN_BYTES, 0x80));
        assertContainsAndDecodesAlike(
                Charset.forName("US-ASCII"), "US_ASCIIChars:   ",
                Charset.forName("ISO-2022-JP"), "ISO2022_JPChars: ",
                lowerHalf);
    }

    /**
     * Asserts that {@code container} reports containing {@code contained} and
     * that both charsets decode {@code bytes} to the same characters. The
     * decoded characters of both charsets are printed before the comparison.
     *
     * @param contained      charset expected to be contained
     * @param containedLabel output prefix for the {@code contained} decoding
     * @param container      charset expected to report containment
     * @param containerLabel output prefix for the {@code container} decoding
     * @param bytes          input to decode; its position and limit are not modified
     */
    private static void assertContainsAndDecodesAlike(
            Charset contained, String containedLabel,
            Charset container, String containerLabel,
            ByteBuffer bytes) {
        assertTrue(container + " should report that it contains " + contained,
                container.contains(contained));
        char[] containedChars = decode(contained, bytes);
        char[] containerChars = decode(container, bytes);
        printChars(containedLabel, containedChars);
        printChars(containerLabel, containerChars);
        assertArrayEquals(containedChars, containerChars);
    }

    /**
     * Decodes {@code bytes} with {@code charset} and returns exactly the
     * decoded characters.
     *
     * <p>The result is copied from the buffer's remaining content rather than
     * taken from {@code CharBuffer.array()}, because the backing array is not
     * bounded by the buffer's position and limit.
     */
    private static char[] decode(Charset charset, ByteBuffer bytes) {
        // duplicate() gives the decoder its own position, so the caller's
        // buffer needs no rewind between successive decode calls.
        CharBuffer decoded = charset.decode(bytes.duplicate());
        char[] chars = new char[decoded.remaining()];
        decoded.get(chars);
        return chars;
    }

    /** Prints {@code label} followed by every character as a hexadecimal escape. */
    private static void printChars(String label, char[] chars) {
        System.out.print(label);
        for (char c : chars) {
            System.out.printf("\\u%04X,", (int) c);
        }
        System.out.println();
    }
}

---------- END SOURCE ----------

Comments
EVALUATION: Yes, a full review of the repository is needed, if resources permit.
19-11-2008