/**
 * Command-line reproduction harness accompanying a charset-decoder bug report.
 *
 * <p>Bug statement: \ufffd, aka REPLACE_CHAR, should be output by decoders'
 * decodeXXXLoop only if the input bytes have the semantics of a REPLACE_CHAR,
 * not simply because the input is malformed or unmappable. After all, there
 * is MALFORMED[n] and UNMAPPABLE[n].
 *
 * <p>Usage: {@code java Decode CHARSET BYTE [BYTE ...]}. The BYTE arguments are
 * assembled into a byte array, decoded with the named charset, and the decoded
 * characters are printed on one line separated by single spaces.
 */
public class Decode {

    /** Returns true when {@code c} is below U+0080. */
    private static boolean isAscii(char c) {
        return c < '\u0080';
    }

    /**
     * Returns true when {@code c} lies strictly between U+0020 and U+007F.
     * The space character itself is excluded, so it is printed in escaped
     * form and cannot be confused with the separator between characters.
     */
    private static boolean isPrintable(char c) {
        return ('\u0020' < c) && (c < '\u007f');
    }

    /**
     * Converts one command-line token into the byte it denotes.
     *
     * <p>Rules, tried in this order:
     * <ol>
     *   <li>a single ASCII character stands for its own code
     *       (so {@code 5} means 0x35, not 0x05);</li>
     *   <li>the names ESC, SO, SI, SS2 and SS3 stand for
     *       0x1b, 0x0e, 0x0f, 0x8e and 0x8f;</li>
     *   <li>a token starting with {@code 0x} is handed to
     *       {@link Integer#decode(String)} as is;</li>
     *   <li>anything else is treated as bare hexadecimal digits.</li>
     * </ol>
     *
     * @param arg the command-line token
     * @return the byte value denoted by {@code arg}
     * @throws NumberFormatException if the token is not a number, or if its
     *         value does not fit in one byte (0x00..0xff); previously such
     *         values were silently truncated to their low eight bits
     */
    private static byte parseByte(String arg) {
        if (arg.length() == 1 && isAscii(arg.charAt(0))) {
            return (byte) arg.charAt(0);
        }
        if (arg.equals("ESC")) {
            return 0x1b;
        }
        if (arg.equals("SO")) {
            return 0x0e;
        }
        if (arg.equals("SI")) {
            return 0x0f;
        }
        if (arg.equals("SS2")) {
            return (byte) 0x8e;
        }
        if (arg.equals("SS3")) {
            return (byte) 0x8f;
        }

        String literal = arg.startsWith("0x") ? arg : "0x" + arg;
        int value;
        try {
            value = Integer.decode(literal).intValue();
        } catch (NumberFormatException e) {
            // Re-throw with the offending token so the user can see which
            // argument was wrong; the exception type is unchanged.
            NumberFormatException named =
                new NumberFormatException("Not a byte value: \"" + arg + "\"");
            named.initCause(e);
            throw named;
        }
        if (value < 0 || value > 0xff) {
            throw new NumberFormatException(
                "Byte value out of range 0x00..0xff: \"" + arg + "\"");
        }
        return (byte) value;
    }

    /**
     * Formats decoded text for display: printable characters appear as
     * themselves, U+001B appears as {@code ESC}, and every other character
     * appears as a backslash, the letter u, and four lowercase hex digits.
     * Consecutive characters are separated by one space.
     *
     * @param s the decoded text
     * @return the display form, without a trailing newline
     */
    private static String render(String s) {
        StringBuilder out = new StringBuilder();
        for (int j = 0; j < s.length(); j++) {
            if (j > 0) {
                out.append(' ');
            }
            char c = s.charAt(j);
            if (isPrintable(c)) {
                out.append(c);
            } else if (c == '\u001b') {
                out.append("ESC");
            } else {
                out.append(String.format("\\u%04x", (int) c));
            }
        }
        return out.toString();
    }

    /**
     * Decodes the bytes given on the command line and prints the result.
     *
     * @param args {@code args[0]} is the charset name; each following element
     *             is one byte in the notation accepted by {@code parseByte}
     * @throws Exception if fewer than two arguments are given
     * @throws NumberFormatException if a BYTE argument is malformed or does
     *         not fit in one byte
     * @throws java.io.UnsupportedEncodingException if the charset name is not
     *         supported
     */
    public static void main(String[] args) throws Throwable {
        if (args.length < 2) {
            throw new Exception("Usage: java Decode CHARSET BYTE [BYTE ...]");
        }
        String cs = args[0];
        byte[] bytes = new byte[args.length - 1];
        for (int i = 1; i < args.length; i++) {
            bytes[i - 1] = parseByte(args[i]);
        }
        // Decoding goes through the String constructor, i.e. whatever the
        // charset's decoder does with malformed or unmappable input is what
        // ends up on the output line.
        String s = new String(bytes, cs);
        System.out.print(render(s));
        System.out.print("\n");
    }
}

/*
 * Transcript from the bug report (every charset prints \ufffd for the
 * malformed/unmappable input; "jver 6" presumably selects a JDK 6 install):
 *
 * $ jver 6 javac Decode.java && for cs in ISO-2022-JP ISO-2022-JP-2 x-windows-50220 x-windows-50221 x-windows-iso2022jp ; do echo $cs; jver 6 java Decode $cs ESC 24 40 00 00; done; echo EUC-TW ; jver 6 java Decode EUC-TW 8e 98 ad e5
 * ISO-2022-JP
 * \ufffd
 * ISO-2022-JP-2
 * \ufffd
 * x-windows-50220
 * \ufffd
 * x-windows-50221
 * \ufffd
 * x-windows-iso2022jp
 * \ufffd
 * EUC-TW
 * \ufffd
 */
|