package java.io; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import sun.nio.cs.StreamDecoder; public class InputStreamReader extends Reader { private final StreamDecoder sd; /** * Creates an InputStreamReader that uses the default charset. * * @param in An InputStream */ public InputStreamReader(InputStream in) { super(in); try { sd = StreamDecoder.forInputStreamReader(in, this, (String)null); // ## check lock object } catch (UnsupportedEncodingException e) { // The default encoding should always be available throw new Error(e); } } /** * Creates an InputStreamReader that uses the named charset. * * @param in * An InputStream * * @param charsetName * The name of a supported * {@link java.nio.charset.Charset charset} * * @exception UnsupportedEncodingException * If the named charset is not supported */ public InputStreamReader(InputStream in, String charsetName) throws UnsupportedEncodingException { super(in); if (charsetName == null) throw new NullPointerException("charsetName"); sd = StreamDecoder.forInputStreamReader(in, this, charsetName); } /** * Creates an InputStreamReader that uses the given charset.
* * @param in An InputStream * @param cs A charset * * @since 1.4 * @spec JSR-51 */ public InputStreamReader(InputStream in, Charset cs) { super(in); if (cs == null) throw new NullPointerException("charset"); sd = StreamDecoder.forInputStreamReader(in, this, cs); } /** * Creates an InputStreamReader that uses the given charset decoder.
* * @param in An InputStream * @param dec A charset decoder * * @since 1.4 * @spec JSR-51 */ public InputStreamReader(InputStream in, CharsetDecoder dec) { super(in); if (dec == null) throw new NullPointerException("charset decoder"); sd = StreamDecoder.forInputStreamReader(in, this, dec); } }InputStreamReader内部包含一个StreamDecoder实例引用,对具体字节到字符的解码实现,其实是由StreamDecoder来完成的,在StreamDecoder解码过程中必须由用户指定Charset编码格式,若用户未指定Charset,则将使用本地环境中的默认字符集,如在中文环境中将使用GBK编码。
package java.io;

import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import sun.nio.cs.StreamEncoder;

/**
 * A Writer over an OutputStream. Every constructor obtains a StreamEncoder
 * for the stream and stores it in {@code se}.
 */
public class OutputStreamWriter extends Writer {

    /** Encoder obtained from StreamEncoder.forOutputStreamWriter. */
    private final StreamEncoder se;

    /**
     * Creates an OutputStreamWriter that uses the named charset.
     *
     * @param out
     *        An OutputStream
     * @param charsetName
     *        The name of a supported {@link java.nio.charset.Charset charset}
     *
     * @exception UnsupportedEncodingException
     *            If the named encoding is not supported
     * @exception NullPointerException
     *            If {@code charsetName} is null
     */
    public OutputStreamWriter(OutputStream out, String charsetName)
            throws UnsupportedEncodingException {
        super(out);
        if (charsetName == null) {
            throw new NullPointerException("charsetName");
        }
        se = StreamEncoder.forOutputStreamWriter(out, this, charsetName);
    }

    /**
     * Creates an OutputStreamWriter that uses the default character encoding.
     *
     * @param out An OutputStream
     */
    public OutputStreamWriter(OutputStream out) {
        super(out);
        try {
            // No charset name is supplied (null) in the default-encoding case.
            se = StreamEncoder.forOutputStreamWriter(out, this, (String) null);
        } catch (UnsupportedEncodingException unsupported) {
            throw new Error(unsupported);
        }
    }

    /**
     * Creates an OutputStreamWriter that uses the given charset.
     *
     * @param out An OutputStream
     * @param cs  A charset
     *
     * @exception NullPointerException If {@code cs} is null
     *
     * @since 1.4
     * @spec JSR-51
     */
    public OutputStreamWriter(OutputStream out, Charset cs) {
        super(out);
        if (cs == null) {
            throw new NullPointerException("charset");
        }
        se = StreamEncoder.forOutputStreamWriter(out, this, cs);
    }

    /**
     * Creates an OutputStreamWriter that uses the given charset encoder.
     *
     * @param out An OutputStream
     * @param enc A charset encoder
     *
     * @exception NullPointerException If {@code enc} is null
     *
     * @since 1.4
     * @spec JSR-51
     */
    public OutputStreamWriter(OutputStream out, CharsetEncoder enc) {
        super(out);
        if (enc == null) {
            throw new NullPointerException("charset encoder");
        }
        se = StreamEncoder.forOutputStreamWriter(out, this, enc);
    }
}
以下是使用OutputStreamWriter和InputStreamReader在字符流与字节流之间进行转换的一个简单示例。
/**
 * Writes a Chinese string to a file through an OutputStreamWriter, reads it
 * back through an InputStreamReader using the same charset, and prints the
 * text that was read.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the file cannot be written or read, or if the
 *         charset name is not supported
 */
public static void main(String[] args) throws IOException {
    String file = "d:/stream.txt";
    String charset = "UTF-8";
    String string = "这是要保存的中文字符";

    // try-with-resources declares the raw stream as its own resource, so it is
    // closed even when constructing the writer fails (e.g. unsupported charset).
    try (FileOutputStream fos = new FileOutputStream(file);
         OutputStreamWriter osw = new OutputStreamWriter(fos, charset)) {
        osw.write(string);
    }

    // Accumulates the decoded characters; no synchronization is needed here.
    StringBuilder sb = new StringBuilder();
    char[] buf = new char[64];
    try (FileInputStream fis = new FileInputStream(file);
         InputStreamReader isr = new InputStreamReader(fis, charset)) {
        int count;
        // read() returns -1 at end of stream; otherwise the number of chars placed in buf.
        while ((count = isr.read(buf)) != -1) {
            sb.append(buf, 0, count);
        }
    }
    System.out.println(sb.toString());
}
String类提供了转换到字节的方法,也支持将字节转换为字符串的构造函数。
/**
 * Encodes a string to bytes with String.getBytes and decodes the bytes back
 * with the String(byte[], String) constructor, using UTF-8 both ways.
 *
 * @param args command-line arguments (unused)
 * @throws UnsupportedEncodingException if the charset name is not supported
 */
public static void main(String[] args) throws UnsupportedEncodingException {
    String original = "这是一段中文字符串";

    // String -> byte[] (encode)
    byte[] encoded = original.getBytes("UTF-8");

    // byte[] -> String (decode)
    String decoded = new String(encoded, "UTF-8");
}
Charset类提供encode()与decode(),分别对应char[]到byte[]的编码和byte[]到char[]的解码。
/**
 * Encodes a string into a ByteBuffer with Charset.encode and decodes that
 * buffer back into a CharBuffer with Charset.decode.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    Charset utf8 = Charset.forName("UTF-8");

    // chars -> bytes
    ByteBuffer encoded = utf8.encode("这是要编码的字符串");

    // bytes -> chars
    CharBuffer decoded = utf8.decode(encoded);
}
ByteBuffer提供一种char和byte之间的软转换,它们之间转换不需要编码和解码,只是把一个16bit的char拆分为2个8bit的byte表示,它们的实际值并没有被修改,仅仅是数据的类型做了转换。
/**
 * Stores one char in a ByteBuffer and prints the binary form of the bytes at
 * index 0 and index 1, separated by a space.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    ByteBuffer bytes = ByteBuffer.allocate(1024);
    // putChar returns the buffer it was called on, so the result is not captured.
    bytes.putChar('中');

    String first = Integer.toBinaryString(bytes.get(0));
    String second = Integer.toBinaryString(bytes.get(1));

    System.out.print(first + " ");
    System.out.print(second);
}
打印结果:
1001110 101101
编码问题(char-encoding-problem)典型示例:
/**
 * Shows the bytes produced when the same string is encoded with several
 * charsets, printing each result as space-separated hex.
 */
public class EncodeTest {

    /**
     * Formats bytes as unpadded hex values, each followed by a single space.
     *
     * @param bytes the bytes to format
     * @return the hex text, or {@code null} if {@code bytes} is null or empty
     */
    static String toHexString(byte[] bytes) {
        if (bytes == null || bytes.length == 0) {
            return null;
        }
        StringBuilder hex = new StringBuilder("");
        for (byte b : bytes) {
            // Mask to 0..255 so negative bytes are not sign-extended.
            int unsigned = b & 0xFF;
            hex.append(Integer.toHexString(unsigned)).append(" ");
        }
        return hex.toString();
    }

    /**
     * Formats chars as unpadded hex values, each followed by a single space.
     *
     * @param chars the chars to format
     * @return the hex text, or {@code null} if {@code chars} is null or empty
     */
    static String toHexString(char[] chars) {
        if (chars == null || chars.length == 0) {
            return null;
        }
        StringBuilder hex = new StringBuilder("");
        for (char c : chars) {
            hex.append(Integer.toHexString(c)).append(" ");
        }
        return hex.toString();
    }

    /**
     * Encodes one sample string with five charsets and prints the hex of each result.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String string = "I am 李";
        // Unicode code units, decimal: 73 32 97 109 32 26446
        // Unicode code units, hex:     49 20 61 6d 20 674e
        // Unicode code units, binary:  01001001 00100000 01100001 01101101 00100000 0110011101001110
        try {
            // All encodings are computed up front, before anything is printed.
            byte[] iso8859 = string.getBytes("ISO-8859-1");
            byte[] gb2312 = string.getBytes("GB2312");
            byte[] gbk = string.getBytes("GBK");
            byte[] utf16 = string.getBytes("UTF-16");
            byte[] utf8 = string.getBytes("UTF-8");

            // Output: 49 20 61 6d 20 674e
            System.out.println(toHexString(string.toCharArray()));

            // ISO-8859-1 encodes characters it does not support as 3f, i.e. "?".
            // Output: 49 20 61 6d 20 3f
            System.out.println(toHexString(iso8859));

            // GB2312 has a char-to-byte code table; encoding looks up the bytes for
            // each character in that table and assembles them into the byte array.
            // Output: 49 20 61 6d 20 c0 ee
            System.out.println(toHexString(gb2312));

            // GBK is compatible with GB2312 and covers more Chinese characters.
            // Output: 49 20 61 6d 20 c0 ee
            System.out.println(toHexString(gbk));

            // UTF-16 simply splits each char into its high and low byte; the rule is
            // very simple and encoding is very efficient. Two leading bytes hold the
            // BYTE_ORDER_MARK, which tells whether the high or the low byte comes first.
            // Output: fe ff 0 49 0 20 0 61 0 6d 0 20 67 4e
            System.out.println(toHexString(utf16));

            // UTF-8 needs no table lookup either; it is efficient and its
            // variable-length storage saves space.
            // Output: 49 20 61 6d 20 e6 9d 8e
            System.out.println(toHexString(utf8));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Apache Tomcat对URL的URI部分进行解码的字符集是在Connector的URIEncoding属性中定义的。
在CATALINA_HOME\conf\server.xml中修改Connector配置如下:
<Connector URIEncoding="UTF-8" />
URL中以Get方式请求的QueryString的解码是在request.getParameter()方法第一次被调用时进行的,解码字符集要么是Header中ContentType定义的Charset,要么是默认的ISO-8859-1,要使用ContentType中定义的编码,就要将Connector的useBodyEncodingForURI属性设置为true。
<script src="script.js" charset="gbk"></script>
如果引入的script.js脚本中有如下代码:
document.write("这是一段中文");
这时如果script没有设置charset,浏览器就会以当前这个页面的默认字符集解析这个JS文件。当script.js文件与当前页面的编码格式不一致时,就会出现乱码。
Velocity模板设置编码的格式如下:
services.VelocityService.input.encoding=UTF-8
JSP设置编码的格式如下:
<%@page contentType="text/html;charset=UTF-8"%>