中文等字符转化为unicode(UTF-16 LE)

  |   3 |   Code |   javascript WWW 字符编码

嘛,挺不错的。
原文:http://blog.csdn.net/geovindu/article/details/9119725

利用的是 JavaScript 的 charCodeAt() 方法
http://www.w3school.com.cn/jsref/jsref_charCodeAt.asp
相关函数还有unescape() 以及escape()
网页上的unicode码常见的形式除了"\uXXXX"还有"&#xXXXX;"和"&#XXXX;",其中"&#xXXXX;"是16进制,"&#XXXX;"为10进制








This encoding utility requires JavaScript.

源码

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
<!-- Mode selector (radio group "lang": i0, i1, i2) and the input textarea; each click on a radio button and each keyup in the textarea calls refresh(). -->
<input id="i0" name="lang" onclick="refresh()" type="radio" checked/>
<label for="i0" title="\u00FF only">Java String Encoder</label>
<input id="i1" name="lang" onclick="refresh()" type="radio" />
<label for="i1" title="\xFF and \u0100">JavaScript String Encoder</label>
<input id="i2" name="lang" onclick="refresh()" type="radio" />
<label for="i2" title="\xFF and \u0100">Decoder</label>
<div>
    <textarea onkeyup="refresh()" rows="8" cols="100">// Paste some Java or JavaScript code into this window.
german = "Übergröße 塘㙍镇";
smilie = "☺";
</textarea>
</div>

This encoding utility requires JavaScript.

<script type="text/javascript">


/**
 * Escape every character outside U+0000..U+007E as a JavaScript string escape.
 *
 * The function plays two roles:
 *   - called directly (`b` omitted) it scans `a` and escapes each matching
 *     character by passing itself to `String.prototype.replace`;
 *   - called by `replace` (`b` is the numeric match offset) it returns the
 *     escape sequence for the single matched character in `a`.
 *
 * Escape forms: `\xHH` for code units up to U+00FF, `\u0HHH` for U+0100..U+0FFF
 * and `\uHHHH` for U+1000 and above. Hex digits are upper case.
 *
 * @param {string} a - Text to encode, or the matched character in callback mode.
 * @param {number} [b] - Match offset supplied by `replace`; omit when calling directly.
 * @returns {string} The encoded text, or one escape sequence in callback mode.
 */
var encodeJavaScriptString = function f(a, b)
{
  // Incrementing turns offset 0 into a truthy 1, while a missing offset becomes NaN (falsy).
  var offsetPlusOne = Number(b) + 1;
  if (!offsetPlusOne) {
    return a.replace(/[^\0-~]/g, f);
  }

  var code = a.charCodeAt();
  var prefix;
  if (code >> 12) {
    prefix = 'u';      // four hex digits follow
  } else if (code >> 8) {
    prefix = 'u0';     // three hex digits follow, so pad with one zero
  } else {
    prefix = 'x';      // short form for code units up to U+00FF
  }
  // toString(16) yields no leading zeros; the prefix chosen above accounts for that.
  return '\\' + prefix + code.toString(16).toUpperCase();
};
/**
 * Escape every character outside U+0000..U+007E as a Java `\uXXXX` escape.
 *
 * The function plays two roles:
 *   - called directly (`b` omitted) it scans `a` and escapes each matching
 *     character by passing itself to `String.prototype.replace`;
 *   - called by `replace` (`b` is the numeric match offset) it returns the
 *     escape sequence for the single matched character in `a`.
 *
 * @param {string} a - Text to encode, or the matched character in callback mode.
 * @param {number} [b] - Match offset supplied by `replace`; omit when calling directly.
 * @returns {string} The encoded text, or one escape sequence in callback mode.
 */
var encodeJavaString = function e(a, b)
{
  // Incrementing turns offset 0 into a truthy 1, while a missing offset becomes NaN (falsy).
  var offsetPlusOne = Number(b) + 1;
  if (!offsetPlusOne) {
    return a.replace(/[^\0-~]/g, e);
  }

  var hex = a.charCodeAt().toString(16);
  // Matched characters have at least two hex digits, so two pad zeros reach four digits.
  var padded = '00' + hex;
  return '\\u' + padded.slice(-4).toUpperCase();
};
/**
 * Decode escape sequences back into the characters they stand for.
 *
 * Recognized forms:
 *   - `\uHHHH`  : backslash, lower-case `u`, exactly four hex digits;
 *   - `\xHH`    : backslash, lower-case `x`, exactly two hex digits (the short
 *                 form that encodeJavaScriptString emits);
 *   - `&#xH;` to `&#xHHHH;` : HTML hexadecimal character reference with one to
 *                 four hex digits; the `x` may be upper or lower case.
 *
 * Everything else is returned unchanged. In particular, `\u` or `\x` followed
 * by non-hex characters (for example the `\users` in a Windows path) is not
 * treated as an escape.
 *
 * @param {string} str - Text that may contain escape sequences.
 * @param {*} [b] - Unused; kept so the signature stays compatible.
 * @returns {string} The text with every recognized sequence decoded.
 */
var reconvert = function (str, b) {
  var fromHex = function (hex) {
    return String.fromCharCode(parseInt(hex, 16));
  };

  // Both backslash forms are decoded in one pass so that a decoded backslash
  // cannot combine with the following text into a second escape.
  str = str.replace(/\\(?:u([0-9a-fA-F]{4})|x([0-9a-fA-F]{2}))/g, function (match, u4, x2) {
    // Exactly one group participates; the other is undefined.
    return fromHex(u4 || x2);
  });

  // HTML hexadecimal character references.
  str = str.replace(/&#[xX]([0-9a-fA-F]{1,4});/g, function (match, hex) {
    return fromHex(hex);
  });

  return str;
};
/**
 * Re-render the converted text.
 *
 * Reads the first TEXTAREA on the page, runs its value through the converter
 * selected by the radio buttons (`i1` checked: encodeJavaScriptString,
 * `i0` checked: encodeJavaString, otherwise reconvert), normalizes CRLF line
 * breaks to LF and writes the result into the first child node of the first
 * CODE element.
 */
var refresh = function()
{
    var source = document.getElementsByTagName('TEXTAREA')[0];
    var output = document.getElementsByTagName('CODE')[0];

    // The JavaScript encoder is checked first, then the Java encoder; the decoder is the fallback.
    var convert = document.getElementById('i1').checked
        ? encodeJavaScriptString
        : document.getElementById('i0').checked
            ? encodeJavaString
            : reconvert;

    var target = output.firstChild;
    target.data = convert(source.value).replace(/\r\n/g, '\n');
};
// Run once at script load so the output is filled in before any click or keyup event occurs.
refresh();
</script>
Comments
Write a Comment