<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Playing on the frontier &#187; unicode</title>
	<atom:link href="http://siphon9.net/loune/tag/unicode/feed/" rel="self" type="application/rss+xml" />
	<link>http://siphon9.net/loune</link>
	<description></description>
	<lastBuildDate>Thu, 15 Jul 2010 12:58:06 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.0.1</generator>
		<item>
		<title>Javascript snippet to convert raw UTF8 to unicode</title>
		<link>http://siphon9.net/loune/2009/10/javascript-snippet-to-convert-raw-utf8-to-unicode/</link>
		<comments>http://siphon9.net/loune/2009/10/javascript-snippet-to-convert-raw-utf8-to-unicode/#comments</comments>
		<pubDate>Sat, 03 Oct 2009 08:29:13 +0000</pubDate>
		<dc:creator>Loune</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[javascript]]></category>
		<category><![CDATA[unicode]]></category>
		<category><![CDATA[useless]]></category>
		<category><![CDATA[utf-8]]></category>

		<guid isPermaLink="false">http://siphon9.net/loune/?p=101</guid>
		<description><![CDATA[For the I-don&#8217;t-have-a-sane-use-for-this department comes this piece of code which takes a stream of raw UTF-8 bytes, decodes it and passes it through fromCharCode, rendering it in a Unicode-capable browser. A possible use would be if the web page character set is not UTF-8 and you want to display UTF-8. To use it, just put it [...]]]></description>
			<content:encoded><![CDATA[<p>For the I-don&#8217;t-have-a-sane-use-for-this department comes this piece of code which takes a stream of raw UTF-8 bytes, decodes it and passes it through fromCharCode, rendering it in a Unicode-capable browser. A possible use would be if the web page character set is not UTF-8 and you want to display UTF-8. To use it, just put it in a script tag and call utf8decode(myrawutf8string). But seriously, all web pages should be UTF-8 by default nowadays. Here it is, in case anyone wants it:</p>
<pre class="brush: jscript;">
function TryGetCharUTF8(c, intc, b, i, count)
		{
			/*
			 * 10000000 80
			 * 11000000 C0
			 * 11100000 E0
			 * 11110000 F0
			 * 11111000 F8
			 * 11111100 FC
			 *
			 * FEFF = 65279 = BOM
			 *
			 * string musicalbassclef = &quot;&quot; + (char)0xD834 + (char)0xDD1E; 119070 0x1D11E
			 */

			if ((b.charCodeAt(i) &amp; 0x80) == 0)
			{
				intc = b.charCodeAt(i);
			}
			else
			{
				if ((b.charCodeAt(i) &amp; 0xE0) == 0xC0)
				{
					//if (i+1 &gt;= count) return false;
					intc = ((b.charCodeAt(i) &amp; 0x1F) &lt;&lt; 6) | ((b.charCodeAt(i + 1) &amp; 0x3F));

					i += 1;
				}
				else if ((b.charCodeAt(i) &amp; 0xF0) == 0xE0)
				{
					// 3 bytes Covers the rest of the BMP
					//if (i+2 &gt;= count) return false;
					intc = ((b.charCodeAt(i) &amp; 0xF) &lt;&lt; 12) | ((b.charCodeAt(i + 1) &amp; 0x3F) &lt;&lt; 6) | ((b.charCodeAt(i + 2) &amp; 0x3F));
					alert(b.charCodeAt(i) + ' '+b.charCodeAt(i + 1) +' '+b.charCodeAt(i + 2));
					i += 2;
				}
				else if ((b.charCodeAt(i) &amp; 0xF8) == 0xF0)
				{
					intc = ((b.charCodeAt(i) &amp; 0x7) &lt;&lt; 18) | ((b.charCodeAt(i + 1) &amp; 0x3F) &lt;&lt; 12) | ((b.charCodeAt(i + 2) &amp; 0x3F) &lt;&lt; 6) | ((b.charCodeAt(i + 3) &amp; 0x3F));

					i += 1;
				}
				else
					return false;
			}
window.utf8_out_intc = intc;
window.utf8_out_i = i;
			return true;
		}

function utf8decode(s) {
	var ss = &quot;&quot;;
	for(utf8_out_i = 0; utf8_out_i &lt; s.length; utf8_out_i++) {
		TryGetCharUTF8(window.utf8_out_c, window.utf8_out_intc, s, window.utf8_out_i, s.length);
		ss += String.fromCharCode(window.utf8_out_intc);
	}
	return ss;
}
</pre>
]]></content:encoded>
			<wfw:commentRss>http://siphon9.net/loune/2009/10/javascript-snippet-to-convert-raw-utf8-to-unicode/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss>
