|
2#
楼主 |
发表于 2007-11-22 17:50:08
|
只看该作者
| (((int)(byte[index + 2] & 0x3F)) << 6)
| (byte[index + 3] & 0x3F);
}
else if ((byte[index] & 0xFC) == 0xF8) // 五位
{
if (index + 4 >= count) return false;
unicode = (((int)(byte[index] & 0x03)) << 24)
| (((int)(byte[index + 1] & 0x3F)) << 18)
| (((int)(byte[index + 2] & 0x3F)) << 12)
| (((int)(byte[index + 3] & 0x3F)) << 6)
| (byte[index + 4] & 0x3F);
}
else if ((byte[index] & 0xFE) == 0xFC) // 六位
{
if (index + 5 >= count) return false;
unicode = (((int)(byte[index] & 0x01)) << 30)
| (((int)(byte[index + 1] & 0x3F)) << 24)
| (((int)(byte[index + 2] & 0x3F)) << 18)
| (((int)(byte[index + 3] & 0x3F)) << 12)
| (((int)(byte[index + 4] & 0x3F)) << 6)
| (byte[index + 5] & 0x3F);
}
else
{
return false;
}
return true;
}
// Decode a single hexadecimal digit character into its numeric value.
//
// in  - character to decode; '0'-'9', 'A'-'F' and 'a'-'f' are accepted.
// out - receives the value 0..15 on success; left untouched on failure.
//
// Returns true when `in` is a hexadecimal digit, false otherwise.
bool char2digist(char in, char&out)
{
    if (in >= '0' && in <= '9') {
        out = in - '0';
        return true;
    }
    if (in >= 'A' && in <= 'F') {
        out = in - 'A' + 10;
        return true;
    }
    if (in >= 'a' && in <= 'f') {
        out = in - 'a' + 10;
        return true;
    }
    return false;
}
// Parse one byte value from a buffer of hexadecimal digit characters.
//
// ch    - buffer holding hexadecimal digit characters.
// index - position of the first digit to read.
// count - number of valid characters in `ch`.
// byte  - receives the decoded value on success; left untouched on failure.
//
// When at least two characters remain starting at `index`, ch[index] supplies
// the high nibble and ch[index + 1] the low nibble. When exactly one character
// remains, that lone digit is taken as the whole value (0..15).
//
// Returns false if `index` lies outside [0, count) or if any character that is
// read is not a hexadecimal digit; returns true otherwise.
bool widechar2hexbyte(char* ch, int index, int count, unsigned int& byte)
{
    // Reject positions outside the buffer. Without this guard the single-digit
    // path below would read ch[index] beyond the `count` valid characters.
    if (index < 0 || index >= count)
        return false;

    char high = 0;
    char low = 0;

    if (count - index >= 2) {
        // Two digits available: combine them into one byte.
        if (!char2digist(ch[index], high) || !char2digist(ch[index + 1], low))
            return false;
        byte = ((unsigned int)high << 4) | (unsigned int)low;
        return true;
    }

    // Only one digit left at the end of the buffer.
    if (!char2digist(ch[index], low))
        return false;
    byte = (unsigned int)low;
    return true;
}
// Command-line entry point.
//
// Reads a string of hexadecimal digits from argv[1], decodes it two digits at
// a time into at most 10 byte values (any further input is ignored), hands
// those bytes to utf82unicode, and on success prints the resulting value in
// uppercase hexadecimal.
//
// Exit status: 0 when utf82unicode succeeds; 1 when the argument is missing,
// when a character is not a hexadecimal digit, or when utf82unicode fails.
int main(int argc, char* argv[])
{
    const int max_bytes = 10;
    unsigned int bytes[max_bytes];

    if (argc < 2) {
        printf("usage: utf82unicode [hex string]\n");
        return 1;
    }

    const int len = (int)strlen(argv[1]);
    int bi = 0;

    // Each decoded byte consumes two input characters (a trailing lone digit
    // is accepted by widechar2hexbyte as a value on its own).
    for (int i = 0; i < len && bi < max_bytes; i += 2) {
        if (!widechar2hexbyte(argv[1], i, len, bytes[bi]))
            return 1;
        ++bi;
    }

    int unicode = 0;
    if (utf82unicode(bytes, 0, bi, unicode)) {
        // %X expects an unsigned int argument.
        printf("unicode: %0X\n", (unsigned int)unicode);
        return 0;
    }
    return 1;
}
|