199 lines
3.5 KiB
C
199 lines
3.5 KiB
C
#include <string.h>
|
||
#include <stdlib.h>
|
||
#include <stdio.h>
|
||
#include <common/bk_typedef.h>
|
||
#include <common/bk_include.h>
|
||
#include <os/mem.h>
|
||
|
||
#if CONFIG_USE_CONV_UTF8
|
||
#include "conv_utf8_gb2312_table.h"
|
||
#include "conv_utf8_pub.h"
|
||
static int conv_utf8_for_gb2312(unsigned char *pin, unsigned char *pout)
|
||
{
|
||
unsigned char hi_bits, low_bit;
|
||
const unsigned char *utf8;
|
||
unsigned short gb_code, dist_cnt;
|
||
|
||
hi_bits = *pin & 0xff;
|
||
low_bit = *(pin + 1) & 0xff;
|
||
//!“#¥%&‘() * + , - . / A3A1~A3AF
|
||
if((hi_bits == 0xA3) &&
|
||
(low_bit >= 0xA1) && (low_bit <= 0xAF)){
|
||
utf8 = t_gb2312_utf8_ex[low_bit - 0xA1];
|
||
memcpy(pout, utf8, 3);
|
||
return 1;
|
||
}
|
||
// chinese char start from 0xB0A1 to 0xF7FE
|
||
if ((hi_bits < 0xB0) || (hi_bits > 0xF7))
|
||
return 0;
|
||
if ((low_bit < 0xA1) || (low_bit > 0xFE))
|
||
return 0;
|
||
|
||
gb_code = (hi_bits << 8) + low_bit;
|
||
|
||
// there are 0xa2 banks between two district
|
||
dist_cnt = hi_bits - 0xB0;
|
||
dist_cnt = dist_cnt * (0xA1 + 0x01);
|
||
|
||
// shift pos in table
|
||
if (gb_code > 0xd7f9)
|
||
gb_code -= 5;
|
||
gb_code = gb_code - 0xB0A1 - dist_cnt;
|
||
utf8 = t_gb2312_utf8[gb_code];
|
||
|
||
memcpy(pout, utf8, 3);
|
||
|
||
return 1;
|
||
}
|
||
|
||
/**
 * Convert a NUL-terminated string that may contain GB2312 characters into
 * a newly allocated UTF-8 string.
 *
 * Every byte pair accepted by conv_utf8_for_gb2312() is replaced by its
 * 3-byte UTF-8 form; any other byte is copied through unchanged.
 *
 * @param input  NUL-terminated source string; it is not modified.
 * @return buffer obtained from os_malloc() holding the NUL-terminated
 *         result, or NULL if input is NULL or the allocation fails.  This
 *         function never frees the buffer, so the caller must release it.
 */
unsigned char *conv_utf8(unsigned char *input)
{
    size_t in_len;
    size_t out_cap;
    unsigned char *src;
    unsigned char *dst;
    unsigned char *result;

    if (!input)
        return NULL;

    in_len = strlen((char *)input);

    /*
     * Worst case is every 2 input bytes growing to 3 output bytes, with a
     * possible odd trailing byte copied 1:1.  ((len + 1) / 2) * 3 covers
     * both the even and the odd case; the final +1 is the terminator.
     */
    out_cap = ((in_len + 1) / 2) * 3 + 1;

    result = (unsigned char *)os_malloc(out_cap);
    if (!result)
        return NULL;

    src = input;
    dst = result;

    while (*src) {
        if (conv_utf8_for_gb2312(src, dst)) {
            /* A GB2312 pair was consumed and 3 UTF-8 bytes were written. */
            dst += 3;
            src += 2;
        } else {
            /* Not a convertible pair: pass the byte through as-is. */
            *dst++ = *src++;
        }
    }
    *dst = 0;

    return result;
}
|
||
|
||
unsigned short SearchCodeTable(unsigned short unicodeKey)
|
||
{
|
||
int first = 0;
|
||
int end = sizeof(unicode_to_gb2312) / (2 * sizeof(uint16_t)) - 1;
|
||
int mid = 0;
|
||
|
||
while (first <= end) {
|
||
mid = (first + end) / 2;
|
||
|
||
if (unicode_to_gb2312[mid][0] == unicodeKey)
|
||
return unicode_to_gb2312[mid][1];
|
||
else if (unicode_to_gb2312[mid][0] > unicodeKey)
|
||
end = mid - 1;
|
||
else
|
||
first = mid + 1;
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
|
||
/**
 * Count the consecutive 1 bits at the most-significant end of a byte.
 *
 * For a UTF-8 lead byte this count is the length of the sequence
 * (2 for 110xxxxx, 3 for 1110xxxx, ...); an ASCII byte yields 0 and a
 * continuation byte (10xxxxxx) yields 1.
 *
 * @param firstCh  byte to inspect.
 * @return number of leading 1 bits, in the range 0..8.
 */
int GetUtf8ByteNumForWord(uint8_t firstCh)
{
    int lead_ones = 0;
    uint8_t mask;

    /* Walk from bit 7 downward until a 0 bit is met or the mask runs out. */
    for (mask = 0x80; (mask & firstCh) != 0; mask = (mask >> 1))
        lead_ones++;

    return lead_ones;
}
|
||
|
||
/**
 * Convert a NUL-terminated UTF-8 string to GB2312, overwriting the input.
 *
 * Each character is handled according to the number of leading 1 bits in
 * its first byte (see GetUtf8ByteNumForWord()):
 *   - 0       : single byte, copied unchanged;
 *   - 2       : both bytes are copied unchanged (no table lookup);
 *   - 3       : decoded to a 16-bit code point, mapped with
 *               SearchCodeTable(), and stored as two bytes, high byte
 *               first;
 *   - 4, 5, 6 : the whole sequence is skipped, nothing is emitted;
 *   - other   : (stray continuation byte, 0xFE, 0xFF) that one byte is
 *               skipped.
 * A 2..6 byte sequence that is cut short by the end of the string is
 * dropped and conversion stops there.
 *
 * The result is never longer than the input, so it always fits in the
 * caller's buffer.
 *
 * @param utf8  string to convert; receives the converted text.
 * @return utf8 on success; NULL if utf8 is NULL, is empty, or the scratch
 *         buffer cannot be allocated (utf8 is left unmodified then).
 */
char *Utf8ToGb2312(char *utf8)
{
    char *temp = NULL;
    int len, i = 0, j = 0;

    if (!utf8)
        return NULL;

    len = strlen(utf8);
    if (len <= 0)
        return NULL;

    /* Output is at most len bytes; the extra byte holds the terminator. */
    temp = os_malloc(((size_t)len + 1) * sizeof(char));
    if (!temp)
        return NULL;

    /* Walk the input one UTF-8 sequence at a time. */
    while (i < len) {
        int nbyte = GetUtf8ByteNumForWord((uint8_t)utf8[i]);
        int byteCount;

        /*
         * Never read past the terminator: a multi-byte lead that does not
         * have all of its bytes inside the string ends the conversion.
         */
        if ((nbyte >= 2) && (nbyte <= 6) && (nbyte > len - i))
            break;

        switch (nbyte) {
        case 0:
            temp[j] = utf8[i];
            byteCount = 1;
            j += 1;
            break;

        case 2:
            temp[j] = utf8[i];
            temp[j + 1] = utf8[i + 1];
            byteCount = 2;
            j += 2;
            break;

        case 3: {
            /* UTF-8 -> Unicode: 1110xxxx 10yyyyyy 10zzzzzz. */
            uint8_t b0 = (uint8_t)utf8[i];
            uint8_t b1 = (uint8_t)utf8[i + 1];
            uint8_t b2 = (uint8_t)utf8[i + 2];
            /* Built arithmetically so the value does not depend on the
             * byte order of the target. */
            uint16_t unicodeKey = (uint16_t)(((b0 & 0x0F) << 12) |
                                             ((b1 & 0x3F) << 6) |
                                             (b2 & 0x3F));
            /* Unicode -> GB2312 via the lookup table. */
            uint16_t gbKey = SearchCodeTable(unicodeKey);

            /*
             * NOTE(review): SearchCodeTable() returns 0 when the code
             * point is not in the table; the two zero bytes stored here
             * then end the C string early.  Behavior kept as it was --
             * confirm whether a substitute character is wanted instead.
             */
            temp[j] = (char)((gbKey & 0xff00) >> 8);
            temp[j + 1] = (char)(gbKey & 0x00ff);

            byteCount = 3;
            j += 2;
            break;
        }

        case 4:
        case 5:
        case 6:
            /* Sequences of 4..6 bytes are skipped; nothing is emitted. */
            byteCount = nbyte;
            break;

        default:
            /* Not a valid lead byte: skip it and resynchronize. */
            byteCount = 1;
            break;
        }

        i += byteCount;
    }

    temp[j++] = '\0';
    memcpy(utf8, temp, j);

    os_free(temp);

    return utf8;
}
|
||
|
||
|
||
#endif // CONFIG_USE_CONV_UTF8
|
||
|