199 lines
3.5 KiB
C
199 lines
3.5 KiB
C
![]() |
#include <string.h>
|
|||
|
#include <stdlib.h>
|
|||
|
#include <stdio.h>
|
|||
|
#include <common/bk_typedef.h>
|
|||
|
#include <common/bk_include.h>
|
|||
|
#include <os/mem.h>
|
|||
|
|
|||
|
#if CONFIG_USE_CONV_UTF8
|
|||
|
#include "conv_utf8_gb2312_table.h"
|
|||
|
#include "conv_utf8_pub.h"
|
|||
|
static int conv_utf8_for_gb2312(unsigned char *pin, unsigned char *pout)
|
|||
|
{
|
|||
|
unsigned char hi_bits, low_bit;
|
|||
|
const unsigned char *utf8;
|
|||
|
unsigned short gb_code, dist_cnt;
|
|||
|
|
|||
|
hi_bits = *pin & 0xff;
|
|||
|
low_bit = *(pin + 1) & 0xff;
|
|||
|
//!“#¥%&‘() * + , - . / A3A1~A3AF
|
|||
|
if((hi_bits == 0xA3) &&
|
|||
|
(low_bit >= 0xA1) && (low_bit <= 0xAF)){
|
|||
|
utf8 = t_gb2312_utf8_ex[low_bit - 0xA1];
|
|||
|
memcpy(pout, utf8, 3);
|
|||
|
return 1;
|
|||
|
}
|
|||
|
// chinese char start from 0xB0A1 to 0xF7FE
|
|||
|
if ((hi_bits < 0xB0) || (hi_bits > 0xF7))
|
|||
|
return 0;
|
|||
|
if ((low_bit < 0xA1) || (low_bit > 0xFE))
|
|||
|
return 0;
|
|||
|
|
|||
|
gb_code = (hi_bits << 8) + low_bit;
|
|||
|
|
|||
|
// there are 0xa2 banks between two district
|
|||
|
dist_cnt = hi_bits - 0xB0;
|
|||
|
dist_cnt = dist_cnt * (0xA1 + 0x01);
|
|||
|
|
|||
|
// shift pos in table
|
|||
|
if (gb_code > 0xd7f9)
|
|||
|
gb_code -= 5;
|
|||
|
gb_code = gb_code - 0xB0A1 - dist_cnt;
|
|||
|
utf8 = t_gb2312_utf8[gb_code];
|
|||
|
|
|||
|
memcpy(pout, utf8, 3);
|
|||
|
|
|||
|
return 1;
|
|||
|
}
|
|||
|
|
|||
|
unsigned char *conv_utf8(unsigned char *input)
|
|||
|
{
|
|||
|
int len;
|
|||
|
unsigned char *ptr, *out_bak, *out;
|
|||
|
|
|||
|
len = strlen((char *)input) + 1;
|
|||
|
len = (len / 2) * 3 + 1;
|
|||
|
|
|||
|
out_bak = (unsigned char *)os_malloc(len);
|
|||
|
if (!out_bak)
|
|||
|
return NULL;
|
|||
|
|
|||
|
ptr = input;
|
|||
|
out = out_bak;
|
|||
|
|
|||
|
while (*ptr) {
|
|||
|
if (conv_utf8_for_gb2312(ptr, out)) {
|
|||
|
out += 3;
|
|||
|
ptr += 2;
|
|||
|
} else {
|
|||
|
*out = *ptr;
|
|||
|
out++;
|
|||
|
ptr++;
|
|||
|
}
|
|||
|
}
|
|||
|
*out = 0;
|
|||
|
|
|||
|
|
|||
|
//for(i=0; out_bak[i] != 0; i++)
|
|||
|
// os_printf("%02x", out_bak[i]);
|
|||
|
|
|||
|
//os_printf("\r\n");
|
|||
|
|
|||
|
return out_bak;
|
|||
|
}
|
|||
|
|
|||
|
unsigned short SearchCodeTable(unsigned short unicodeKey)
|
|||
|
{
|
|||
|
int first = 0;
|
|||
|
int end = sizeof(unicode_to_gb2312) / (2 * sizeof(uint16_t)) - 1;
|
|||
|
int mid = 0;
|
|||
|
|
|||
|
while (first <= end) {
|
|||
|
mid = (first + end) / 2;
|
|||
|
|
|||
|
if (unicode_to_gb2312[mid][0] == unicodeKey)
|
|||
|
return unicode_to_gb2312[mid][1];
|
|||
|
else if (unicode_to_gb2312[mid][0] > unicodeKey)
|
|||
|
end = mid - 1;
|
|||
|
else
|
|||
|
first = mid + 1;
|
|||
|
}
|
|||
|
return 0;
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
int GetUtf8ByteNumForWord(uint8_t firstCh)
|
|||
|
{
|
|||
|
uint8_t temp = 0x80;
|
|||
|
int num = 0;
|
|||
|
|
|||
|
while (temp & firstCh) {
|
|||
|
num++;
|
|||
|
temp = (temp >> 1);
|
|||
|
}
|
|||
|
return num;
|
|||
|
}
|
|||
|
|
|||
|
char *Utf8ToGb2312(char *utf8)
|
|||
|
{
|
|||
|
char *temp = NULL;
|
|||
|
int byteCount = 0;
|
|||
|
int len, i = 0, j = 0;
|
|||
|
|
|||
|
uint16_t unicodeKey = 0;
|
|||
|
uint16_t gbKey = 0;
|
|||
|
|
|||
|
if (!utf8)
|
|||
|
return NULL;
|
|||
|
|
|||
|
len = strlen(utf8);
|
|||
|
if (len <= 0)
|
|||
|
return NULL;
|
|||
|
|
|||
|
temp = os_malloc(len * sizeof(char));
|
|||
|
if (!temp)
|
|||
|
return NULL;
|
|||
|
|
|||
|
//ѭ<><D1AD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
while (i < len) {
|
|||
|
int nbyte = GetUtf8ByteNumForWord((uint8_t)utf8[i]);
|
|||
|
switch (nbyte) {
|
|||
|
case 0:
|
|||
|
temp[j] = utf8[i];
|
|||
|
byteCount = 1;
|
|||
|
j += 1;
|
|||
|
break;
|
|||
|
|
|||
|
case 2:
|
|||
|
temp[j] = utf8[i];
|
|||
|
temp[j + 1] = utf8[i + 1];
|
|||
|
byteCount = 2;
|
|||
|
j += 2;
|
|||
|
break;
|
|||
|
|
|||
|
case 3:
|
|||
|
//<2F><><EFBFBD><EFBFBD><EFBFBD>Ϳ<EFBFBD>ʼ<EFBFBD><CABC><EFBFBD><EFBFBD>UTF8->Unicode
|
|||
|
temp[j + 1] = ((utf8[i] & 0x0F) << 4) | ((utf8[i + 1] >> 2) & 0x0F);
|
|||
|
temp[j] = ((utf8[i + 1] & 0x03) << 6) + (utf8[i + 2] & 0x3F);
|
|||
|
|
|||
|
//ȡ<><C8A1>Unicode<64><65>ֵ
|
|||
|
memcpy(&unicodeKey, (temp + j), 2);
|
|||
|
|
|||
|
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD>ȡ<EFBFBD>ö<EFBFBD>Ӧ<EFBFBD><D3A6>GB2312<31><32>ֵ
|
|||
|
gbKey = SearchCodeTable(unicodeKey);
|
|||
|
temp[j] = (gbKey & 0xff00) >> 8;
|
|||
|
temp[j + 1] = (gbKey & 0x00ff);
|
|||
|
|
|||
|
byteCount = 3;
|
|||
|
j += 2;
|
|||
|
break;
|
|||
|
|
|||
|
case 4:
|
|||
|
case 5:
|
|||
|
case 6:
|
|||
|
byteCount = nbyte;
|
|||
|
//printf("4-6 utf8 no process, copy them\n");
|
|||
|
break;
|
|||
|
|
|||
|
default:
|
|||
|
//printf("err! the len is more than 6\n");
|
|||
|
byteCount = 1;
|
|||
|
break;
|
|||
|
}
|
|||
|
|
|||
|
i += byteCount;
|
|||
|
}
|
|||
|
|
|||
|
temp[j++] = '\0';
|
|||
|
memcpy(utf8, temp, j);
|
|||
|
|
|||
|
//printf("%s", utf8);
|
|||
|
os_free(temp);
|
|||
|
|
|||
|
return utf8;
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
#endif // CONFIG_USE_CONV_UTF8
|
|||
|
|