199 lines
3.5 KiB
C
Raw Normal View History

2025-02-27 17:59:18 +08:00
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <common/bk_typedef.h>
#include <common/bk_include.h>
#include <os/mem.h>
#if CONFIG_USE_CONV_UTF8
#include "conv_utf8_gb2312_table.h"
#include "conv_utf8_pub.h"
/*
 * Translate the double-byte GB2312 character found at pin[0..1] into a
 * 3-byte UTF-8 sequence stored at pout[0..2].
 *
 * Both input bytes are always read, so the caller must guarantee that
 * pin[1] is accessible (a string terminator is fine).
 *
 * Returns 1 when the pair was recognised and 3 bytes were written to pout,
 * 0 when the pair is outside the supported ranges (pout is left untouched).
 */
static int conv_utf8_for_gb2312(unsigned char *pin, unsigned char *pout)
{
    const unsigned char lead = pin[0];
    const unsigned char trail = pin[1];
    unsigned short index;

    /* Codes 0xA3A1..0xA3AF (punctuation) are served by the small extra table. */
    if (lead == 0xA3 && trail >= 0xA1 && trail <= 0xAF) {
        memcpy(pout, t_gb2312_utf8_ex[trail - 0xA1], 3);
        return 1;
    }

    /* Chinese characters occupy 0xB0A1..0xF7FE; reject anything else. */
    if (lead < 0xB0 || lead > 0xF7 || trail < 0xA1 || trail > 0xFE)
        return 0;

    /*
     * Each lead byte owns 0xFE - 0xA1 + 1 = 94 consecutive table slots, so
     * the slot is row * 94 + column. This is the same value as subtracting
     * 0xB0A1 and the 0xA2-wide gap per row from the raw 16-bit code.
     */
    index = (unsigned short)((lead - 0xB0) * (0xFE - 0xA1 + 1) + (trail - 0xA1));

    /*
     * Codes above 0xD7F9 sit five slots earlier in the table
     * (presumably the table omits 0xD7FA..0xD7FE; confirm against the
     * table header).
     */
    if (((lead << 8) + trail) > 0xD7F9)
        index -= 5;

    memcpy(pout, t_gb2312_utf8[index], 3);
    return 1;
}
/*
 * Convert a NUL-terminated GB2312 string into a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * Every byte pair accepted by conv_utf8_for_gb2312() becomes a 3-byte UTF-8
 * sequence; any other byte is copied through unchanged.
 *
 * input: source string; NULL is rejected.
 *
 * Returns the buffer obtained from os_malloc(), which this function does not
 * free (the caller has to release it), or NULL when input is NULL or the
 * allocation fails.
 */
unsigned char *conv_utf8(unsigned char *input)
{
    size_t in_size, out_size;
    unsigned char *src, *dst, *result;

    /* Guard against NULL, matching Utf8ToGb2312(); strlen(NULL) is undefined. */
    if (!input)
        return NULL;

    /*
     * Worst case: every 2 input bytes expand to 3 output bytes, plus the
     * terminator. in_size already counts the input terminator.
     */
    in_size = strlen((char *)input) + 1;
    out_size = (in_size / 2) * 3 + 1;

    result = (unsigned char *)os_malloc(out_size);
    if (!result)
        return NULL;

    src = input;
    dst = result;
    while (*src) {
        if (conv_utf8_for_gb2312(src, dst)) {
            /* Double-byte GB2312 character -> 3-byte UTF-8 sequence. */
            src += 2;
            dst += 3;
        } else {
            /* Not a recognised pair: pass the byte through as-is. */
            *dst++ = *src++;
        }
    }
    *dst = 0;

    return result;
}
/*
 * Binary-search unicode_to_gb2312 for unicodeKey.
 *
 * Column 0 of each table row is compared against the key and column 1 is
 * returned on a match; the search therefore relies on the rows being sorted
 * by ascending column 0.
 *
 * Returns the value paired with unicodeKey, or 0 when the key is not found.
 */
unsigned short SearchCodeTable(unsigned short unicodeKey)
{
    int lo = 0;
    int hi = (int)(sizeof(unicode_to_gb2312) / (2 * sizeof(uint16_t)) - 1);

    /* Inclusive bounds [lo, hi]; the range shrinks until it is empty. */
    while (lo <= hi) {
        const int probe = lo + (hi - lo) / 2;

        if (unicode_to_gb2312[probe][0] == unicodeKey) {
            return unicode_to_gb2312[probe][1];
        }

        if (unicode_to_gb2312[probe][0] > unicodeKey) {
            hi = probe - 1;
        } else {
            lo = probe + 1;
        }
    }

    return 0;
}
/*
 * Count the consecutive 1 bits at the top of firstCh, starting from bit 7.
 *
 * For a UTF-8 lead byte this is the length of the sequence in bytes:
 * 0 for a single-byte (ASCII) character, 2..6 for multi-byte lead bytes.
 * A result of 1 means the byte is a continuation byte (10xxxxxx); 7 or 8
 * is returned for 0xFE / 0xFF.
 */
int GetUtf8ByteNumForWord(uint8_t firstCh)
{
    int ones = 0;
    uint8_t mask;

    /* Walk the mask down from the MSB until a 0 bit (or the end) is hit. */
    for (mask = 0x80; mask != 0 && (firstCh & mask) != 0; mask >>= 1) {
        ones++;
    }

    return ones;
}
/*
 * Convert a NUL-terminated UTF-8 string to GB2312 in place.
 *
 * The conversion is built in a scratch buffer and then copied back over
 * utf8, so the result never needs more room than the input occupied.
 *
 * Per-sequence handling:
 *   - single-byte (ASCII) characters are copied unchanged;
 *   - 2-byte sequences are copied unchanged (no table lookup);
 *   - 3-byte sequences are decoded to a 16-bit code point and replaced by
 *     the 2-byte value returned by SearchCodeTable();
 *   - 4..6-byte sequences are dropped;
 *   - stray continuation bytes and 0xFE/0xFF are dropped one byte at a time;
 *   - a multi-byte sequence cut short by the end of the string ends the
 *     conversion (the incomplete tail is dropped).
 *
 * utf8: writable input/output string.
 *
 * Returns utf8 on success, or NULL when utf8 is NULL, the string is empty,
 * or the scratch buffer cannot be allocated (utf8 is then left unmodified).
 */
char *Utf8ToGb2312(char *utf8)
{
    char *temp = NULL;
    int len, i = 0, j = 0;

    if (!utf8)
        return NULL;

    len = strlen(utf8);
    if (len <= 0)
        return NULL;

    /*
     * Output is at most len bytes plus the terminator. The extra byte is
     * required: for pure ASCII input j reaches len before '\0' is stored.
     */
    temp = os_malloc((size_t)len + 1);
    if (!temp)
        return NULL;

    /* Walk the input one UTF-8 sequence at a time. */
    while (i < len) {
        int nbyte = GetUtf8ByteNumForWord((uint8_t)utf8[i]);
        int byteCount;

        /*
         * A multi-byte sequence that runs past the end of the string is
         * malformed; stop here instead of reading beyond the terminator or
         * emitting more bytes than the caller's buffer holds.
         */
        if (nbyte >= 2 && nbyte <= 6 && i + nbyte > len)
            break;

        switch (nbyte) {
        case 0:
            /* Single-byte character: copy through. */
            temp[j] = utf8[i];
            byteCount = 1;
            j += 1;
            break;

        case 2:
            /* 2-byte sequence: copied verbatim, not converted. */
            temp[j] = utf8[i];
            temp[j + 1] = utf8[i + 1];
            byteCount = 2;
            j += 2;
            break;

        case 3: {
            /*
             * UTF-8 -> Unicode: 1110xxxx 10yyyyyy 10zzzzzz gives the code
             * point xxxxyyyy yyzzzzzz. Computed arithmetically so the result
             * does not depend on host byte order.
             */
            const uint8_t b0 = (uint8_t)utf8[i];
            const uint8_t b1 = (uint8_t)utf8[i + 1];
            const uint8_t b2 = (uint8_t)utf8[i + 2];
            const uint16_t unicodeKey = (uint16_t)(((b0 & 0x0F) << 12) |
                                                   ((b1 & 0x3F) << 6) |
                                                   (b2 & 0x3F));
            /* Look the code point up to get the matching GB2312 value. */
            const uint16_t gbKey = SearchCodeTable(unicodeKey);

            /*
             * NOTE(review): SearchCodeTable() returns 0 for an unmapped code
             * point, which stores two zero bytes here and so truncates the
             * resulting string at this position. Kept as-is; confirm whether
             * a substitute character would be preferable.
             */
            temp[j] = (char)((gbKey & 0xff00) >> 8);
            temp[j + 1] = (char)(gbKey & 0x00ff);
            byteCount = 3;
            j += 2;
            break;
        }

        case 4:
        case 5:
        case 6:
            /* Longer sequences are skipped without producing output. */
            byteCount = nbyte;
            break;

        default:
            /* Continuation byte or invalid lead byte: skip one byte. */
            byteCount = 1;
            break;
        }

        i += byteCount;
    }

    temp[j++] = '\0';
    /* j <= len + 1 here, so this never exceeds the original string storage. */
    memcpy(utf8, temp, j);

    os_free(temp);
    return utf8;
}
#endif // CONFIG_USE_CONV_UTF8