2025-02-27 17:59:18 +08:00

199 lines
3.5 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <common/bk_typedef.h>
#include <common/bk_include.h>
#include <os/mem.h>
#if CONFIG_USE_CONV_UTF8
#include "conv_utf8_gb2312_table.h"
#include "conv_utf8_pub.h"
/*
 * Translate the double-byte GB2312 character starting at pin into its
 * 3-byte UTF-8 encoding.
 *
 * pin  : source bytes; pin[0] and pin[1] are both read.
 * pout : destination; exactly 3 bytes are written on success.
 *
 * Returns 1 when a character was converted, 0 when the byte pair is outside
 * the handled ranges (nothing is written to pout in that case).
 */
static int conv_utf8_for_gb2312(unsigned char *pin, unsigned char *pout)
{
    const unsigned char lead = pin[0];
    const unsigned char trail = pin[1];
    unsigned short code;
    unsigned short index;

    /* Punctuation codes 0xA3A1..0xA3AF are served by their own small table. */
    if (lead == 0xA3 && trail >= 0xA1 && trail <= 0xAF) {
        memcpy(pout, t_gb2312_utf8_ex[trail - 0xA1], 3);
        return 1;
    }

    /* Chinese characters span 0xB0A1..0xF7FE; both bytes must be in range. */
    if (lead < 0xB0 || lead > 0xF7 || trail < 0xA1 || trail > 0xFE) {
        return 0;
    }

    /*
     * Each lead byte contributes 94 (0xA1..0xFE) consecutive table entries,
     * so the slot is row * 94 + column.
     */
    index = (unsigned short)((lead - 0xB0) * (0xFE - 0xA1 + 1) + (trail - 0xA1));

    /*
     * Codes above 0xD7F9 sit five slots lower in the table.
     * NOTE(review): this looks like the table omits 0xD7FA..0xD7FE; as
     * written, those five codes resolve to the five preceding entries.
     * Confirm against conv_utf8_gb2312_table.h.
     */
    code = (unsigned short)((lead << 8) + trail);
    if (code > 0xd7f9) {
        index -= 5;
    }

    memcpy(pout, t_gb2312_utf8[index], 3);
    return 1;
}
/**
 * Convert a NUL-terminated GB2312 string into a newly allocated UTF-8 string.
 *
 * Every double-byte character accepted by conv_utf8_for_gb2312() becomes
 * three UTF-8 bytes; any other byte is copied through unchanged.
 *
 * @param input  NUL-terminated source string; may be NULL.
 * @return Buffer obtained from os_malloc() holding the NUL-terminated result.
 *         The caller owns it and is expected to release it (os_free() is the
 *         matching call used elsewhere in this file). Returns NULL when input
 *         is NULL or the allocation fails.
 */
unsigned char *conv_utf8(unsigned char *input)
{
    size_t in_len, out_cap;
    unsigned char *ptr, *out_bak, *out;

    /* strlen() on a null pointer is undefined behaviour. */
    if (!input)
        return NULL;

    in_len = strlen((const char *)input);

    /*
     * Worst case every pair of input bytes grows to three output bytes;
     * round the pair count up and add one byte for the terminator.
     */
    out_cap = ((in_len + 1) / 2) * 3 + 1;

    out_bak = (unsigned char *)os_malloc(out_cap);
    if (!out_bak)
        return NULL;

    ptr = input;
    out = out_bak;
    while (*ptr) {
        if (conv_utf8_for_gb2312(ptr, out)) {
            /* One 2-byte GB2312 character became one 3-byte UTF-8 sequence. */
            out += 3;
            ptr += 2;
        } else {
            /* Not a convertible pair: pass the byte through as-is. */
            *out++ = *ptr++;
        }
    }
    *out = 0;

    return out_bak;
}
/**
 * Look up a 16-bit Unicode value in unicode_to_gb2312 and return the paired
 * GB2312 code.
 *
 * Column 0 of each row is the key and column 1 the value. The lookup is a
 * binary search, so it only finds entries if the rows are ordered by
 * ascending key.
 *
 * @param unicodeKey  Unicode value to search for.
 * @return The value stored next to the matching key, or 0 when no row matches.
 */
unsigned short SearchCodeTable(unsigned short unicodeKey)
{
    int lo = 0;
    int hi = sizeof(unicode_to_gb2312) / (2 * sizeof(uint16_t)) - 1;

    while (lo <= hi) {
        int probe = (lo + hi) / 2;

        if (unicode_to_gb2312[probe][0] < unicodeKey) {
            /* Key lies in the upper half. */
            lo = probe + 1;
        } else if (unicode_to_gb2312[probe][0] > unicodeKey) {
            /* Key lies in the lower half. */
            hi = probe - 1;
        } else {
            return unicode_to_gb2312[probe][1];
        }
    }

    return 0;
}
/**
 * Count the consecutive 1 bits at the most-significant end of firstCh.
 *
 * For a UTF-8 lead byte this is the length of the sequence it starts
 * (2 for 110xxxxx, 3 for 1110xxxx, ...). A byte whose top bit is clear
 * yields 0, a 10xxxxxx byte yields 1, and 0xFF yields 8.
 *
 * @param firstCh  Byte to inspect.
 * @return Number of leading 1 bits, in the range 0..8.
 */
int GetUtf8ByteNumForWord(uint8_t firstCh)
{
    int ones = 0;
    uint8_t mask;

    /* Walk the mask from bit 7 downwards until a 0 bit (or bit underflow). */
    for (mask = 0x80; (firstCh & mask) != 0; mask >>= 1) {
        ones++;
    }

    return ones;
}
/**
 * Convert a NUL-terminated UTF-8 string to GB2312, overwriting it in place.
 *
 * Each lead byte is classified with GetUtf8ByteNumForWord():
 *   - single-byte (ASCII) characters are copied unchanged;
 *   - 2-byte sequences are copied through unconverted;
 *   - 3-byte sequences are decoded to a 16-bit code point and looked up with
 *     SearchCodeTable(); the 2-byte result is stored high byte first. A code
 *     point that is not in the table is replaced by a single '?', because
 *     storing the 0 "not found" value would cut the C string short;
 *   - 4- to 6-byte sequences are dropped;
 *   - stray continuation bytes and other invalid lead bytes are dropped.
 * A 2- or 3-byte sequence cut off by the end of the string has its lead byte
 * copied as-is, so nothing is read beyond the terminator.
 *
 * The result is never longer than the input, so writing it back over the
 * caller's buffer cannot overflow it.
 *
 * @param utf8  Writable NUL-terminated UTF-8 string.
 * @return utf8 on success; NULL if utf8 is NULL, empty, or the scratch
 *         buffer could not be allocated (the input is left untouched).
 */
char *Utf8ToGb2312(char *utf8)
{
    const unsigned char *src = (const unsigned char *)utf8;
    char *temp;
    size_t len, i = 0, j = 0;

    if (!utf8)
        return NULL;
    len = strlen(utf8);
    if (len == 0)
        return NULL;

    /* Output length <= input length; the extra byte holds the terminator. */
    temp = os_malloc(len + 1);
    if (!temp)
        return NULL;

    /* Walk the input one UTF-8 sequence at a time. */
    while (i < len) {
        size_t byteCount;
        int nbyte = GetUtf8ByteNumForWord(src[i]);

        /* Truncated 2/3-byte sequence: treat the lead byte as a plain byte. */
        if ((nbyte == 2 || nbyte == 3) && (size_t)nbyte > len - i)
            nbyte = 0;

        switch (nbyte) {
        case 0:
            temp[j++] = utf8[i];
            byteCount = 1;
            break;
        case 2:
            temp[j++] = utf8[i];
            temp[j++] = utf8[i + 1];
            byteCount = 2;
            break;
        case 3: {
            /*
             * UTF-8 -> Unicode: 1110xxxx 10yyyyyy 10zzzzzz -> xxxxyyyyyyzzzzzz.
             * Built with shifts so the value does not depend on byte order.
             */
            uint16_t unicodeKey = (uint16_t)(((src[i] & 0x0Fu) << 12) |
                                             ((src[i + 1] & 0x3Fu) << 6) |
                                             (src[i + 2] & 0x3Fu));
            /* Unicode -> GB2312 via the lookup table. */
            uint16_t gbKey = SearchCodeTable(unicodeKey);

            if (gbKey == 0) {
                /* No mapping: emit a placeholder instead of NUL bytes. */
                temp[j++] = '?';
            } else {
                temp[j++] = (char)((gbKey & 0xff00) >> 8);
                temp[j++] = (char)(gbKey & 0x00ff);
            }
            byteCount = 3;
            break;
        }
        case 4:
        case 5:
        case 6:
            /* Not representable in GB2312: skip the whole sequence. */
            byteCount = (size_t)nbyte;
            break;
        default:
            /* Continuation byte or invalid lead byte: skip it. */
            byteCount = 1;
            break;
        }
        i += byteCount;
    }

    temp[j++] = '\0';
    memcpy(utf8, temp, j);
    os_free(temp);
    return utf8;
}
#endif // CONFIG_USE_CONV_UTF8