// lock_lfvx/bk_avdk-v2.0.1.32/bk_idk/components/utf8/conv_utf8.c

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <common/bk_typedef.h>
#include <common/bk_include.h>
#include <os/mem.h>

#if CONFIG_USE_CONV_UTF8
#include "conv_utf8_gb2312_table.h"
#include "conv_utf8_pub.h"
/*
 * Convert one two-byte GB2312 code into its three-byte UTF-8 form.
 *
 * pin:  points at the lead byte; the byte after it is read as the trail byte.
 * pout: receives exactly three bytes when a conversion takes place.
 *
 * Returns 1 when three bytes were written to pout, 0 when the byte pair is
 * outside the ranges handled here (pout is then left untouched).
 */
static int conv_utf8_for_gb2312(unsigned char *pin, unsigned char *pout)
{
	const unsigned char lead = pin[0];
	const unsigned char trail = pin[1];
	unsigned int code, index;

	/* Full-width punctuation, codes 0xA3A1..0xA3AF, has its own small table. */
	if (lead == 0xA3 && trail >= 0xA1 && trail <= 0xAF) {
		memcpy(pout, t_gb2312_utf8_ex[trail - 0xA1], 3);
		return 1;
	}

	/* Chinese characters span 0xB0A1..0xF7FE; everything else is rejected. */
	if (lead < 0xB0 || lead > 0xF7 || trail < 0xA1 || trail > 0xFE)
		return 0;

	/*
	 * Each lead byte contributes 94 consecutive table slots
	 * (trail bytes 0xA1..0xFE), so the slot is row * 94 + column.
	 */
	index = (lead - 0xB0u) * 94u + (trail - 0xA1u);

	/*
	 * Codes above 0xD7F9 sit five slots earlier in the table.
	 * NOTE(review): presumably 0xD7FA..0xD7FE have no table entries;
	 * confirm against t_gb2312_utf8.
	 */
	code = ((unsigned int)lead << 8) | trail;
	if (code > 0xD7F9)
		index -= 5;

	memcpy(pout, t_gb2312_utf8[index], 3);
	return 1;
}

/*
 * Convert a NUL-terminated string that may contain GB2312 double-byte
 * characters into a newly allocated UTF-8 string.
 *
 * Byte pairs accepted by conv_utf8_for_gb2312() are replaced by their
 * three-byte UTF-8 form; every other byte is copied through unchanged.
 *
 * input: NUL-terminated source string; may be NULL.
 *
 * Returns a NUL-terminated buffer obtained from os_malloc(), or NULL when
 * input is NULL or the allocation fails. This function never frees the
 * buffer, so releasing it is the caller's responsibility.
 */
unsigned char *conv_utf8(unsigned char *input)
{
	size_t in_len, out_cap;
	unsigned char *src, *dst, *result;

	/* strlen() on a NULL pointer is undefined behaviour. */
	if (!input)
		return NULL;

	in_len = strlen((char *)input);

	/*
	 * Worst case: every two input bytes expand to three output bytes,
	 * with an odd trailing byte rounded up to a full pair, plus one
	 * byte for the terminator.
	 */
	out_cap = ((in_len + 1) / 2) * 3 + 1;

	result = (unsigned char *)os_malloc(out_cap);
	if (!result)
		return NULL;

	src = input;
	dst = result;

	while (*src) {
		if (conv_utf8_for_gb2312(src, dst)) {
			/* Two GB2312 bytes consumed, three UTF-8 bytes produced. */
			src += 2;
			dst += 3;
		} else {
			/* Not a handled GB2312 pair: pass the byte through. */
			*dst++ = *src++;
		}
	}
	*dst = 0;

	return result;
}

/*
 * Binary-search unicode_to_gb2312 for a row whose first column equals
 * unicodeKey.
 *
 * The search only works if the rows are ordered by ascending first column
 * (NOTE(review): the table is defined elsewhere; confirm it is sorted).
 *
 * Returns the second column of the matching row, or 0 when no row matches.
 */
unsigned short SearchCodeTable(unsigned short unicodeKey)
{
	int lo = 0;
	int hi = sizeof(unicode_to_gb2312) / (2 * sizeof(uint16_t)) - 1;

	while (lo <= hi) {
		const int probe = lo + (hi - lo) / 2;

		if (unicode_to_gb2312[probe][0] < unicodeKey) {
			/* Key lies in the upper half. */
			lo = probe + 1;
		} else if (unicode_to_gb2312[probe][0] > unicodeKey) {
			/* Key lies in the lower half. */
			hi = probe - 1;
		} else {
			return unicode_to_gb2312[probe][1];
		}
	}

	return 0;
}


/*
 * Count the consecutive 1 bits at the top of firstCh, starting from bit 7.
 *
 * For a UTF-8 lead byte this is the length of the sequence in bytes; a plain
 * ASCII byte (top bit clear) yields 0 and a continuation byte (10xxxxxx)
 * yields 1.
 *
 * Returns a value in the range 0..8.
 */
int GetUtf8ByteNumForWord(uint8_t firstCh)
{
	uint8_t mask;
	int ones = 0;

	/* Walk from the most significant bit down until a 0 bit is met. */
	for (mask = 0x80; mask != 0 && (firstCh & mask) != 0; mask >>= 1)
		ones++;

	return ones;
}

/*
 * Convert a NUL-terminated UTF-8 string to GB2312, overwriting the input
 * buffer with the result.
 *
 * Handling per lead byte (sequence length from GetUtf8ByteNumForWord()):
 *   - ASCII byte:            copied unchanged.
 *   - 2-byte sequence:       both bytes copied unchanged (no conversion).
 *   - 3-byte sequence:       decoded to a 16-bit code point, looked up with
 *                            SearchCodeTable(), and replaced by the two-byte
 *                            result (high byte first).
 *   - 4..6-byte sequence:    skipped, nothing is emitted.
 *   - anything else:         the single byte is skipped.
 * A multi-byte sequence that is cut off by the end of the string is dropped.
 *
 * NOTE(review): SearchCodeTable() returns 0 for an unknown code point, in
 * which case two zero bytes are written and the result, read as a C string,
 * ends at that position.
 *
 * utf8: string to convert in place; may be NULL.
 *
 * Returns utf8 on success. Returns NULL when utf8 is NULL or empty, or when
 * the scratch buffer cannot be allocated; the input is left untouched then.
 */
char *Utf8ToGb2312(char *utf8)
{
	const unsigned char *src = (const unsigned char *)utf8;
	char *temp = NULL;
	size_t len, i = 0, j = 0;

	if (!utf8)
		return NULL;

	len = strlen(utf8);
	if (len == 0)
		return NULL;

	/*
	 * The output is never longer than the input, but an all-ASCII input
	 * produces len bytes, so one extra byte is needed for the terminator.
	 */
	temp = os_malloc(len + 1);
	if (!temp)
		return NULL;

	while (i < len) {
		int nbyte = GetUtf8ByteNumForWord(src[i]);
		size_t consumed;
		uint16_t unicodeKey;
		uint16_t gbKey;

		/*
		 * Stop at a sequence that claims more bytes than remain, so the
		 * cases below never read past the terminator or write more bytes
		 * than were consumed.
		 */
		if (nbyte >= 2 && nbyte <= 6 && (size_t)nbyte > len - i)
			break;

		switch (nbyte) {
		case 0:
			temp[j++] = utf8[i];
			consumed = 1;
			break;

		case 2:
			temp[j++] = utf8[i];
			temp[j++] = utf8[i + 1];
			consumed = 2;
			break;

		case 3:
			/*
			 * 1110xxxx 10yyyyyy 10zzzzzz -> xxxxyyyy yyzzzzzz.
			 * Built arithmetically so the value does not depend on the
			 * byte order of the target.
			 */
			unicodeKey = (uint16_t)(((src[i] & 0x0F) << 12) |
						((src[i + 1] & 0x3F) << 6) |
						(src[i + 2] & 0x3F));

			gbKey = SearchCodeTable(unicodeKey);
			temp[j++] = (char)((gbKey & 0xff00) >> 8);
			temp[j++] = (char)(gbKey & 0x00ff);
			consumed = 3;
			break;

		case 4:
		case 5:
		case 6:
			/* Not representable here: skip the whole sequence. */
			consumed = (size_t)nbyte;
			break;

		default:
			/* Stray continuation byte or invalid lead byte: skip it. */
			consumed = 1;
			break;
		}

		i += consumed;
	}

	temp[j++] = '\0';
	/* j <= len + 1, so the result fits in the caller's original buffer. */
	memcpy(utf8, temp, j);

	os_free(temp);

	return utf8;
}


#endif // CONFIG_USE_CONV_UTF8