helloyifa 31f179cb76 init
2025-05-15 14:19:56 +08:00

1310 lines
50 KiB
C
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "facedetectcnn.h"
#include <math.h>
#include <float.h> //for FLT_EPSION
#include <stdlib.h>//for stable_sort, sort
#include <string.h>
#include <os/os.h>
#if CONFIG_ARCH_RISCV && CONFIG_CACHE_ENABLE
#include "cache.h"
#endif
/* Saturate an int to the 0..255 range of one colour byte. */
static unsigned char yuv422_saturate_u8(int value)
{
    if (value > 255)
        return 0xFF;
    if (value < 0)
        return 0x00;
    return (unsigned char)value;
}

/* Write one RGB pixel built from a luma value and precomputed chroma offsets. */
static void yuv422_store_rgb(unsigned char *dst, int luma, int r_off, int g_v_off, int g_u_off, int b_off)
{
    dst[0] = yuv422_saturate_u8(luma + r_off);
    dst[1] = yuv422_saturate_u8(luma - g_v_off - g_u_off);
    dst[2] = yuv422_saturate_u8(luma + b_off);
}

/*
 * Convert a packed YUV422 image to RGB24, optionally shrinking it.
 *
 * The input is read in 4-byte groups laid out as Y0, Y1, U, V (two pixels
 * sharing one chroma pair). Output is 3 bytes per pixel (R, G, B) with a row
 * stride of target_width * 3 bytes.
 *
 * The image is never enlarged: when a target dimension is greater than or
 * equal to the source dimension the step along that axis is 1. Downscaling is
 * done by nearest-neighbour selection of whole rows and whole 4-byte groups.
 *
 * yuv            source buffer, source_width * source_height * 2 bytes
 * rgb            destination buffer, target_width * target_height * 3 bytes
 * source_width   source width in pixels
 * source_height  source height in pixels
 * target_width   destination width in pixels
 * target_height  destination height in pixels
 */
void yuv422packed_to_rgb24(unsigned char *yuv, unsigned char *rgb, int source_width, int source_height, int target_width, int target_height)
{
    float col_step = (target_width >= source_width) ? 1 : (float)source_width / target_width;
    const float row_step = (target_height >= source_height) ? 1 : (float)source_height / target_height;

    /* Each sampled group is 4 source bytes and yields 2 output pixels. */
    col_step = col_step * 4;

    const int src_row_bytes = source_width * 2;
    float next_row = 0;   /* next source row that will be sampled */
    int out_row = 0;      /* number of destination rows produced so far */

    for (int h = 0; h < source_height; h++)
    {
        if (!(h >= next_row))
            continue;
        next_row += row_step;

        const int src_base = h * src_row_bytes;
        const int dst_base = out_row * target_width * 3;
        float next_col = 0;   /* next source byte offset that will be sampled */
        int out_col = 0;      /* number of destination pixels produced in this row */

        for (int w = 0; w < src_row_bytes; w += 4)
        {
            if (!(w >= next_col))
                continue;
            next_col += col_step;

            const unsigned char *group = yuv + src_base + w;
            const int luma0 = (int)group[0];
            const int luma1 = (int)group[1];
            const int cb = (int)group[2];
            const int cr = (int)group[3];

            /* Chroma contributions are shared by both pixels of the group. */
            const int r_off = (int)(1.370705 * (cr - 128));
            const int g_v_off = (int)(0.698001 * (cr - 128));
            const int g_u_off = (int)(0.337633 * (cb - 128));
            const int b_off = (int)(1.732446 * (cb - 128));

            unsigned char *dst = rgb + dst_base + out_col * 3;

            yuv422_store_rgb(dst, luma0, r_off, g_v_off, g_u_off, b_off);
            out_col++;
            if (out_col >= target_width)
                break;   /* destination row is full */

            yuv422_store_rgb(dst + 3, luma1, r_off, g_v_off, g_u_off, b_off);
            out_col++;
            if (out_col >= target_width)
                break;   /* destination row is full */
        }

        out_row++;
        if (out_row >= target_height)
            break;   /* destination image is full */
    }
}
/*
 * Draw the outline of an axis-aligned rectangle into an RGB24 image.
 *
 * a            image buffer passed through to setpixel()
 * x1,y1,x2,y2  inclusive corner coordinates
 * r,g,b        outline colour; handed to setpixel(), which takes int colour
 *              arguments (NOTE(review): relies on a visible prototype for the
 *              float-to-int conversion - confirm the header declares it)
 * col,row      image width and height, forwarded to setpixel()
 */
void draw_box(unsigned char* a, int x1, int y1, int x2, int y2, float r, float g, float b, int col, int row)
{
    /* Top and bottom edges. */
    for (int x = x1; x <= x2; x++) {
        setpixel(a, x, y1, r, g, b, col, row);
        setpixel(a, x, y2, r, g, b, col, row);
    }

    /* Left and right edges. */
    for (int y = y1; y <= y2; y++) {
        setpixel(a, x1, y, r, g, b, col, row);
        setpixel(a, x2, y, r, g, b, col, row);
    }
}
/*
 * Write one pixel into an RGB24 image (3 bytes per pixel, row stride col * 3).
 *
 * pb       image buffer
 * x, y     pixel coordinates; anything outside [0, col) x [0, row) is ignored
 * r, g, b  colour components, clamped to 0..255 before being stored
 * col,row  image width and height in pixels
 *
 * Negative coordinates are rejected as well as too-large ones, so a box that
 * is partly off-screen cannot write before the start of the buffer.
 */
void setpixel(unsigned char* pb, int x, int y, int r, int g, int b, int col, int row)
{
    if (pb == NULL || x < 0 || y < 0 || x >= col || y >= row)
        return;

    r = r < 0 ? 0 : r < 255 ? r : 255;
    g = g < 0 ? 0 : g < 255 ? g : 255;
    b = b < 0 ? 0 : b < 255 ? b : 255;

    /* For RGBA data the stride per pixel would be 4 instead of 3. */
    unsigned char *px = pb + x * 3 + y * col * 3;
    px[0] = (unsigned char)r;
    px[1] = (unsigned char)g;
    px[2] = (unsigned char)b;
}
/*
 * Draw the outline of an axis-aligned rectangle into a packed YUV image.
 *
 * Horizontal edges are written with setpixel_yuv() and vertical edges with
 * setpixel_yuv_c(); the two helpers fill the 4-byte group differently.
 *
 * a            image buffer
 * x1,y1,x2,y2  inclusive corner coordinates, in the units the helpers index by
 * y,u,v        outline colour
 * col,row      image dimensions forwarded to the helpers
 */
void draw_box_yuv(unsigned char* a, int x1, int y1, int x2, int y2, int y, int u, int v, int col, int row)
{
    /* Top and bottom edges. */
    for (int px = x1; px <= x2; px++) {
        setpixel_yuv(a, px, y1, y, u, v, col, row);
        setpixel_yuv(a, px, y2, y, u, v, col, row);
    }

    /* Left and right edges. */
    for (int py = y1; py <= y2; py++) {
        setpixel_yuv_c(a, x1, py, y, u, v, col, row);
        setpixel_yuv_c(a, x2, py, y, u, v, col, row);
    }
}
/*
 * Fill one 4-byte group of a packed YUV image with a single colour.
 *
 * The group at (x, y) starts at byte x * 4 + y * col * 4 and is written as
 * U, Y, V, Y (bytes 0..3), so both luma slots receive y0.
 *
 * pb       image buffer
 * x, y     group coordinates; anything outside [0, col) x [0, row) is ignored
 * y0,u,v   colour components, clamped to 0..255 before being stored
 * col,row  number of 4-byte groups per row and number of rows
 *
 * The bounds check mirrors setpixel(): without it an off-image box edge
 * writes outside the buffer.
 */
void setpixel_yuv(unsigned char* pb, int x, int y, int y0, int u, int v, int col, int row)
{
    if (pb == NULL || x < 0 || y < 0 || x >= col || y >= row)
        return;

    y0 = y0 < 0 ? 0 : y0 < 255 ? y0 : 255;
    u = u < 0 ? 0 : u < 255 ? u : 255;
    v = v < 0 ? 0 : v < 255 ? v : 255;

    unsigned char *group = pb + x * 4 + y * col * 4;
    group[3] = (unsigned char)y0;
    group[2] = (unsigned char)v;
    group[1] = (unsigned char)y0;
    group[0] = (unsigned char)u;
}
/*
 * Partially fill one 4-byte group of a packed YUV image.
 *
 * The group at (x, y) starts at byte x * 4 + y * col * 4. Bytes 1, 2 and 3
 * receive u, v and y0; byte 0 is left untouched.
 * NOTE(review): setpixel_yuv() writes U to byte 0 and Y to byte 1, whereas
 * this variant writes U to byte 1 - confirm that the difference is intended.
 *
 * pb       image buffer
 * x, y     group coordinates; anything outside [0, col) x [0, row) is ignored
 * y0,u,v   colour components, clamped to 0..255 before being stored
 * col,row  number of 4-byte groups per row and number of rows
 */
void setpixel_yuv_c(unsigned char* pb, int x, int y, int y0, int u, int v, int col, int row)
{
    if (pb == NULL || x < 0 || y < 0 || x >= col || y >= row)
        return;

    y0 = y0 < 0 ? 0 : y0 < 255 ? y0 : 255;
    u = u < 0 ? 0 : u < 255 ? u : 255;
    v = v < 0 ? 0 : v < 255 ? v : 255;

    unsigned char *group = pb + x * 4 + y * col * 4;
    group[3] = (unsigned char)y0;
    group[2] = (unsigned char)v;
    group[1] = (unsigned char)u;
}
/* Integer bounding box given by its min/max corner coordinates. */
typedef struct NormalizedBBox_
{
int xmin; /* left edge */
int ymin; /* top edge */
int xmax; /* right edge */
int ymax; /* bottom edge */
int *lm; /* presumably the landmark coordinates for this box - TODO confirm length and ownership */
} NormalizedBBox;
/* A scored candidate box with floating-point corner coordinates. */
typedef struct Score_bb_
{
int idx; /* presumably the index of the candidate in its source list - TODO confirm */
float score; /* score attached to the box */
float xmin; /* left edge */
float ymin; /* top edge */
float xmax; /* right edge */
float ymax; /* bottom edge */
//int lm[10];
} Score_bb;
/* Running byte total: increased by myAlloc(), decreased by setNULL(). */
int memsize = 0;
/* Number of myAlloc() calls counted so far. */
int mall = 0;
/* Number of non-NULL pointers released through myFree(). */
int fre = 0;
/*
 * Allocate `size` bytes through psram_malloc() and update the allocation
 * statistics (`mall`, `memsize`).
 *
 * Returns the new block, or NULL when the allocation fails. The statistics
 * are only updated for successful allocations, so a failed request no longer
 * inflates the counters.
 *
 * NOTE(review): blocks are released with myFree(), which calls os_free() -
 * confirm that os_free() accepts pointers obtained from psram_malloc().
 */
void* myAlloc(size_t size)
{
    char* ptr = (char*)psram_malloc(size);
    if (ptr == NULL)
        return NULL;

    mall = mall + 1;
    memsize = memsize + size;
    return ptr;
}
/*
 * Release a block with os_free() and count the release in `fre`.
 * A NULL pointer is ignored and not counted.
 */
void myFree(void* ptr)
{
    if (!ptr)
        return;

    os_free(ptr);
    fre = fre + 1;
}
/*
 * Zero-fill the whole data buffer of a blob.
 *
 * The length handed to os_memset_word() is a byte count, matching the other
 * call sites in this file (create() and convolution_first() both multiply by
 * the element size). The element size is therefore included here too;
 * without it only a quarter of a 4-byte-per-element blob was cleared.
 *
 * A NULL blob or a blob without data is ignored.
 */
void setZero(CDataBlob* blob)
{
    if (blob == NULL || blob->data == NULL)
        return;

    os_memset_word((uint32_t *)(blob->data), 0,
                   (uint32_t)(blob->channelStep * blob->rows * blob->cols * blob->typesize));
}
/*
 * Destroy a blob: free its data buffer, then free the blob header itself,
 * and subtract the blob's size from the `memsize` statistic.
 *
 * A NULL blob is ignored. The previous version dereferenced `blob` before
 * checking it.
 *
 * The pointer is passed by value, so the caller's copy is left dangling and
 * must not be used afterwards.
 */
void setNULL(CDataBlob* blob)
{
    if (blob == NULL)
        return;

    /* Size is taken before the geometry fields are cleared. */
    int size = blob->channelStep * blob->rows * blob->cols * blob->typesize + sizeof(CDataBlob);

    myFree(blob->data);   /* myFree() ignores NULL */
    blob->rows = blob->cols = blob->channels = blob->channelStep = 0;
    blob->data = NULL;

    memsize = memsize - size;
    myFree(blob);
}
/*
 * Initialise an already-allocated blob header and allocate its zero-filled
 * data buffer.
 *
 * blob      header to fill in
 * r, c, ch  rows, columns and channels; channelStep is set equal to ch
 * typesize  bytes per element used for the allocation; the header records 4
 *           when typesize is 4 and 1 for any other value
 *
 * Returns blob on success. On allocation failure "data err;" is printed and
 * 0 is returned, with blob->data left NULL.
 */
CDataBlob* create(CDataBlob* blob, int r, int c, int ch, int typesize)
{
    blob->rows = r;
    blob->cols = c;
    blob->channels = ch;
    blob->typesize = (typesize == 4) ? 4 : 1;

    /* No padding: one pixel occupies exactly `channels` elements. */
    blob->channelStep = blob->channels;

    const int total_bytes = blob->rows * blob->cols * blob->channelStep * typesize;

    blob->data = (int *)myAlloc(total_bytes);
    if (!blob->data) {
        os_printf("data err;");
        return 0;
    }

    os_memset_word((uint32_t *)(blob->data), 0, (uint32_t)total_bytes);
    return blob;
}
/*
 * Return the address of the first channel of pixel (r, c), or NULL when the
 * position lies outside the blob. The typesize argument is not used.
 */
int* ptr(CDataBlob* blob, int r, int c, int typesize)
{
    const int row_inside = (r >= 0) && (r < blob->rows);
    const int col_inside = (c >= 0) && (c < blob->cols);

    if (!(row_inside && col_inside))
        return NULL;

    const int pixel_index = r * blob->cols + c;
    return blob->data + pixel_index * blob->channelStep;
}
/*
 * Read one element of a blob.
 *
 * Returns the value at row r, column c, channel ch, or 0 when the blob has no
 * data or any of the three indices is out of range.
 */
int getElement(CDataBlob* blob, int r, int c, int ch)
{
    if (!blob->data)
        return 0;
    if (r < 0 || r >= blob->rows)
        return 0;
    if (c < 0 || c >= blob->cols)
        return 0;
    if (ch < 0 || ch >= blob->channels)
        return 0;

    /* Indices are known to be valid here, so the pixel address can be formed directly. */
    const int* pixel = blob->data + (r * blob->cols + c) * blob->channelStep;
    return pixel[ch];
}
/*
 * Report whether a blob is unusable: returns 1 when rows or cols are not
 * positive, when channels is 0, or when there is no data buffer; 0 otherwise.
 */
int isEmpty(CDataBlob* blob)
{
    if (blob->rows <= 0)
        return 1;
    if (blob->cols <= 0)
        return 1;
    if (blob->channels == 0)
        return 1;
    return blob->data == NULL;
}
/*
 * Unfold a 3-channel image so that a 3x3 / stride-2 / pad-1 convolution can
 * be evaluated as a 1x1 convolution.
 *
 * Every output position (r, c) receives the 3x3 neighbourhood centred on
 * source pixel (2r, 2c): 9 taps x 3 channels = 27 values, stored tap by tap.
 * The output blob has 32 channels; the channels that are not written, and
 * taps that fall outside the image, stay at the zero that create() fills in.
 *
 * inputData     image bytes, 3 bytes per pixel
 * imgWidth      image width in pixels
 * imgHeight     image height in pixels
 * imgChannels   must be 3
 * imgWidthStep  bytes per image row
 * padDivisor    must be 32; dimensions are rounded up to a multiple of it
 *               before being halved
 *
 * Returns the newly allocated blob. Invalid arguments and allocation failure
 * print an error and terminate via exit(1); previously a failed allocation
 * led to a NULL dereference.
 */
CDataBlob* setDataFrom3x3S2P1to1x1S1P0FromImage(unsigned char* inputData, int imgWidth, int imgHeight, int imgChannels, int imgWidthStep, int padDivisor) {
    if (imgChannels != 3) {
        os_printf("%s err\n", __func__);
        exit(1);
    }
    if (padDivisor != 32) {
        os_printf("%s err\n", __func__);
        exit(1);
    }

    int rows = ((imgHeight - 1) / padDivisor + 1) * padDivisor / 2;
    int cols = ((imgWidth - 1) / padDivisor + 1 ) * padDivisor / 2;
    int channels = 32;

    CDataBlob* outBlob = (CDataBlob* )myAlloc(sizeof(CDataBlob));
    if (outBlob == NULL || create(outBlob, rows, cols, channels, sizeof(int)) == NULL) {
        os_printf("%s err\n", __func__);
        exit(1);
    }

    for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
            int* pixel = outBlob->data + (r * outBlob->cols + c) * outBlob->channelStep;

            for (int fy = -1; fy <= 1; fy++) {
                int srcy = r * 2 + fy;
                if (srcy < 0 || srcy >= imgHeight) /* tap lies outside the image */
                    continue;

                for (int fx = -1; fx <= 1; fx++) {
                    int srcx = c * 2 + fx;
                    if (srcx < 0 || srcx >= imgWidth) /* tap lies outside the image */
                        continue;

                    /* Tap index 0..8 inside the 3x3 window. */
                    int tap = (fy + 1) * 3 + fx + 1;
                    const unsigned char* src = inputData + imgWidthStep * srcy + imgChannels * srcx;
                    int* dst = pixel + tap * imgChannels;

                    dst[0] = (int)src[0];
                    dst[1] = (int)src[1];
                    dst[2] = (int)src[2];
                }
            }
        }
    }
    return outBlob;
}
/*
 * Fixed-point dot product: each product p1[i] * p2[i] is shifted right by 8
 * on its own before being accumulated.
 *
 * p1, p2  input vectors with at least num elements
 * num     number of elements
 *
 * Returns the accumulated sum (0 when num <= 0).
 */
int dotProduct0(int* p1, int* p2, int num)
{
    int total = 0;
    int k = 0;

    while (k < num)
    {
        const int product = p1[k] * p2[k];
        total += product >> 8;
        k++;
    }
    return total;
}
/*
 * Fixed-point dot product that processes the vectors in groups of 8: the
 * eight products of a group are summed first and the group sum is then
 * shifted right by 7 before being accumulated.
 *
 * p1, p2  input vectors with at least num elements
 * num     number of elements
 *
 * When num is not a multiple of 8 the remaining elements form one final,
 * shorter group that is handled the same way. The earlier loop always read a
 * full group of 8 and therefore ran past the end of the vectors in that case.
 * Results for multiples of 8 are unchanged.
 *
 * Returns the accumulated sum (0 when num <= 0).
 */
int dotProduct(int * p1, int * p2, int num)
{
    int sum = 0;
    int i = 0;

    /* Full groups of 8. */
    for (; i + 8 <= num; i = i + 8)
    {
        int group = p1[i] * p2[i]
                  + p1[i + 1] * p2[i + 1]
                  + p1[i + 2] * p2[i + 2]
                  + p1[i + 3] * p2[i + 3]
                  + p1[i + 4] * p2[i + 4]
                  + p1[i + 5] * p2[i + 5]
                  + p1[i + 6] * p2[i + 6]
                  + p1[i + 7] * p2[i + 7];
        sum = sum + (group >> 7);
    }

    /* Trailing partial group, if any. */
    if (i < num)
    {
        int group = 0;
        for (; i < num; i++)
            group += p1[i] * p2[i];
        sum = sum + (group >> 7);
    }
    return sum;
}
/*
 * Element-wise multiply-accumulate in fixed point:
 *   p3[i] += (p1[i] * p2[i]) >> 7   for i in [0, num)
 *
 * p1, p2  input vectors
 * p3      accumulator vector, updated in place
 * num     number of elements
 *
 * Always returns 1.
 */
int vecMulAdd(int * p1, int * p2, int * p3, int num)
{
    for (int k = 0; k < num; ++k) {
        const int product = p1[k] * p2[k];
        p3[k] = p3[k] + (product >> 7);
    }
    return 1;
}
/*
 * Add twice the source vector to the destination vector:
 *   p2[i] += (short)p1[i] * 2   for i in [0, num)
 *
 * Each source element is narrowed to short before being doubled.
 *
 * p1   source vector
 * p2   destination vector, updated in place
 * num  number of elements
 *
 * Always returns 1.
 */
int vecAdd(int* p1, int* p2, int num)
{
    int k = 0;
    while (k < num)
    {
        const short narrowed = (short)p1[k];
        p2[k] = p2[k] + narrowed * 2;
        ++k;
    }
    return 1;
}
/*
 * Element-wise sum of two vectors into a third:
 *   p3[i] = (short)p1[i] + (short)p2[i]   for i in [0, num)
 *
 * Both operands are narrowed to short before the addition.
 *
 * p1, p2  input vectors
 * p3      output vector
 * num     number of elements
 *
 * Always returns 1.
 */
int vecAdd2(int* p1, int* p2, int* p3, int num)
{
    int k = 0;
    while (k < num)
    {
        const short lhs = (short)p1[k];
        const short rhs = (short)p2[k];
        p3[k] = lhs + rhs;
        ++k;
    }
    return 1;
}
/*
 * 1x1 (pointwise) convolution.
 *
 * For every output pixel and output channel ch the result is
 *   dotProduct(input pixel, weight row ch, inputData->channels) + 2 * bias[ch]
 * where weight row ch starts at weights->data + ch * weights->channelStep.
 *
 * inputData   input blob, read at the same (row, col) as the output
 * filters     weights and biases of the layer
 * outputData  pre-allocated output blob; its rows/cols/channels drive the loops
 *
 * Always returns 1.
 */
int convolution_1x1pointwise(CDataBlob* inputData, Filters* filters, CDataBlob* outputData)
{
    for (int row = 0; row < outputData->rows; row++)
    {
        for (int col = 0; col < outputData->cols; col++)
        {
            int* in_pixel = inputData->data + (row * inputData->cols + col) * inputData->channelStep;
            int* out_pixel = outputData->data + (row * outputData->cols + col) * outputData->channelStep;

            for (int ch = 0; ch < outputData->channels; ch++)
            {
                int* weight_row = filters->weights->data
                                + (0 * filters->weights->cols + ch) * filters->weights->channelStep;

                out_pixel[ch] = dotProduct(in_pixel, weight_row, inputData->channels);
                out_pixel[ch] += (filters->biases->data[ch] * 2);
            }
        }
    }
    return 1;
}
/*
 * 1x1 (pointwise) convolution built on dotProduct0().
 *
 * For every output pixel and output channel ch the result is
 *   dotProduct0(input pixel, weight row ch, inputData->channels) + 2 * bias[ch]
 * where weight row ch starts at weights->data + ch * weights->channelStep.
 *
 * inputData   input blob, read at the same (row, col) as the output
 * filters     weights and biases of the layer
 * outputData  pre-allocated output blob; its rows/cols/channels drive the loops
 *
 * Always returns 1.
 */
int convolution_1x1pointwise0(CDataBlob* inputData, Filters* filters, CDataBlob* outputData)
{
    for (int row = 0; row < outputData->rows; row++)
    {
        for (int col = 0; col < outputData->cols; col++)
        {
            int* in_pixel = inputData->data + (row * inputData->cols + col) * inputData->channelStep;
            int* out_pixel = outputData->data + (row * outputData->cols + col) * outputData->channelStep;

            for (int ch = 0; ch < outputData->channels; ch++)
            {
                int* weight_row = filters->weights->data
                                + (0 * filters->weights->cols + ch) * filters->weights->channelStep;

                out_pixel[ch] = dotProduct0(in_pixel, weight_row, inputData->channels);
                out_pixel[ch] = out_pixel[ch] + (filters->biases->data[ch] * 2);
            }
        }
    }
    return 1;
}
/*
 * 3x3 depthwise convolution with stride 1 and implicit zero padding of 1.
 *
 * The output is cleared with setZero() first. For each output pixel, every
 * in-range tap of the 3x3 window is accumulated with vecMulAdd() over
 * filters->num_filters channels; taps outside the input are skipped. The
 * bias vector is then added once per pixel through vecAdd().
 *
 * The weights for tap t (0..8, row-major) start at
 * weights->data + t * weights->channelStep.
 *
 * inputData   input blob
 * filters     weights and biases of the layer
 * outputData  pre-allocated output blob
 *
 * Always returns 1.
 */
int convolution_3x3depthwise(CDataBlob* inputData, Filters* filters, CDataBlob* outputData)
{
    /* Accumulation below uses +=, so start from zero. */
    setZero(outputData);

    for (int row = 0; row < outputData->rows; row++)
    {
        for (int col = 0; col < outputData->cols; col++)
        {
            int* out_pixel = outputData->data + (row * outputData->cols + col) * outputData->channelStep;

            for (int filter_r = 0; filter_r < 3; filter_r++)
            {
                const int r = row - 1 + filter_r;
                if (r < 0 || r >= inputData->rows)
                    continue;

                for (int filter_c = 0; filter_c < 3; filter_c++)
                {
                    const int c = col - 1 + filter_c;
                    if (c < 0 || c >= inputData->cols)
                        continue;

                    const int filter_idx = filter_r * 3 + filter_c;
                    vecMulAdd(inputData->data + (r * inputData->cols + c) * inputData->channelStep,
                              filters->weights->data + (0 * filters->weights->cols + filter_idx) * filters->weights->channelStep,
                              out_pixel,
                              filters->num_filters);
                }
            }

            /* The bias offset expression in the original always evaluates to 0. */
            vecAdd(filters->biases->data, out_pixel, filters->num_filters);
        }
    }
    return 1;
}
/*
 * In-place ReLU: every element that is not greater than 0 becomes 0.
 *
 * Returns 1 on success, or prints an error and returns 0 when the blob is
 * empty.
 */
int relu(CDataBlob* inputoutputData)
{
    if (isEmpty(inputoutputData))
    {
        os_printf("%s err\n", __func__);
        return 0;
    }

    const int count = inputoutputData->cols * inputoutputData->rows * inputoutputData->channelStep;
    int* values = inputoutputData->data;

    for (int k = 0; k < count; ++k) {
        if (values[k] < 0)
            values[k] = 0;
    }
    return 1;
}
/*
 * Nearest-neighbour 2x upsampling: every input pixel is copied to the 2x2
 * block of output pixels starting at (2r, 2c).
 *
 * Returns a newly allocated blob with twice the rows and columns and the same
 * channel count and element size as the input. The input is not modified or
 * freed.
 *
 * An empty input or an allocation failure prints an error and terminates via
 * exit(1); previously a failed allocation led to a NULL dereference.
 */
CDataBlob* upsampleX2(CDataBlob* inputData)
{
    if (isEmpty(inputData)) {
        os_printf("%s err\n", __func__);
        exit(1);
    }

    CDataBlob* outData = (CDataBlob*)myAlloc(sizeof(CDataBlob));
    if (outData == NULL ||
        create(outData, inputData->rows * 2, inputData->cols * 2, inputData->channels, inputData->typesize) == NULL) {
        os_printf("%s err\n", __func__);
        exit(1);
    }

    for (int r = 0; r < inputData->rows; r++) {
        for (int c = 0; c < inputData->cols; c++) {
            const int outr = r * 2;
            const int outc = c * 2;
            const int* src = inputData->data + (r * inputData->cols + c) * inputData->channelStep;

            /* The four destination pixels of the 2x2 block. */
            int* top_left = outData->data + (outr * outData->cols + outc) * outData->channelStep;
            int* top_right = outData->data + (outr * outData->cols + outc + 1) * outData->channelStep;
            int* bottom_left = outData->data + ((outr + 1) * outData->cols + outc) * outData->channelStep;
            int* bottom_right = outData->data + ((outr + 1) * outData->cols + outc + 1) * outData->channelStep;

            for (int ch = 0; ch < inputData->channels; ++ch) {
                const int value = src[ch];
                top_left[ch] = value;
                top_right[ch] = value;
                bottom_left[ch] = value;
                bottom_right[ch] = value;
            }
        }
    }
    return outData;
}
/*
 * Element-wise sum of two blobs of identical shape, computed per pixel with
 * vecAdd2() (which narrows both operands to short before adding).
 *
 * Returns a newly allocated blob with the shape and element size of
 * inputData1. Neither input is modified or freed.
 *
 * A shape mismatch or an allocation failure prints an error and terminates
 * via exit(1); previously a failed allocation led to a NULL dereference.
 */
CDataBlob* elementAdd(CDataBlob* inputData1, CDataBlob* inputData2) {
    if (inputData1->rows != inputData2->rows || inputData1->cols != inputData2->cols || inputData1->channels != inputData2->channels) {
        os_printf("%s err\n", __func__);
        exit(1);
    }

    CDataBlob* outData = (CDataBlob*)myAlloc(sizeof(CDataBlob));
    if (outData == NULL ||
        create(outData, inputData1->rows, inputData1->cols, inputData1->channels, inputData1->typesize) == NULL) {
        os_printf("%s err\n", __func__);
        exit(1);
    }

    for (int r = 0; r < inputData1->rows; r++) {
        for (int c = 0; c < inputData1->cols; c++) {
            vecAdd2(inputData1->data + (r * inputData1->cols + c) * inputData1->channelStep,
                    inputData2->data + (r * inputData2->cols + c) * inputData2->channelStep,
                    outData->data + (r * outData->cols + c) * outData->channelStep,
                    inputData1->channels);
        }
    }
    return outData;
}
/*
 * Run one convolution layer and return its output in a new blob.
 *
 * The layer type is taken from the filter flags: pointwise-only filters use
 * convolution_1x1pointwise(), depthwise-only filters use
 * convolution_3x3depthwise(). Any other flag combination is an error.
 *
 * inputData  input blob; not modified or freed
 * filters    layer description (weights, biases, flags)
 * do_relu    non-zero to apply relu() to the result
 *
 * The output has the input's rows/cols and filters->num_filters channels.
 * Empty inputs, a channel mismatch, an unsupported filter type and allocation
 * failure all print an error and terminate via exit(1). The allocation check
 * is new; a failed allocation used to be dereferenced.
 */
CDataBlob* convolution(CDataBlob* inputData, Filters* filters, int do_relu)
{
    if (isEmpty(inputData) || isEmpty(filters->weights) || isEmpty(filters->biases)) {
        os_printf("%s err\n", __func__);
        exit(1);
    }
    if (inputData->channels != filters->channels) {
        os_printf("%s err\n", __func__);
        exit(1);
    }

    /* create() zero-fills the buffer, so no extra clearing is needed here. */
    CDataBlob* outputData = (CDataBlob *)myAlloc(sizeof(CDataBlob));
    if (outputData == NULL ||
        create(outputData, inputData->rows, inputData->cols, filters->num_filters, sizeof(int)) == NULL) {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    if (filters->is_pointwise && !filters->is_depthwise) {
        convolution_1x1pointwise(inputData, filters, outputData);
    }
    else if (!filters->is_pointwise && filters->is_depthwise) {
        convolution_3x3depthwise(inputData, filters, outputData);
    }
    else
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    if (do_relu)
        relu(outputData);
    return outputData;
}
/*
 * Run one convolution layer and return its output in a new blob.
 *
 * The layer type is taken from the filter flags: pointwise-only filters use
 * convolution_1x1pointwise(), depthwise-only filters use
 * convolution_3x3depthwise(). Any other flag combination is an error.
 *
 * inputData  input blob; not modified or freed
 * filters    layer description (weights, biases, flags)
 * do_relu    non-zero to apply relu() to the result
 *
 * The output has the input's rows/cols and filters->num_filters channels.
 * Empty inputs, a channel mismatch, an unsupported filter type and allocation
 * failure all print an error and terminate via exit(1). The allocation check
 * is new; a failed allocation used to be dereferenced.
 */
CDataBlob* convolution1(CDataBlob* inputData, Filters* filters, int do_relu)
{
    if (isEmpty(inputData) || isEmpty(filters->weights) || isEmpty(filters->biases)) {
        os_printf("%s err\n", __func__);
        exit(1);
    }
    if (inputData->channels != filters->channels) {
        os_printf("%s err\n", __func__);
        exit(1);
    }

    /* create() zero-fills the buffer, so no extra clearing is needed here. */
    CDataBlob* outputData = (CDataBlob *)myAlloc(sizeof(CDataBlob));
    if (outputData == NULL ||
        create(outputData, inputData->rows, inputData->cols, filters->num_filters, sizeof(int)) == NULL) {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    if (filters->is_pointwise && !filters->is_depthwise) {
        convolution_1x1pointwise(inputData, filters, outputData);
    }
    else if (!filters->is_pointwise && filters->is_depthwise) {
        convolution_3x3depthwise(inputData, filters, outputData);
    }
    else
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    if (do_relu)
        relu(outputData);
    return outputData;
}
/*
 * Run one convolution layer without ReLU, with the output blob allocated
 * through os_malloc() instead of myAlloc().
 *
 * The layer type is taken from the filter flags: pointwise-only filters use
 * convolution_1x1pointwise(), depthwise-only filters use
 * convolution_3x3depthwise(). Any other flag combination is an error.
 *
 * inputData  input blob; not modified or freed
 * filters    layer description (weights, biases, flags)
 *
 * The output has the input's rows/cols and filters->num_filters channels of
 * 4-byte elements and is fully zeroed before the layer runs. All failures,
 * including allocation failure (previously unchecked), print through
 * os_printf() and terminate via exit(1).
 *
 * NOTE(review): releasing this blob with setNULL() subtracts from `memsize`
 * although os_malloc() never added to it - confirm the statistic is only
 * informational.
 */
CDataBlob* convolution_first(CDataBlob* inputData, Filters* filters)
{
    if (isEmpty(inputData) || isEmpty(filters->weights) || isEmpty(filters->biases)) {
        os_printf("%s err\n", __func__);
        exit(1);
    }
    if (inputData->channels != filters->channels) {
        os_printf("%s err\n", __func__);
        exit(1);
    }

    CDataBlob* outputData = (CDataBlob*)os_malloc(sizeof(CDataBlob));
    if (outputData == NULL) {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }
    outputData->rows = inputData->rows;
    outputData->cols = inputData->cols;
    outputData->channels = filters->num_filters;
    outputData->typesize = 4;
    outputData->channelStep = outputData->channels;

    const uint32_t bytes = (uint32_t)(outputData->rows * outputData->cols * outputData->channelStep * sizeof(int));
    outputData->data = (int *)os_malloc(bytes);
    if (outputData->data == NULL) {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }
    /* One full-size clear; the depthwise path accumulates into the buffer. */
    os_memset_word((uint32_t *)(outputData->data), 0, bytes);

    if (filters->is_pointwise && !filters->is_depthwise) {
        convolution_1x1pointwise(inputData, filters, outputData);
    }
    else if (!filters->is_pointwise && filters->is_depthwise) {
        convolution_3x3depthwise(inputData, filters, outputData);
    }
    else
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }
    return outputData;
}
/*
 * Run one convolution layer followed unconditionally by ReLU, with the
 * output blob allocated through os_malloc().
 *
 * The layer type is taken from the filter flags: pointwise-only filters use
 * convolution_1x1pointwise(), depthwise-only filters use
 * convolution_3x3depthwise(). Any other flag combination is an error.
 *
 * Changes from the previous version:
 *  - the extra convolution_1x1pointwise() call that ran before the flag
 *    dispatch is gone; it repeated the work for pointwise filters and applied
 *    the wrong kernel for depthwise filters;
 *  - the whole output buffer is zeroed before use;
 *  - both allocations are checked.
 *
 * inputData  input blob; not modified or freed
 * filters    layer description (weights, biases, flags)
 *
 * All failures print through os_printf() and terminate via exit(1).
 */
CDataBlob* convolution_second(CDataBlob* inputData, Filters* filters)
{
    if (isEmpty(inputData) || isEmpty(filters->weights) || isEmpty(filters->biases)) {
        os_printf("%s err\n", __func__);
        exit(1);
    }
    if (inputData->channels != filters->channels) {
        os_printf("%s err\n", __func__);
        exit(1);
    }

    CDataBlob* outputData = (CDataBlob*)os_malloc(sizeof(CDataBlob));
    if (outputData == NULL) {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }
    outputData->rows = inputData->rows;
    outputData->cols = inputData->cols;
    outputData->channels = filters->num_filters;
    outputData->typesize = 4;
    outputData->channelStep = outputData->channels;

    const uint32_t bytes = (uint32_t)(outputData->rows * outputData->cols * outputData->channelStep * 4);
    outputData->data = (int *)os_malloc(bytes);
    if (outputData->data == NULL) {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }
    /* os_malloc() memory is not known to be cleared; the depthwise path accumulates. */
    os_memset_word((uint32_t *)(outputData->data), 0, bytes);

    if (filters->is_pointwise && !filters->is_depthwise) {
        convolution_1x1pointwise(inputData, filters, outputData);
    }
    else if (!filters->is_pointwise && filters->is_depthwise) {
        convolution_3x3depthwise(inputData, filters, outputData);
    }
    else
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    relu(outputData);
    return outputData;
}
/*
 * Run one convolution layer whose pointwise form uses
 * convolution_1x1pointwise0(), with the output blob allocated through
 * os_malloc().
 *
 * Depthwise-only filters use convolution_3x3depthwise(). Any other flag
 * combination that is not pointwise-only is an error.
 *
 * inputData  input blob; not modified or freed
 * filters    layer description (weights, biases, flags)
 * do_relu    non-zero to apply relu() to the result
 *
 * The output buffer is now zeroed in full before use and both allocations
 * are checked. All failures print through os_printf() and terminate via
 * exit(1).
 */
CDataBlob* convolution0(CDataBlob* inputData, Filters* filters, int do_relu)
{
    if (isEmpty(inputData) || isEmpty(filters->weights) || isEmpty(filters->biases)) {
        os_printf("%s err\n", __func__);
        exit(1);
    }
    if (inputData->channels != filters->channels) {
        os_printf("%s err\n", __func__);
        exit(1);
    }

    CDataBlob* outputData = (CDataBlob*)os_malloc(sizeof(CDataBlob));
    if (outputData == NULL) {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }
    outputData->rows = inputData->rows;
    outputData->cols = inputData->cols;
    outputData->channels = filters->num_filters;
    outputData->typesize = 4;
    outputData->channelStep = outputData->channels;

    const uint32_t bytes = (uint32_t)(outputData->rows * outputData->cols * outputData->channelStep * sizeof(int));
    outputData->data = (int *)os_malloc(bytes);
    if (outputData->data == NULL) {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }
    /* os_malloc() memory is not known to be cleared; the depthwise path accumulates. */
    os_memset_word((uint32_t *)(outputData->data), 0, bytes);

    if (filters->is_pointwise && !filters->is_depthwise) {
        convolution_1x1pointwise0(inputData, filters, outputData);
    }
    else if (!filters->is_pointwise && filters->is_depthwise) {
        convolution_3x3depthwise(inputData, filters, outputData);
    }
    else
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    if (do_relu)
        relu(outputData);
    return outputData;
}
/*
 * Two chained convolution() layers: filtersP without ReLU, then filtersD
 * with the requested ReLU setting.
 *
 * The intermediate blob is released with setNULL(); inputData is left alone.
 * Returns the output blob of the second layer.
 */
CDataBlob* convolutionDP(CDataBlob* inputData,
                         Filters* filtersP, Filters* filtersD, int do_relu)
{
    CDataBlob* stage1 = convolution(inputData, filtersP, 0);
    CDataBlob* stage2 = convolution(stage1, filtersD, do_relu);

    setNULL(stage1);
    return stage2;
}
/*
 * Two chained layers: convolution_first() with filtersP (no ReLU), then
 * convolution() with filtersD and the requested ReLU setting.
 *
 * The intermediate blob is released with setNULL(); inputData is left alone.
 * Returns the output blob of the second layer.
 */
CDataBlob* convolutionDP1(CDataBlob* inputData,
                          Filters* filtersP, Filters* filtersD, int do_relu)
{
    CDataBlob* stage1 = convolution_first(inputData, filtersP);
    CDataBlob* stage2 = convolution(stage1, filtersD, do_relu);

    setNULL(stage1);
    return stage2;
}
/*
 * Four-layer unit made of two convolutionDP() pairs: the first pair
 * (filtersP1, filtersD1) always ends with ReLU, the second pair
 * (filtersP2, filtersD2) uses the requested ReLU setting.
 *
 * The intermediate blob is released with setNULL(); inputData is left alone.
 * Returns the output blob of the second pair.
 */
CDataBlob* convolution4layerUnit(CDataBlob* inputData,
                                 Filters* filtersP1, Filters* filtersD1,
                                 Filters* filtersP2, Filters* filtersD2, int do_relu)
{
    CDataBlob* first_pair = convolutionDP(inputData, filtersP1, filtersD1, 1);
    CDataBlob* second_pair = convolutionDP(first_pair, filtersP2, filtersD2, do_relu);

    setNULL(first_pair);
    return second_pair;
}
/*
 * Four-layer unit made of two convolutionDP1() pairs: the first pair
 * (filtersP1, filtersD1) always ends with ReLU, the second pair
 * (filtersP2, filtersD2) uses the requested ReLU setting.
 *
 * The intermediate blob is released with setNULL(); inputData is left alone.
 * Returns the output blob of the second pair.
 */
CDataBlob* convolution4layerUnit1(CDataBlob* inputData,
                                  Filters* filtersP1, Filters* filtersD1,
                                  Filters* filtersP2, Filters* filtersD2, int do_relu)
{
    CDataBlob* first_pair = convolutionDP1(inputData, filtersP1, filtersD1, 1);
    CDataBlob* second_pair = convolutionDP1(first_pair, filtersP2, filtersD2, do_relu);

    setNULL(first_pair);
    return second_pair;
}
/*
 * 2x2 max pooling with stride 2 (the only configuration supported).
 *
 * Output size per axis is ceil((n - 3) / 2) + 1. Each output pixel takes the
 * per-channel maximum over the input window starting at (2 * row, 2 * col),
 * clipped to the input bounds.
 *
 * The running maximum is kept as int, the element type of the blob. It used
 * to be a float, which cannot represent every int exactly.
 *
 * Returns a newly allocated blob; inputData is not modified or freed. An
 * empty input, a degenerate output size or an allocation failure (previously
 * unchecked) prints an error and terminates via exit(1).
 */
CDataBlob* maxpooling2x2S2(CDataBlob* inputData)
{
    if (isEmpty(inputData))
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    int outputR = (int)(ceil((inputData->rows - 3.0f) / 2)) + 1;
    int outputC = (int)(ceil((inputData->cols - 3.0f) / 2)) + 1;
    int outputCH = inputData->channels;
    if (outputR < 1 || outputC < 1)
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    /* create() zero-fills the buffer, so no extra clearing is needed here. */
    CDataBlob* outputData = (CDataBlob*)myAlloc(sizeof(CDataBlob));
    if (outputData == NULL ||
        create(outputData, outputR, outputC, outputCH, sizeof(int)) == NULL)
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    for (int row = 0; row < outputData->rows; row++)
    {
        for (int col = 0; col < outputData->cols; col++)
        {
            /* Element offsets of the (up to four) input pixels in the window. */
            int inputMatOffsetsInElement[4] = {0};
            int elementCount = 0;

            int rstart = row * 2;
            int cstart = col * 2;
            int rend = MIN(rstart + 2, inputData->rows);
            int cend = MIN(cstart + 2, inputData->cols);

            for (int fr = rstart; fr < rend; fr++)
            {
                for (int fc = cstart; fc < cend; fc++)
                {
                    inputMatOffsetsInElement[elementCount++] = ((size_t)(fr)*inputData->cols + fc) * inputData->channelStep;
                }
            }

            int* out_pixel = outputData->data + (row * outputData->cols + col) * outputData->channelStep;
            for (int ch = 0; ch < outputData->channels; ch++)
            {
                int maxVal = inputData->data[ch + inputMatOffsetsInElement[0]];
                for (int ec = 1; ec < elementCount; ec++)
                {
                    int candidate = inputData->data[ch + inputMatOffsetsInElement[ec]];
                    if (candidate > maxVal)
                        maxVal = candidate;
                }
                out_pixel[ch] = maxVal;
            }
        }
    }
    return outputData;
}
/*
 * Build a 2-channel coordinate grid for a feature map.
 *
 * For every cell (r, c): channel 0 holds c * stride + offset and channel 1
 * holds r * stride + offset.
 *
 * feature_width   number of columns
 * feature_height  number of rows
 * stride          spacing between neighbouring cells
 * offset          value added to every coordinate
 *
 * Returns a newly allocated feature_height x feature_width x 2 blob. An
 * allocation failure (previously unchecked) prints an error and terminates
 * via exit(1), in line with the other blob constructors in this file.
 */
CDataBlob* meshgrid(int feature_width, int feature_height, int stride, int offset) {
    CDataBlob* out = (CDataBlob*)myAlloc(sizeof(CDataBlob));
    if (out == NULL || create(out, feature_height, feature_width, 2, sizeof(int)) == NULL) {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    for (int r = 0; r < feature_height; ++r) {
        int rx = (int)(r * stride) + offset;
        for (int c = 0; c < feature_width; ++c) {
            int* cell = out->data + (r * out->cols + c) * out->channelStep;
            cell[0] = (c * stride) + offset;
            cell[1] = rx;
        }
    }
    return out;
}
/*
 * Lookup table used by bbox_decode() in place of a run-time exp() call; it is
 * indexed directly with the raw width/height prediction.
 * NOTE(review): the values look like a scaled exp(i / 256) curve (the
 * commented-out formula in bbox_decode multiplies exp() by 1 << 7), but the
 * first entry is 147 rather than 128 - confirm the exact generator before
 * regenerating the table.
 *
 * The value 1030 used to be split across two source lines ("1" / "030"),
 * which broke the initializer list; the line break now falls on a comma.
 */
const int16_t exp_table[640] =
{
147,147,148,149,149,150,150,151,151,152,153,153,154,154,155,156,156,157,157,158,159,159,160,161,161,162,163,163,164,164,165,166,166,167,168,168,169,170,170,171,172,172,173,174,174,175,176,176,177,178,179,179,180,181,181,182,183,183,184,185,186,186,187,188,189,189,190,191,192,192,193,194,195,195,196,197,198,198,199,200,201,202,202,203,204,205,206,206,207,208,209,210,210,211,212,213,214,215,215,216,217,218,219,220,221,221,222,223,224,225,226,227,228,228,229,230,231,232,233,234,235,236,237,238,239,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,274,275,276,277,278,279,280,281,282,283,284,286,287,288,289,290,291,292,294,295,296,297,298,299,300,302,303,304,305,306,308,309,310,311,312,314,315,316,317,319,320,321,322,324,325,326,327,329,330,331,333,334,335,337,338,339,341,342,343,345,346,347,349,350,351,353,354,356,357,358,360,361,363,364,365,367,368,370,371,373,374,376,377,379,380,381,383,384,386,387,389,391,392,394,395,397,398,400,401,403,405,406,408,409,411,413,414,416,417,419,421,422,424,426,427,429,431,432,434,436,437,439,441,443,444,446,448,450,451,453,455,457,458,460,462,464,466,468,469,471,473,475,477,479,480,482,484,486,488,490,492,494,496,498,500,502,504,506,508,510,512,514,516,518,520,522,524,526,528,530,532,534,536,538,540,542,545,547,549,551,553,555,557,560,562,564,566,568,571,573,575,577,580,582,584,587,589,591,593,596,598,600,603,605,608,610,612,615,617,620,622,624,627,629,632,634,637,639,642,644,647,649,652,654,657,659,662,665,667,670,673,675,678,680,683,686,688,691,694,697,699,702,705,708,710,713,716,719,722,724,727,730,733,736,739,742,744,747,750,753,756,759,762,765,768,771,774,777,780,783,786,789,793,796,799,802,805,808,811,814,818,821,824,827,831,834,837,840,844,847,850,854,857,860,864,867,870,874,877,881,884,888,891,895,898,902,905,909,912,916,919,923,927,930,934,938,941,945,949,952,956,960,964,967,971,975,979,983,986,990,994,998,1002,1006,1010,1014,1018,1022,1026,
1030,1034,1038,1042,1046,1050,1054,1058,1062,1067,1071,1075,1079,1083,1088,1092,1096,1100,1105,1109,1113,1118,1122,1127,1131,1135,1140,1144,1149,1153,1158,1162,1167,1171,1176,1181,1185,1190,1195,1199,1204,1209,1213,1218,1223,1228,1233,1237,1242,1247,1252,1257,1262,1267,1272,1277,1282,1287,1292,1297,1302,1307,1312,1317,1322,1328,1333,1338,1343,1348,1354,1359,1364,1370,1375,1380,1386,1391,1397,1402,1408,1413,1419,1424,1430,1435,1441,1447,1452,1458,1464,1469,1475,1481,1487,1493,1498,1504,1510,1516,1522,1528,1534,1540,1546,1552,1558,1564,1570,1577,1583,1589,1595,1601,1608,1614,1620,1627,1633,1639,1646,1652,1659,1665,1672,1678,1685,1691,1698,1705,1711,1718,1725,1732,1738,1745,1752,1759,1766,1773,1780,1787
};
// Decode raw box regressions in place into corner-form boxes.
//
// For every (row, col) cell, bbox_pred holds four ints: a centre offset in
// pb[0], pb[1] (divided by 1 << 8 before use) and two exp_table indices in
// pb[2], pb[3]. The matching cell of priors supplies the anchor centre in
// pp[0], pp[1]. The cell is overwritten with
// (cx - w/2, cy - h/2, cx + w/2, cy + h/2), where the centre is scaled by 1 << 7.
//
// bbox_pred : blob to decode in place; must have exactly 4 channels.
// priors    : anchor blob with the same rows/cols as bbox_pred.
// stride    : multiplier applied to the offsets and to the table values.
//
// A shape or channel mismatch is fatal (exit(1)), matching kps_decode: the
// loops below index both blobs with the same (row, col) and always touch four
// values per cell, so continuing after a failed check would read or write
// outside the blobs.
void bbox_decode(CDataBlob* bbox_pred, CDataBlob* priors, int stride) {
    if (bbox_pred->cols != priors->cols || bbox_pred->rows != priors->rows) {
        printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }
    if (bbox_pred->channels != 4) {
        printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    // Highest valid exp_table index; pb[2] / pb[3] come straight from the
    // network, so they are clamped before being used as subscripts.
    const int exp_last = (int)(sizeof(exp_table) / sizeof(exp_table[0])) - 1;
    int fstride = (int)stride;

    for (int r = 0; r < bbox_pred->rows; ++r) {
        for (int c = 0; c < bbox_pred->cols; ++c) {
            int* pb = (int*)ptr(bbox_pred, r, c, sizeof(int));
            int* pp = (int*)ptr(priors, r, c, sizeof(int));

            int cx = (int)(((float)pb[0] / (1 << 8) * fstride + pp[0]) * (1 << 7));
            int cy = (int)(((float)pb[1] / (1 << 8) * fstride + pp[1]) * (1 << 7));

            // The table lookup stands in for the float expression
            // exp((float)pb[k] / (1 << 8)) * (1 << 7).
            int wi = pb[2] < 0 ? 0 : (pb[2] > exp_last ? exp_last : pb[2]);
            int hi = pb[3] < 0 ? 0 : (pb[3] > exp_last ? exp_last : pb[3]);
            int w = exp_table[wi] * fstride;
            int h = exp_table[hi] * fstride;

            pb[0] = cx - w / 2;
            pb[1] = cy - h / 2;
            pb[2] = cx + w / 2;
            pb[3] = cy + h / 2;
        }
    }
}
// Decode keypoint regressions in place.
//
// Each cell of kps_pred holds (x, y) pairs. Every x becomes
// x * stride + pp[0] and every y becomes y * stride + pp[1], where pp is the
// matching cell of priors.
//
// Exits the program if the two blobs differ in rows/cols or if kps_pred has an
// odd number of channels.
void kps_decode(CDataBlob* kps_pred, CDataBlob* priors, int stride) {
    const int shape_mismatch =
        (kps_pred->cols != priors->cols) || (kps_pred->rows != priors->rows);
    if (shape_mismatch) {
        printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }
    if ((kps_pred->channels & 1) != 0) {
        printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    const int scale = (int)stride;
    // Channel count is even (checked above), so it is walked two values at a time.
    const int value_count = kps_pred->channels;

    for (int row = 0; row < kps_pred->rows; ++row) {
        for (int col = 0; col < kps_pred->cols; ++col) {
            int* point = (int*)ptr(kps_pred, row, col, sizeof(int));
            int* anchor = (int*)ptr(priors, row, col, sizeof(int));
            for (int k = 0; k < value_count; k += 2) {
                point[k] = point[k] * scale + anchor[0];
                point[k + 1] = point[k + 1] * scale + anchor[1];
            }
        }
    }
}
// Concatenate three blobs along the channel axis.
//
// All three inputs must be non-empty and share the same rows/cols. The result
// is a newly allocated blob (myAlloc + create) with the same rows/cols and
// channels1 + channels2 + channels3 int channels per cell, laid out as
// [input1 | input2 | input3].
//
// Any validation failure logs the function name and line, then exits.
CDataBlob* concat3(CDataBlob* inputData1, CDataBlob* inputData2, CDataBlob* inputData3)
{
    if (isEmpty(inputData1) || isEmpty(inputData2) || isEmpty(inputData3))
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    const int same_cols = (inputData1->cols == inputData2->cols) &&
                          (inputData1->cols == inputData3->cols);
    const int same_rows = (inputData1->rows == inputData2->rows) &&
                          (inputData1->rows == inputData3->rows);
    if (!(same_cols && same_rows))
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    const int ch1 = inputData1->channels;
    const int ch2 = inputData2->channels;
    const int ch3 = inputData3->channels;
    const int out_rows = inputData1->rows;
    const int out_cols = inputData1->cols;
    const int out_channels = ch1 + ch2 + ch3;
    if (out_rows < 1 || out_cols < 1 || out_channels < 1)
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    CDataBlob* merged = (CDataBlob*)myAlloc(sizeof(CDataBlob));
    create(merged, out_rows, out_cols, out_channels, sizeof(int));

    for (int r = 0; r < merged->rows; r++)
    {
        for (int c = 0; c < merged->cols; c++)
        {
            int* dst = (int*)ptr(merged, r, c, sizeof(int));
            int* src1 = (int*)ptr(inputData1, r, c, sizeof(int));
            int* src2 = (int*)ptr(inputData2, r, c, sizeof(int));
            int* src3 = (int*)ptr(inputData3, r, c, sizeof(int));

            // Append each input's channels behind the previous one.
            os_memcpy_word((uint32_t *)dst, (uint32_t *)src1, (uint32_t)(sizeof(int) * ch1));
            dst += ch1;
            os_memcpy_word((uint32_t *)dst, (uint32_t *)src2, (uint32_t)(sizeof(int) * ch2));
            dst += ch2;
            os_memcpy_word((uint32_t *)dst, (uint32_t *)src3, (uint32_t)(sizeof(int) * ch3));
        }
    }
    return merged;
}
// Flatten a blob into a 1 x 1 blob.
//
// The result is newly allocated (myAlloc + create) with
// cols * rows * channels int channels. Cells are copied in row-major order,
// each cell contributing its `channels` ints back to back.
//
// An empty input logs the function name and line, then exits.
CDataBlob* blob2vector(CDataBlob* inputData)
{
    if (isEmpty(inputData))
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    CDataBlob* flat = (CDataBlob*)myAlloc(sizeof(CDataBlob));
    create(flat, 1, 1, inputData->cols * inputData->rows * inputData->channels, sizeof(int));

    // Number of bytes occupied by one cell (all of its channels).
    const int cell_bytes = inputData->channels * sizeof(int);
    int* const base = (int*)ptr(flat, 0, 0, sizeof(int));
    int written = 0;   // ints already copied into the flat blob

    for (int r = 0; r < inputData->rows; r++)
    {
        for (int c = 0; c < inputData->cols; c++)
        {
            int* src = (int*)ptr(inputData, r, c, sizeof(int));
            os_memcpy_word((uint32_t *)(base + written), (uint32_t *)src, (uint32_t)cell_bytes);
            written += inputData->channels;
        }
    }
    return flat;
}
// Lookup table read by sigmoid(): the index is the input value shifted right
// by 4, clamped to [-128, 127], plus 128, which covers all 256 entries.
// Entries are non-decreasing, from 0 up to 126.
// NOTE(review): this appears to be a fixed-point logistic curve replacing the
// disabled float formula in sigmoid(); the exact output scale cannot be
// derived from the table alone -- confirm against the generator.
const int16_t sigmoid_table[256] =
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,6,6,6,7,7,8,8,9,9,10,10,11,12,13,13,14,15,16,
17,18,19,20,21,22,23,24,26,27,28,30,31,33,34,36,37,39,41,43,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,
74,76,77,79,81,83,85,87,88,90,91,93,95,96,97,99,100,101,103,104,105,106,107,108,109,110,111,112,113,113,
114,115,115,116,117,117,118,118,119,119,120,120,120,121,121,121,122,122,122,122,123,123,123,123,124,124,
124,124,124,124,124,125,125,125,125,125,125,125,125,125,125,125,126,126,126,126,126,126,126,126,126,126,
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126
};
// Apply the table-driven sigmoid to every value of the blob, in place.
//
// Each int is shifted right by 4, clamped to [-128, 127], offset by 128 and
// replaced with the corresponding sigmoid_table entry. The float version this
// replaces computed 1 / (1 + exp(-v)) * (1 << 7) with v = value / (1 << 8).
void sigmoid(CDataBlob* inputData) {
    for (int row = 0; row < inputData->rows; ++row) {
        for (int col = 0; col < inputData->cols; ++col) {
            int* cell = (int*)ptr(inputData, row, col, sizeof(int));
            for (int k = 0; k < inputData->channels; ++k) {
                int slot = cell[k] >> 4;
                if (slot > 127) {
                    slot = 127;
                }
                if (slot < -128) {
                    slot = -128;
                }
                cell[k] = sigmoid_table[slot + 128];
            }
        }
    }
}
// Turn the flattened network heads into a list of face rectangles.
//
// cls, reg, kps, obj   : 1 x 1 blobs (validated below). cls and obj carry one
//                        score per candidate, reg four values per candidate
//                        (xmin, ymin, xmax, ymax), all divided by 1 << 7 when
//                        read. kps is only validated (kps channels / obj
//                        channels must be 10); its values are not read here.
// overlap_threshold    : a box is suppressed when its intersection with a
//                        stronger kept box, divided by the smaller of the two
//                        areas, exceeds this value.
// confidence_threshold : minimum sqrt(cls * obj) score for a candidate.
// top_k, keep_top_k    : accepted for interface compatibility; not used.
//
// Returns a myAlloc'ed FaceRect array. facesInfo[0].numface holds the number
// of valid entries. The array always has room for at least one element, so
// numface can be read even when no face was found (the other fields of
// element 0 are then unset).
// NOTE(review): the caller presumably releases the array with myFree -- confirm.
//
// Invalid input blobs log the function name and line, then exit.
FaceRect* detection_output(CDataBlob* cls,
                           CDataBlob* reg,
                           CDataBlob* kps,
                           CDataBlob* obj,
                           float overlap_threshold,
                           float confidence_threshold,
                           int top_k,
                           int keep_top_k)
{
    // Size of the candidate buffer; also used for the memsize bookkeeping at the end.
    enum { SCORE_BBOX_CAPACITY = 200 };

    (void)top_k;
    (void)keep_top_k;

    if (isEmpty(reg) || isEmpty(cls) || isEmpty(kps) || isEmpty(obj))
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }
    if (reg->cols != 1 || reg->rows != 1 || cls->cols != 1 || cls->rows != 1 ||
        kps->cols != 1 || kps->rows != 1 || obj->cols != 1 || obj->rows != 1)
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }
    if ((int)(kps->channels / obj->channels) != 10)
    {
        os_printf("%s err %d\n", __func__, __LINE__);
        exit(1);
    }

    Score_bb *score_bbox_vec;
    score_bbox_vec = (Score_bb*)myAlloc(sizeof(Score_bb) * SCORE_BBOX_CAPACITY);

    // Collect candidates above the confidence threshold. The blobs are 1 x 1,
    // so the element index is simply the channel offset.
    int count = 0;
    for (int i = 0; i < cls->channels; ++i)
    {
        float conf = sqrt( (float)(cls->data[i]) / (1 << 7) * (float)(obj->data[i]) / (1 << 7) );
        if (conf >= confidence_threshold)
        {
            int slot;
            if (count < SCORE_BBOX_CAPACITY)
            {
                slot = count++;
            }
            else
            {
                // Buffer full: never write past it. Replace the weakest stored
                // candidate, but only if the new one is stronger.
                slot = 0;
                for (int k = 1; k < SCORE_BBOX_CAPACITY; ++k)
                {
                    if (score_bbox_vec[k].score < score_bbox_vec[slot].score)
                    {
                        slot = k;
                    }
                }
                if (conf <= score_bbox_vec[slot].score)
                {
                    continue;
                }
            }
            score_bbox_vec[slot].xmin = (float)reg->data[4 * i + 0] / (1 << 7);
            score_bbox_vec[slot].ymin = (float)reg->data[4 * i + 1] / (1 << 7);
            score_bbox_vec[slot].xmax = (float)reg->data[4 * i + 2] / (1 << 7);
            score_bbox_vec[slot].ymax = (float)reg->data[4 * i + 3] / (1 << 7);
            score_bbox_vec[slot].score = conf;
            score_bbox_vec[slot].idx = slot;
        }
    }

    // Sort candidates by score, highest first (exchange sort over all pairs).
    Score_bb* temp = (Score_bb*)myAlloc(sizeof(Score_bb));
    for (int ii = 0; ii < count; ii++)
    {
        for (int jj = 0; jj < count; jj++)
        {
            if (score_bbox_vec[ii].score > score_bbox_vec[jj].score)
            {
                os_memcpy_word((uint32_t *)(temp), (uint32_t *)(score_bbox_vec + ii), (uint32_t)sizeof(Score_bb));
                os_memcpy_word((uint32_t *)(score_bbox_vec + ii), (uint32_t *)(score_bbox_vec + jj), (uint32_t)sizeof(Score_bb));
                os_memcpy_word((uint32_t *)(score_bbox_vec + jj), (uint32_t *)(temp), (uint32_t)sizeof(Score_bb));
            }
        }
    }
    myFree(temp);
    memsize = memsize - sizeof(Score_bb);

    // Greedy non-maximum suppression. c is the box currently being kept; i is
    // reset to -1 and re-pointed at the first later box that survives, which
    // becomes the next c. Suppressed boxes are marked with score == 0.
    int i, j, c;
    for (i = 0; i < count && i != -1; )
    {
        for (c = i, j = i + 1, i = -1; j < count; j++)
        {
            if (score_bbox_vec[j].score == 0)
            {
                continue;
            }

            float xc1, yc1, xc2, yc2, sc, s1, s2, iou;
            xc1 = score_bbox_vec[c].xmin > score_bbox_vec[j].xmin ? score_bbox_vec[c].xmin : score_bbox_vec[j].xmin;
            yc1 = score_bbox_vec[c].ymin > score_bbox_vec[j].ymin ? score_bbox_vec[c].ymin : score_bbox_vec[j].ymin;
            xc2 = score_bbox_vec[c].xmax < score_bbox_vec[j].xmax ? score_bbox_vec[c].xmax : score_bbox_vec[j].xmax;
            yc2 = score_bbox_vec[c].ymax < score_bbox_vec[j].ymax ? score_bbox_vec[c].ymax : score_bbox_vec[j].ymax;
            sc = (xc1 < xc2 && yc1 < yc2) ? (xc2 - xc1) * (yc2 - yc1) : 0;
            s1 = (score_bbox_vec[c].xmax - score_bbox_vec[c].xmin) * (score_bbox_vec[c].ymax - score_bbox_vec[c].ymin);
            s2 = (score_bbox_vec[j].xmax - score_bbox_vec[j].xmin) * (score_bbox_vec[j].ymax - score_bbox_vec[j].ymin);

            // Overlap is measured against the smaller box, not the union.
            iou = sc / (s1 < s2 ? s1 : s2);

            if (iou > overlap_threshold)
            {
                score_bbox_vec[j].score = 0;
            }
            else if (i == -1)
            {
                i = j;
            }
        }
    }

    // Compact the survivors to the front of the buffer.
    for (i = 0, j = 0; i < count; i++)
    {
        if (score_bbox_vec[i].score)
        {
            score_bbox_vec[j].score = score_bbox_vec[i].score;
            score_bbox_vec[j].xmin = score_bbox_vec[i].xmin;
            score_bbox_vec[j].ymin = score_bbox_vec[i].ymin;
            score_bbox_vec[j].xmax = score_bbox_vec[i].xmax;
            score_bbox_vec[j++].ymax = score_bbox_vec[i].ymax;
        }
    }
    count = j;

    // Always allocate at least one element: numface is stored in element 0
    // even when no face survived.
    // NOTE(review): if callers adjust memsize from numface when freeing this
    // array, the count == 0 case now differs by one element -- confirm.
    FaceRect* facesInfo;
    facesInfo = (FaceRect*)myAlloc(sizeof(FaceRect) * (count > 0 ? count : 1));
    for (i = 0; i < count; i++)
    {
        facesInfo[i].score = score_bbox_vec[i].score;
        facesInfo[i].x = (int)score_bbox_vec[i].xmin;
        facesInfo[i].y = (int)score_bbox_vec[i].ymin;
        facesInfo[i].w = (int)(score_bbox_vec[i].xmax - score_bbox_vec[i].xmin);
        facesInfo[i].h = (int)(score_bbox_vec[i].ymax - score_bbox_vec[i].ymin);
    }
    facesInfo[0].numface = count;

    myFree(score_bbox_vec);
    score_bbox_vec = NULL;
    memsize = memsize - sizeof(Score_bb) * SCORE_BBOX_CAPACITY;
    return facesInfo;
}