/*
 * Declarations for the CNN face detector: build-time SIMD switches, the
 * blob/filter data structures, and prototypes for the layer, decoding and
 * drawing routines. Only declarations live in this section; the definitions
 * are not visible here.
 */
#ifndef FACEDETECT_CNN_H
#define FACEDETECT_CNN_H

#include "facedetection_export.h"

/*
 * NOTE(review): the names of the system headers included by this file were
 * lost (every `#include` had an empty target). The standard headers below are
 * a best-effort restoration -- confirm the exact list against the original.
 */
#include <stddef.h> /* size_t, used by myAlloc() */
#include <stdlib.h>
#include <string.h>

//#define _ENABLE_AVX512 //Please enable it if X64 CPU
//#define _ENABLE_AVX2 //Please enable it if X64 CPU
//#define _ENABLE_NEON //Please enable it if ARM CPU

/**
 * Run face detection on an image.
 *
 * @param result_buffer  Buffer memory for storing face detection results.
 *                       !!Its size must be 0x20000 bytes!!
 * @param rgb_image_data Input image. It must be a BGR (three channels) image
 *                       instead of an RGB image!
 * @param width          Image width (presumably in pixels -- confirm).
 * @param height         Image height (presumably in pixels -- confirm).
 * @param step           NOTE(review): presumably the number of bytes per image
 *                       row -- confirm against the definition.
 * @return Pointer to int; the layout of the returned data is not visible in
 *         this section.
 */
int *facedetect_cnn(unsigned char *result_buffer,
                    unsigned char *rgb_image_data, int width, int height, int step);

/*
    DO NOT EDIT the following code if you don't really understand it.
*/
#if defined(_ENABLE_AVX512) || defined(_ENABLE_AVX2)
#include <immintrin.h>
#endif

#if defined(_ENABLE_NEON)
#include "arm_neon.h"
//NEON does not support UINT8*INT8 dot product
//to convert the input data to range [0, 127],
//and then use INT8*INT8 dot product
#define _MAX_UINT8_VALUE 127
#else
#define _MAX_UINT8_VALUE 255
#endif

/* Alignment value selected by the enabled SIMD instruction set. */
#if defined(_ENABLE_AVX512)
#define _MALLOC_ALIGN 512
#elif defined(_ENABLE_AVX2)
#define _MALLOC_ALIGN 256
#else
#define _MALLOC_ALIGN 128
#endif

/* The SIMD switches are mutually exclusive. */
#if defined(_ENABLE_AVX512) && defined(_ENABLE_NEON)
#error Cannot enable the two of AVX512 and NEON at the same time.
#endif
#if defined(_ENABLE_AVX2) && defined(_ENABLE_NEON)
#error Cannot enable the two of AVX and NEON at the same time.
#endif
#if defined(_ENABLE_AVX512) && defined(_ENABLE_AVX2)
#error Cannot enable the two of AVX512 and AVX2 at the same time.
#endif

/*
 * NOTE(review): second group of system headers whose names were lost; one
 * include in this group was already commented out in the original. Confirm.
 */
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

/* Allocation helpers; myFree() releases what myAlloc() returned (presumably
 * with _MALLOC_ALIGN alignment -- confirm against the definitions). */
void *myAlloc(size_t size);
void myFree(void *ptr);
//#define myFree(ptr) (myFree_(*(ptr)), *(ptr)=0);

/* Classic min/max macros. Each argument may be evaluated twice, so do not
 * pass expressions with side effects (e.g. MIN(i++, j)). */
#ifndef MIN
#  define MIN(a,b)  ((a) > (b) ? (b) : (a))
#endif

#ifndef MAX
#  define MAX(a,b)  ((a) < (b) ? (b) : (a))
#endif

/* One detection result: a score plus a rectangle (x, y, w, h). */
typedef struct FaceRect_
{
    int numface;
    float score;
    int x;
    int y;
    int w;
    int h;
    //int lm[10];
} FaceRect;

/* A rows x cols x channels data blob. */
typedef struct CDataBlob_
{
    int rows;
    int cols;
    int channels;    //in element
    int channelStep; //in byte
    int typesize;
    int *data;
} CDataBlob;

/* A convolution layer with its weights and biases held in blobs. */
typedef struct Filters_
{
    int channels;
    int num_filters;
    int is_depthwise;
    int is_pointwise;
    int with_relu;
    CDataBlob *weights;
    CDataBlob *biases;
} Filters;

/* Description of a convolution layer with raw weight/bias pointers. */
typedef struct ConvInfoStruct_
{
    int channels;
    int num_filters;
    int is_depthwise;
    int is_pointwise;
    int with_relu;
    int *pWeights;
    int *pBiases;
} ConvInfoStruct;

/* ---- Blob helpers ---- */
void setZero(CDataBlob *blob);
void setNULL(CDataBlob *blob);
CDataBlob *create(CDataBlob *blob, int r, int c, int ch, int typesize);
int *ptr(CDataBlob *blob, int r, int c, int typesize);
int getElement(CDataBlob *blob, int r, int c, int ch);
int isEmpty(CDataBlob *blob);

/* ---- Filter setup and detection entry point ---- */
Filters *Operator_conv(Filters *filter, ConvInfoStruct *convinfo);

FaceRect *objectdetect_cnn(unsigned char *rgbImageData, int width, int height, int step);

/* ---- Network layers ---- */
CDataBlob *setDataFrom3x3S2P1to1x1S1P0FromImage(unsigned char *inputData, int imgWidth, int imgHeight,
                                                int imgChannels, int imgWidthStep, int padDivisor);
CDataBlob *convolution(CDataBlob *inputData, Filters *filters, int do_relu);
CDataBlob *convolution0(CDataBlob *inputData, Filters *filters, int do_relu);
CDataBlob *convolution_first(CDataBlob *inputData, Filters *filters);
CDataBlob *convolution_second(CDataBlob *inputData, Filters *filters);
CDataBlob *convolutionDP(CDataBlob *inputData, Filters *filtersP, Filters *filtersD, int do_relu);
CDataBlob *convolutionDP1(CDataBlob *inputData, Filters *filtersP, Filters *filtersD, int do_relu);
CDataBlob *convolution4layerUnit(CDataBlob *inputData, Filters *filtersP1, Filters *filtersD1,
                                 Filters *filtersP2, Filters *filtersD2, int do_relu);
CDataBlob *convolution4layerUnit1(CDataBlob *inputData, Filters *filtersP1, Filters *filtersD1,
                                  Filters *filtersP2, Filters *filtersD2, int do_relu);
CDataBlob *maxpooling2x2S2(CDataBlob *inputData);
CDataBlob *elementAdd(CDataBlob *inputData1, CDataBlob *inputData2);
CDataBlob *upsampleX2(CDataBlob *inputData);
CDataBlob *meshgrid(int feature_width, int feature_height, int stride, int offset);

// TODO implement in SIMD
void bbox_decode(CDataBlob *bbox_pred, CDataBlob *priors, int stride);
void kps_decode(CDataBlob *bbox_pred, CDataBlob *priors, int stride);

CDataBlob *blob2vector(CDataBlob *inputData);
CDataBlob *concat3(CDataBlob *inputData1, CDataBlob *inputData2, CDataBlob *inputData3);

// TODO implement in SIMD
void sigmoid(CDataBlob *inputData);

FaceRect *detection_output(CDataBlob *cls, CDataBlob *reg, CDataBlob *kps, CDataBlob *obj,
                           float overlap_threshold, float confidence_threshold, int top_k, int keep_top_k);

/* ---- Drawing and pixel-format helpers ---- */
void draw_box(unsigned char *a, int x1, int y1, int x2, int y2, float r, float g, float b, int col, int row);
void draw_box_yuv(unsigned char *a, int x1, int y1, int x2, int y2, int y, int u, int v, int col, int row);
void setpixel(unsigned char *pb, int x, int y, int r, int g, int b, int col, int row);
void setpixel_yuv(unsigned char *pb, int x, int y, int y0, int u, int v, int col, int row);
void setpixel_yuv_c(unsigned char *pb, int x, int y, int y0, int u, int v, int col, int row);
void yuv422packed_to_rgb24(unsigned char *yuv, unsigned char *rgb, int source_width, int source_height,
                           int target_width, int target_height);

#endif /* FACEDETECT_CNN_H */