Loading lib_com/ivas_cnst.h +4 −0 Original line number Diff line number Diff line Loading @@ -1142,8 +1142,12 @@ enum #define MASA_TRANSP_BITS 1 #define NO_BITS_MASA_ISM_NO_OBJ 2 #define MASA2TOTAL_THR 0.98f #define BITS_MASA2TOTTAL_DCT0 6 #define STEP_M2T 0.1f #ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL #define STEP_M2T_FX 214748365 // Q31 #endif #define MASA_HEADER_BITS 2 #define MASA_SUBFRAME_BITS 1 #define MASA_LOWBITRATE_MODE_BITS 1 Loading lib_com/ivas_prot.h +26 −0 Original line number Diff line number Diff line Loading @@ -756,6 +756,32 @@ int16_t get_igf_startline( float rand_triangular_signed( int16_t *seed ); #ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL Word16 matrix_product_fx( const Word32 *X_fx, /* i : left hand matrix Qx*/ const Word16 rowsX, /* i : number of rows of the left hand matrix Q0*/ const Word16 colsX, /* i : number of columns of the left hand matrix Q0*/ const Word16 transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication Q0*/ const Word32 *Y_fx, /* i : right hand matrix Qy*/ const Word16 rowsY, /* i : number of rows of the right hand matrix Q0*/ const Word16 colsY, /* i : number of columns of the right hand matrix Q0*/ const Word16 transpY, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/ Word32 *Z_fx /* o : resulting matrix after the matrix multiplication Qx + Qy - 31*/ ); Word16 matrix_product_q30_fx( const Word32 *X_fx, /* i : left hand matrix Q31*/ const Word16 rowsX, /* i : number of rows of the left hand matrix Q0*/ const Word16 colsX, /* i : number of columns of the left hand matrix Q0*/ const Word16 transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication Q0*/ const Word32 *Y_fx, /* i : right hand matrix Q25*/ const Word16 rowsY, /* i : number of rows of the right hand matrix Q0*/ const Word16 colsY, /* i : number of columns of the right hand matrix Q0*/ const Word16 transpY, /* i : 
flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/ Word32 *Z_fx /* o : resulting matrix after the matrix multiplication Q30*/ ); #endif void dtx_read_padding_bits( DEC_CORE_HANDLE st, Loading lib_com/ivas_rom_com.c +42 −0 Original line number Diff line number Diff line Loading @@ -2778,6 +2778,48 @@ const int32_t sep_object_brate[][MAX_NUM_OBJECTS] = }; /* column wise DCT matrices for 4 5, and 8 dim */ #ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL const Word32 dct4_fx[4 * 4] = { // Q31 1073741824, 1402951040, 1073741824, 581109056, 1073741824, 581109056, -1073741824, -1402951040, 1073741824, -581109056, -1073741824, 1402951040, 1073741824, -1402951040, 1073741824, -581109056 }; const Word32 dct5_fx[5 * 5] = { // Q31 960354688, 1291711360, 1098867328, 798219648, 419618304, 960354688, 798219648, -419618304, -1291711360, -1098867328, 960354688, 0, -1358283392, 0, 1358283392, 960354688, -798219648, -419618304, 1291711360, -1098867328, 960354688, -1291711360, 1098867328, -798219648, 419618304 }; const Word32 dct8_fx[8 * 8] = { // Q31 759350208, 1053125952, 991922688, 892708928, 759350208, 596570944, 410813632, 209379648, 759350208, 892708928, 410813632, -209379648, -759350208, -1053125952, -991922688, -596570944, 759350208, 596570944, -410813632, -1053125952, -759350208, 209379648, 991922688, 892708928, 759350208, 209379648, -991922688, -596570944, 759350208, 892708928, -410813632, -1053125952, 759350208, -209379648, -991922688, 596570944, 759350208, -892708928, -410813632, 1053125952, 759350208, -596570944, -410813632, 1053125952, -759350208, -209379648, 991922688, -892708928, 759350208, -892708928, 410813632, 209379648, -759350208, 1053125952, -991922688, 596570944, 759350208, -1053125952, 991922688, -892708928, 759350208, -596570944, 410813632, -209379648 }; const Word32 dct12_fx[12 * 12] = { // Q31 619978560, 869301376, 846752832, 810030848, 759350208, 695569984, 619978560, 533649696, 438301408, 335436960, 226989024, 
114460880, 619978560, 810030848, 619978560, 335436960, 0, -335436960, -619978560, -810030848, -876602816, -810030848, -619978560, -335436960, 619978560, 695569984, 226989024, -335436960, -759350208, -869301376, -619978560, -114460880, 438301408, 810030848, 846752832, 533649696, 619978560, 533649696, -226989024, -810030848, -759350208, -114460880, 619978560, 869301376, 438301408, -335436960, -846752832, -695569984, 619978560, 335436960, -619978560, -810030848, 0, 810030848, 619978560, -335436960, -876602816, -335436960, 619978560, 810030848, 619978560, 114460880, -846752832, -335436960, 759350208, 533649696, -619978560, -695569984, 438301408, 810030848, -226989024, -869301376, 619978560, -114460880, -846752832, 335436960, 759350208, -533649696, -619978560, 695569984, 438301408, -810030848, -226989024, 869301376, 619978560, -335436960, -619978560, 810030848, 0, -810030848, 619978560, 335436960, -876602816, 335436960, 619978560, -810030848, 619978560, -533649696, -226989024, 810030848, -759350208, 114460880, 619978560, -869301376, 438301408, 335436960, -846752832, 695569984, 619978560, -695569984, 226989024, 335436960, -759350208, 869301376, -619978560, 114460880, 438301408, -810030848, 846752832, -533649696, 619978560, -810030848, 619978560, -335436960, 0, 335436960, -619978560, 810030848, -876602816, 810030848, -619978560, 335436960, 619978560, -869301376, 846752832, -810030848, 759350208, -695569984, 619978560, -533649696, 438301408, -335436960, 226989024, -114460880 }; #endif const float dct4[4*4] = { 0.5000f, 0.6533f, 0.5000f, 0.2706f, Loading lib_com/ivas_rom_com.h +6 −0 Original line number Diff line number Diff line Loading @@ -325,6 +325,12 @@ extern const float McMASA_LFEGain_vectors[64]; *----------------------------------------------------------------------------------*/ extern const int32_t sep_object_brate[][MAX_NUM_OBJECTS]; #ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL extern const Word32 dct4_fx[]; extern const Word32 dct5_fx[]; extern const Word32 
dct8_fx[]; extern const Word32 dct12_fx[]; #endif extern const float dct4[]; extern const float dct5[]; extern const float dct8[]; Loading lib_com/ivas_tools.c +242 −0 Original line number Diff line number Diff line Loading @@ -41,6 +41,9 @@ #include "ivas_prot.h" #include "wmc_auto.h" #include "ivas_rom_com.h" #ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL #include "basop_settings.h" #endif /*--------------------------------------------------------------- * sumAbs() Loading Loading @@ -1274,3 +1277,242 @@ float rand_triangular_signed( return 0.5f - 0.5f * sqrtf( 1.0f - rand_val ); } } #ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL Word16 matrix_product_fx( const Word32 *X_fx, /* i : left hand matrix Qx*/ const Word16 rowsX, /* i : number of rows of the left hand matrix Q0*/ const Word16 colsX, /* i : number of columns of the left hand matrix Q0*/ const Word16 transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication Q0*/ const Word32 *Y_fx, /* i : right hand matrix Qy*/ const Word16 rowsY, /* i : number of rows of the right hand matrix Q0*/ const Word16 colsY, /* i : number of columns of the right hand matrix Q0*/ const Word16 transpY, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/ Word32 *Z_fx /* o : resulting matrix after the matrix multiplication Qx + Qy - 31*/ ) { Word16 i, j, k; Word16 x_idx, y_idx; Word32 *Zp_fx = Z_fx; /* Processing */ if ( transpX == 1 && transpY == 0 ) /* We use X transpose */ { if ( rowsX != rowsY ) { return EXIT_FAILURE; } for ( j = 0; j < colsY; ++j ) { for ( i = 0; i < colsX; ++i ) { ( *Zp_fx ) = 0; for ( k = 0; k < rowsX; ++k ) { x_idx = k + i * rowsX; /*Q0*/ y_idx = k + j * rowsY; /*Q0*/ ( *Zp_fx ) = *Zp_fx + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/ } Zp_fx++; } } } else if ( transpX == 0 && transpY == 1 ) /* We use Y transpose */ { if ( colsX != colsY ) { return EXIT_FAILURE; } for ( j = 0; j < rowsY; ++j ) { for ( i = 
0; i < rowsX; ++i ) { ( *Zp_fx ) = 0; for ( k = 0; k < colsX; ++k ) { x_idx = i + k * rowsX; /*Q0*/ y_idx = j + k * rowsY; /*Q0*/ ( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/ } Zp_fx++; } } } else if ( transpX == 1 && transpY == 1 ) /* We use both transpose */ { if ( rowsX != colsY ) { return EXIT_FAILURE; } for ( j = 0; j < rowsY; ++j ) { for ( i = 0; i < colsX; ++i ) { ( *Zp_fx ) = 0; for ( k = 0; k < colsX; ++k ) { x_idx = k + i * rowsX; /*Q0*/ y_idx = j + k * rowsY; /*Q0*/ ( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/ } Zp_fx++; } } } else /* Regular case */ { if ( colsX != rowsY ) { return EXIT_FAILURE; } for ( j = 0; j < colsY; ++j ) { for ( i = 0; i < rowsX; ++i ) { ( *Zp_fx ) = 0; for ( k = 0; k < colsX; ++k ) { x_idx = i + k * rowsX; /*Q0*/ y_idx = k + j * rowsY; /*Q0*/ ( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31 L_sat_add() */ /* TODO: overflow of Z_fx to be checked */ move32(); } Zp_fx++; } } } return EXIT_SUCCESS; } Word16 matrix_product_q30_fx( const Word32 *X_fx, /* i : left hand matrix Q31*/ const Word16 rowsX, /* i : number of rows of the left hand matrix Q0*/ const Word16 colsX, /* i : number of columns of the left hand matrix Q0*/ const Word16 transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication Q0*/ const Word32 *Y_fx, /* i : right hand matrix Q25*/ const Word16 rowsY, /* i : number of rows of the right hand matrix Q0*/ const Word16 colsY, /* i : number of columns of the right hand matrix Q0*/ const Word16 transpY, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/ Word32 *Z_fx /* o : resulting matrix after the matrix multiplication Q30*/ ) { Word16 i, j, k; Word16 x_idx, y_idx; Word32 *Zp_fx = Z_fx; int64_t W_tmp; /* Processing */ test(); test(); test(); if ( transpX == 1 && transpY == 0 ) /* We use X transpose */ { if ( rowsX != rowsY ) { 
return EXIT_FAILURE; } for ( j = 0; j < colsY; ++j ) { for ( i = 0; i < colsX; ++i ) { W_tmp = 0; for ( k = 0; k < rowsX; ++k ) { /*( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) ); */ x_idx = k + i * rowsX; /* Q0 */ y_idx = k + j * rowsY; /* Q0 */ W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56 */ } W_tmp = W_tmp * 64; /* W_shl( W_tmp, 6 ); */ /*Q62*/ ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /* W_round64_L( W_tmp ); */ /*Q30*/ Zp_fx++; } } } else if ( transpX == 0 && transpY == 1 ) /* We use Y transpose */ { if ( colsX != colsY ) { return EXIT_FAILURE; } for ( j = 0; j < rowsY; ++j ) { for ( i = 0; i < rowsX; ++i ) { W_tmp = 0; for ( k = 0; k < colsX; ++k ) { /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) ); */ x_idx = i + k * rowsX; /*Q0*/ y_idx = j + k * rowsY; /*Q0*/ W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56 */ } W_tmp = W_tmp * 64; /*Q62*/ ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/ Zp_fx++; } } } else if ( transpX == 1 && transpY == 1 ) /* We use both transpose */ { if ( rowsX != colsY ) { return EXIT_FAILURE; } for ( j = 0; j < rowsY; ++j ) { for ( i = 0; i < colsX; ++i ) { W_tmp = 0; for ( k = 0; k < colsX; ++k ) { /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) ); */ x_idx = k + i * rowsX; /*Q0*/ y_idx = j + k * rowsY; /*Q0*/ W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56*/ } W_tmp = W_tmp * 64; /*Q62*/ ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/ Zp_fx++; } } } else /* Regular case */ { if ( colsX != rowsY ) { return EXIT_FAILURE; } for ( j = 0; j < colsY; ++j ) { for ( i = 0; i < rowsX; ++i ) { W_tmp = 0; for ( k = 0; k < colsX; ++k ) { /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) ); */ x_idx = i + k * rowsX; /*Q0*/ y_idx = k + j * rowsY; /*Q0*/ W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56*/ } W_tmp = W_tmp 
* 64; /*Q62*/ ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/ Zp_fx++; } } } return EXIT_SUCCESS; } #endif Loading
lib_com/ivas_cnst.h +4 −0 Original line number Diff line number Diff line Loading @@ -1142,8 +1142,12 @@ enum
#define MASA_TRANSP_BITS 1
#define NO_BITS_MASA_ISM_NO_OBJ 2
#define MASA2TOTAL_THR 0.98f
#define BITS_MASA2TOTTAL_DCT0 6
#define STEP_M2T 0.1f
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
/* Fixed-point counterpart of STEP_M2T: round( 0.1 * 2^31 ) = 214748365 */
#define STEP_M2T_FX 214748365 // Q31
#endif
#define MASA_HEADER_BITS 2
#define MASA_SUBFRAME_BITS 1
#define MASA_LOWBITRATE_MODE_BITS 1
Loading
lib_com/ivas_prot.h +26 −0 Original line number Diff line number Diff line Loading @@ -756,6 +756,32 @@ int16_t get_igf_startline(
float rand_triangular_signed(
    int16_t *seed );

#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
/* Fixed-point matrix product Z = op(X) * op(Y), where op() is an optional transposition.
 * Matrices are addressed column by column (element (r, c) is at index r + c * rows).
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the inner dimensions do not match. */
Word16 matrix_product_fx(
    const Word32 *X_fx,   /* i  : left hand matrix                                                                       Qx*/
    const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
    const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
    const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
    const Word32 *Y_fx,   /* i  : right hand matrix                                                                      Qy*/
    const Word16 rowsY,   /* i  : number of rows of the right hand matrix                                                Q0*/
    const Word16 colsY,   /* i  : number of columns of the right hand matrix                                             Q0*/
    const Word16 transpY, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
    Word32 *Z_fx          /* o  : resulting matrix after the matrix multiplication                                       Qx + Qy - 31*/
);

/* Same product as matrix_product_fx(), specialised for a Q31 left matrix and a Q25 right matrix:
 * the products are accumulated in 64 bits and the result is rounded to Q30.
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the inner dimensions do not match. */
Word16 matrix_product_q30_fx(
    const Word32 *X_fx,   /* i  : left hand matrix                                                                       Q31*/
    const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
    const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
    const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
    const Word32 *Y_fx,   /* i  : right hand matrix                                                                      Q25*/
    const Word16 rowsY,   /* i  : number of rows of the right hand matrix                                                Q0*/
    const Word16 colsY,   /* i  : number of columns of the right hand matrix                                             Q0*/
    const Word16 transpY, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
    Word32 *Z_fx          /* o  : resulting matrix after the matrix multiplication                                       Q30*/
);
#endif

void dtx_read_padding_bits(
    DEC_CORE_HANDLE st,
Loading
lib_com/ivas_rom_com.c +42 −0 Original line number Diff line number Diff line Loading @@ -2778,6 +2778,48 @@ const int32_t sep_object_brate[][MAX_NUM_OBJECTS] =
};

/* column wise DCT matrices for 4, 5, 8 and 12 dim */
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
/* Fixed-point (Q31) versions of the DCT matrices; e.g. 1073741824 = 0.5 * 2^31 */
const Word32 dct4_fx[4 * 4] = { // Q31
    1073741824, 1402951040, 1073741824, 581109056,
    1073741824, 581109056, -1073741824, -1402951040,
    1073741824, -581109056, -1073741824, 1402951040,
    1073741824, -1402951040, 1073741824, -581109056
};

const Word32 dct5_fx[5 * 5] = { // Q31
    960354688, 1291711360, 1098867328, 798219648, 419618304,
    960354688, 798219648, -419618304, -1291711360, -1098867328,
    960354688, 0, -1358283392, 0, 1358283392,
    960354688, -798219648, -419618304, 1291711360, -1098867328,
    960354688, -1291711360, 1098867328, -798219648, 419618304
};

const Word32 dct8_fx[8 * 8] = { // Q31
    759350208, 1053125952, 991922688, 892708928, 759350208, 596570944, 410813632, 209379648,
    759350208, 892708928, 410813632, -209379648, -759350208, -1053125952, -991922688, -596570944,
    759350208, 596570944, -410813632, -1053125952, -759350208, 209379648, 991922688, 892708928,
    759350208, 209379648, -991922688, -596570944, 759350208, 892708928, -410813632, -1053125952,
    759350208, -209379648, -991922688, 596570944, 759350208, -892708928, -410813632, 1053125952,
    759350208, -596570944, -410813632, 1053125952, -759350208, -209379648, 991922688, -892708928,
    759350208, -892708928, 410813632, 209379648, -759350208, 1053125952, -991922688, 596570944,
    759350208, -1053125952, 991922688, -892708928, 759350208, -596570944, 410813632, -209379648
};

const Word32 dct12_fx[12 * 12] = { // Q31
    619978560, 869301376, 846752832, 810030848, 759350208, 695569984, 619978560, 533649696, 438301408, 335436960, 226989024, 114460880,
    619978560, 810030848, 619978560, 335436960, 0, -335436960, -619978560, -810030848, -876602816, -810030848, -619978560, -335436960,
    619978560, 695569984, 226989024, -335436960, -759350208, -869301376, -619978560, -114460880, 438301408, 810030848, 846752832, 533649696,
    619978560, 533649696, -226989024, -810030848, -759350208, -114460880, 619978560, 869301376, 438301408, -335436960, -846752832, -695569984,
    619978560, 335436960, -619978560, -810030848, 0, 810030848, 619978560, -335436960, -876602816, -335436960, 619978560, 810030848,
    619978560, 114460880, -846752832, -335436960, 759350208, 533649696, -619978560, -695569984, 438301408, 810030848, -226989024, -869301376,
    619978560, -114460880, -846752832, 335436960, 759350208, -533649696, -619978560, 695569984, 438301408, -810030848, -226989024, 869301376,
    619978560, -335436960, -619978560, 810030848, 0, -810030848, 619978560, 335436960, -876602816, 335436960, 619978560, -810030848,
    619978560, -533649696, -226989024, 810030848, -759350208, 114460880, 619978560, -869301376, 438301408, 335436960, -846752832, 695569984,
    619978560, -695569984, 226989024, 335436960, -759350208, 869301376, -619978560, 114460880, 438301408, -810030848, 846752832, -533649696,
    619978560, -810030848, 619978560, -335436960, 0, 335436960, -619978560, 810030848, -876602816, 810030848, -619978560, 335436960,
    619978560, -869301376, 846752832, -810030848, 759350208, -695569984, 619978560, -533649696, 438301408, -335436960, 226989024, -114460880
};
#endif

const float dct4[4*4] = {
    0.5000f, 0.6533f, 0.5000f, 0.2706f,
Loading
lib_com/ivas_rom_com.h +6 −0 Original line number Diff line number Diff line Loading @@ -325,6 +325,12 @@ extern const float McMASA_LFEGain_vectors[64];
 *----------------------------------------------------------------------------------*/

extern const int32_t sep_object_brate[][MAX_NUM_OBJECTS];
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
/* Fixed-point DCT matrices (Q31), defined in ivas_rom_com.c; sizes are 4x4, 5x5, 8x8 and 12x12 */
extern const Word32 dct4_fx[];
extern const Word32 dct5_fx[];
extern const Word32 dct8_fx[];
extern const Word32 dct12_fx[];
#endif
/* Floating-point DCT matrices */
extern const float dct4[];
extern const float dct5[];
extern const float dct8[];
Loading
lib_com/ivas_tools.c +242 −0 Original line number Diff line number Diff line Loading @@ -41,6 +41,9 @@ #include "ivas_prot.h" #include "wmc_auto.h" #include "ivas_rom_com.h" #ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL #include "basop_settings.h" #endif /*--------------------------------------------------------------- * sumAbs() Loading Loading @@ -1274,3 +1277,242 @@ float rand_triangular_signed( return 0.5f - 0.5f * sqrtf( 1.0f - rand_val ); } } #ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL Word16 matrix_product_fx( const Word32 *X_fx, /* i : left hand matrix Qx*/ const Word16 rowsX, /* i : number of rows of the left hand matrix Q0*/ const Word16 colsX, /* i : number of columns of the left hand matrix Q0*/ const Word16 transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication Q0*/ const Word32 *Y_fx, /* i : right hand matrix Qy*/ const Word16 rowsY, /* i : number of rows of the right hand matrix Q0*/ const Word16 colsY, /* i : number of columns of the right hand matrix Q0*/ const Word16 transpY, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/ Word32 *Z_fx /* o : resulting matrix after the matrix multiplication Qx + Qy - 31*/ ) { Word16 i, j, k; Word16 x_idx, y_idx; Word32 *Zp_fx = Z_fx; /* Processing */ if ( transpX == 1 && transpY == 0 ) /* We use X transpose */ { if ( rowsX != rowsY ) { return EXIT_FAILURE; } for ( j = 0; j < colsY; ++j ) { for ( i = 0; i < colsX; ++i ) { ( *Zp_fx ) = 0; for ( k = 0; k < rowsX; ++k ) { x_idx = k + i * rowsX; /*Q0*/ y_idx = k + j * rowsY; /*Q0*/ ( *Zp_fx ) = *Zp_fx + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/ } Zp_fx++; } } } else if ( transpX == 0 && transpY == 1 ) /* We use Y transpose */ { if ( colsX != colsY ) { return EXIT_FAILURE; } for ( j = 0; j < rowsY; ++j ) { for ( i = 0; i < rowsX; ++i ) { ( *Zp_fx ) = 0; for ( k = 0; k < colsX; ++k ) { x_idx = i + k * rowsX; /*Q0*/ y_idx = j + k * rowsY; /*Q0*/ ( *Zp_fx 
) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/ } Zp_fx++; } } } else if ( transpX == 1 && transpY == 1 ) /* We use both transpose */ { if ( rowsX != colsY ) { return EXIT_FAILURE; } for ( j = 0; j < rowsY; ++j ) { for ( i = 0; i < colsX; ++i ) { ( *Zp_fx ) = 0; for ( k = 0; k < colsX; ++k ) { x_idx = k + i * rowsX; /*Q0*/ y_idx = j + k * rowsY; /*Q0*/ ( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/ } Zp_fx++; } } } else /* Regular case */ { if ( colsX != rowsY ) { return EXIT_FAILURE; } for ( j = 0; j < colsY; ++j ) { for ( i = 0; i < rowsX; ++i ) { ( *Zp_fx ) = 0; for ( k = 0; k < colsX; ++k ) { x_idx = i + k * rowsX; /*Q0*/ y_idx = k + j * rowsY; /*Q0*/ ( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31 L_sat_add() */ /* TODO: overflow of Z_fx to be checked */ move32(); } Zp_fx++; } } } return EXIT_SUCCESS; } Word16 matrix_product_q30_fx( const Word32 *X_fx, /* i : left hand matrix Q31*/ const Word16 rowsX, /* i : number of rows of the left hand matrix Q0*/ const Word16 colsX, /* i : number of columns of the left hand matrix Q0*/ const Word16 transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication Q0*/ const Word32 *Y_fx, /* i : right hand matrix Q25*/ const Word16 rowsY, /* i : number of rows of the right hand matrix Q0*/ const Word16 colsY, /* i : number of columns of the right hand matrix Q0*/ const Word16 transpY, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/ Word32 *Z_fx /* o : resulting matrix after the matrix multiplication Q30*/ ) { Word16 i, j, k; Word16 x_idx, y_idx; Word32 *Zp_fx = Z_fx; int64_t W_tmp; /* Processing */ test(); test(); test(); if ( transpX == 1 && transpY == 0 ) /* We use X transpose */ { if ( rowsX != rowsY ) { return EXIT_FAILURE; } for ( j = 0; j < colsY; ++j ) { for ( i = 0; i < colsX; ++i ) { W_tmp = 0; for ( k = 0; k < rowsX; ++k ) { /*( *Zp_fx ) 
= L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) ); */ x_idx = k + i * rowsX; /* Q0 */ y_idx = k + j * rowsY; /* Q0 */ W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56 */ } W_tmp = W_tmp * 64; /* W_shl( W_tmp, 6 ); */ /*Q62*/ ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /* W_round64_L( W_tmp ); */ /*Q30*/ Zp_fx++; } } } else if ( transpX == 0 && transpY == 1 ) /* We use Y transpose */ { if ( colsX != colsY ) { return EXIT_FAILURE; } for ( j = 0; j < rowsY; ++j ) { for ( i = 0; i < rowsX; ++i ) { W_tmp = 0; for ( k = 0; k < colsX; ++k ) { /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) ); */ x_idx = i + k * rowsX; /*Q0*/ y_idx = j + k * rowsY; /*Q0*/ W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56 */ } W_tmp = W_tmp * 64; /*Q62*/ ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/ Zp_fx++; } } } else if ( transpX == 1 && transpY == 1 ) /* We use both transpose */ { if ( rowsX != colsY ) { return EXIT_FAILURE; } for ( j = 0; j < rowsY; ++j ) { for ( i = 0; i < colsX; ++i ) { W_tmp = 0; for ( k = 0; k < colsX; ++k ) { /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) ); */ x_idx = k + i * rowsX; /*Q0*/ y_idx = j + k * rowsY; /*Q0*/ W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56*/ } W_tmp = W_tmp * 64; /*Q62*/ ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/ Zp_fx++; } } } else /* Regular case */ { if ( colsX != rowsY ) { return EXIT_FAILURE; } for ( j = 0; j < colsY; ++j ) { for ( i = 0; i < rowsX; ++i ) { W_tmp = 0; for ( k = 0; k < colsX; ++k ) { /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) ); */ x_idx = i + k * rowsX; /*Q0*/ y_idx = k + j * rowsY; /*Q0*/ W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56*/ } W_tmp = W_tmp * 64; /*Q62*/ ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/ Zp_fx++; } } } return EXIT_SUCCESS; } #endif