Loading lib_dec/ivas_svd_dec_fx.c +22 −26 Original line number Diff line number Diff line Loading @@ -1002,11 +1002,11 @@ static void biDiagonalReductionLeft_64( Word32 *g, Word16 *g_e ) { #define HEADROOM_LEFT_1 2 #define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) // 10 #define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) // 4 #define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) // 14 /* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? */ #define MAGIC_HEADROOM_1 2 #define MAGIC_HEADROOM_2 ( 16 - norm_x_e0 / 4 ) #define MAGIC_HEADROOM_3 ( 16 - norm_x_e0 / 4 ) #define MAGIC_HEADROOM_4 ( 16 - norm_x_e0 / 4 ) Word16 iCh, jCh; Word32 norm_x; Loading @@ -1027,7 +1027,7 @@ static void biDiagonalReductionLeft_64( move64(); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) ); // q(sing)-H1 // exp(sing)+H1 tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = W_norm( norm_64 ); Loading @@ -1044,16 +1044,16 @@ static void biDiagonalReductionLeft_64( Word32 r, invVal; Word16 r_e, invVal_e; ( *g_e ) = add( sub( add( HEADROOM_LEFT_1, HEADROOM_LEFT_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) move16(); ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) { ( *g ) = L_negate( *g ); } factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2, ( *g_e ) ); tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) ); tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm) tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) r_e = W_norm( r_64 ); Loading @@ -1077,17 +1077,17 @@ static void biDiagonalReductionLeft_64( norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) ); // q(factor2) = q(sing)-H3 factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - 2 * HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 ) - 2 * invVal_e; magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) ); factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_4 ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) ); } } Loading Loading @@ -1115,10 +1115,6 @@ static void biDiagonalReductionRight_64( Word64 norm_64; Word16 idx; #define HEADROOM_RIGHT_1 2 #define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 10 #define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 4 #define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 14 ( *g ) = 0; Loading @@ -1133,7 +1129,7 @@ static void biDiagonalReductionRight_64( FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { Word32 tmp; tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) ); // q(sing)-H1 // exp(sing)+H1 tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = W_norm( norm_64 ); Loading @@ -1155,7 +1151,7 @@ static void biDiagonalReductionRight_64( Word32 invVal; Word16 invVal_e; ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) move16(); ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) ) Loading @@ -1164,8 +1160,8 @@ static void biDiagonalReductionRight_64( } move32(); move16(); factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2, *g_e ); factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, *g_e ); tmpmul = W_mult0_32_32( *g, factor2 ); // q(tmpmul)=q(g)+q(factor2) tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) Loading @@ -1185,19 +1181,19 @@ static void biDiagonalReductionRight_64( move64(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) ); // q(factor2) = q(sing)-H3 factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - 2 * HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 ) - 2 * invVal_e; magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_4 ) ); factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_4 ) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) ); } } Loading Loading
lib_dec/ivas_svd_dec_fx.c +22 −26 Original line number Diff line number Diff line Loading @@ -1002,11 +1002,11 @@ static void biDiagonalReductionLeft_64( Word32 *g, Word16 *g_e ) { #define HEADROOM_LEFT_1 2 #define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) // 10 #define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) // 4 #define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) // 14 /* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? */ #define MAGIC_HEADROOM_1 2 #define MAGIC_HEADROOM_2 ( 16 - norm_x_e0 / 4 ) #define MAGIC_HEADROOM_3 ( 16 - norm_x_e0 / 4 ) #define MAGIC_HEADROOM_4 ( 16 - norm_x_e0 / 4 ) Word16 iCh, jCh; Word32 norm_x; Loading @@ -1027,7 +1027,7 @@ static void biDiagonalReductionLeft_64( move64(); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) ); // q(sing)-H1 // exp(sing)+H1 tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = W_norm( norm_64 ); Loading @@ -1044,16 +1044,16 @@ static void biDiagonalReductionLeft_64( Word32 r, invVal; Word16 r_e, invVal_e; ( *g_e ) = add( sub( add( HEADROOM_LEFT_1, HEADROOM_LEFT_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) move16(); ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) { ( *g ) = L_negate( *g ); } factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2, ( *g_e ) ); tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) ); tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm) tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) r_e = W_norm( r_64 ); Loading @@ -1077,17 +1077,17 @@ static void biDiagonalReductionLeft_64( norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) ); // q(factor2) = q(sing)-H3 factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - 2 * HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 ) - 2 * invVal_e; magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) ); factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_4 ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) ); } } Loading Loading @@ -1115,10 +1115,6 @@ static void biDiagonalReductionRight_64( Word64 norm_64; Word16 idx; #define HEADROOM_RIGHT_1 2 #define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 10 #define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 4 #define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 14 ( *g ) = 0; Loading @@ -1133,7 +1129,7 @@ static void biDiagonalReductionRight_64( FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { Word32 tmp; tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) ); // q(sing)-H1 // exp(sing)+H1 tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = W_norm( norm_64 ); Loading @@ -1155,7 +1151,7 @@ static void biDiagonalReductionRight_64( Word32 invVal; Word16 invVal_e; ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) move16(); ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) ) Loading @@ -1164,8 +1160,8 @@ static void biDiagonalReductionRight_64( } move32(); move16(); factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2, *g_e ); factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, *g_e ); tmpmul = W_mult0_32_32( *g, factor2 ); // q(tmpmul)=q(g)+q(factor2) tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) Loading @@ -1185,19 +1181,19 @@ static void biDiagonalReductionRight_64( move64(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) ); // q(factor2) = q(sing)-H3 factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - 2 * HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 ) - 2 * invVal_e; magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_4 ) ); factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_4 ) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) ); } } Loading