Loading lib_dec/ivas_svd_dec_fx.c +55 −31 Original line number Diff line number Diff line Loading @@ -88,7 +88,7 @@ static void biDiagonalReductionRight_fx( Word16 *g_e ); static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64, Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 singularValues_e[MAX_OUTPUT_CHANNELS], Loading @@ -98,7 +98,7 @@ static void biDiagonalReductionLeft_64( ); static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64, Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 secDiag_e[MAX_OUTPUT_CHANNELS], Loading Loading @@ -893,26 +893,22 @@ static void HouseholderReduction_fx( for (nCh=0;nCh<nChannelsC;nCh++) { biDiagonalReductionLeft_64( singularVectors_Left_64,currChannel, singularValues,singularValues_e, singularVectors_Left_64,nCh, singularValues_fx,singularValues_fx_e, nChannelsL, nChannelsC, currChannel nCh ); biDiagonalReductionRight_64( singularVectors_Left_64,currChannel, secDiag,secDiag_exp, singularVectors_Left_64,nCh, secDiag_fx,secDiag_fx_e, nChannelsL, nChannelsC, currChannel, nCh, &g_fx, &g_e ); } #endif FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { Loading Loading @@ -978,6 +974,7 @@ static void biDiagonalReductionLeft_64( Word64 r_64; Word32 tmp; Word16 tmpe; Word64 norm_64; norm_x=0; move32(); IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ Loading @@ -987,12 +984,12 @@ static void biDiagonalReductionLeft_64( tmpe=add(singularVectors_e,1); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { tmp=E_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe)); tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe)); norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp)); } tmpe=W_norm(norm_64); norm_x=W_extract_h(W_shl(norm_64, tmpe )); norm_x_e = add(sub(shl(singularVectors_e, 1), tmp3), 3 ); norm_x_e = add(sub(shl(singularVectors_e, 1), tmpe), 3 ); } IF ( norm_x ) { Loading @@ -1003,8 +1000,8 @@ static void biDiagonalReductionLeft_64( Word64 tmp64; L_temp_e = norm_x_e; move16(); L_temp = Sqrt( norm_x, &L_temp_e); IF ( GE_64( singularVectors_Left_64[currChannel][currChannel] ) ) L_temp = Sqrt32( norm_x, &L_temp_e); IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) { L_temp = L_negate( L_temp ); } Loading @@ -1021,13 +1018,13 @@ static void biDiagonalReductionLeft_64( tmpe2=W_norm(r_64); r=W_extract_h(W_shl(r_64,tmpe2)); r_e = sub(sub(add(add(1,singularVectors_e),g_e),tmpe,tmpe2)); r_e = sub(sub(add(add(1,singularVectors_e),g_e),tmpe),tmpe2); invVal_e = r_e; invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e); tmpe=add(32,sub(singularVectors_e,g_e)); // TODO: maybe the other way around?? tmp64=W_shr(W_deposit_h(g),tmpe); tmp64=W_shr(W_deposit32_h(g),tmpe); singularVectors_Left_64[currChannel][currChannel]=W_sub(singularVectors_Left_64[currChannel][currChannel],tmp64); // exponent +1 Loading Loading @@ -1062,7 +1059,7 @@ static void biDiagonalReductionLeft_64( } static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64, Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 secDiag_e[MAX_OUTPUT_CHANNELS], Loading @@ -1075,7 +1072,7 @@ static void biDiagonalReductionRight_64( { secDiag[currChannel] = ( *g ); move32(); secDiag_exp[currChannel] = ( *g_e ); secDiag_e[currChannel] = ( *g_e ); move16(); ( *g ) =0; Loading @@ -1088,6 +1085,11 @@ static void biDiagonalReductionRight_64( Word64 norm_64; Word64 abs_x; Word16 idx; Word32 tmp; Word16 tmpe; Word16 iCh,jCh; Word32 norm_x; Word16 norm_x_e; idx=add(currChannel,1); Loading @@ -1099,13 +1101,13 @@ static void biDiagonalReductionRight_64( tmpe=add(singularVectors_e,1); for (jCh=idx;jCh<nChannelsC; jCh++) { tmp=E_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe)); tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe)); norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp)); abs_x=W_add(abs_x, W_abs(singularVectors_Left_64[jCh][currChannel])); } tmpe=W_norm(norm_64); norm_x=W_extract_h(W_shl(norm_64, tmpe )); norm_x_e = add(sub(shl(singularVectors_e, 1), tmp3), 3 ); norm_x_e = add(sub(shl(singularVectors_e, 1), tmpe), 3 ); IF (norm_x) { Word16 invVal_e; Loading @@ -1113,33 +1115,42 @@ static void biDiagonalReductionRight_64( Word64 tmpmul; Word16 tmpe2; Word64 tmp64; Word32 f; Word16 f_e; Word32 L_temp; Word16 L_temp_e; Word64 r_64; Word32 r; Word16 r_e; L_temp_e = norm_x_e; move16(); L_temp = Sqrt( norm_x, &L_temp_e); IF ( GE_64( singularVectors_Left_64[currChannel][idx] ) ) L_temp = Sqrt32( norm_x, &L_temp_e); IF ( GE_64( singularVectors_Left_64[currChannel][idx],0 ) ) { L_temp = L_negate( L_temp ); } g=L_temp; *g=L_temp; move32(); g_e = L_temp_e; *g_e = L_temp_e; move16(); tmp=W_extract_l(W_shr(singularVectors_Left_64[currChannel][idx],singularVectors_e) ); tmpe=sub(sub(g_e, singularVectors_e),1); tmpmul=W_mult0_32_32(g,tmp); tmpe=sub(sub(*g_e, singularVectors_e),1); tmpmul=W_mult0_32_32(*g,tmp); tmpmul=W_shl(tmpmul,tmpe); r_64=W_sub(tmpmul, norm_64 ); tmpe2=W_norm(r_64); r=W_extract_h(W_shl(r_64,tmpe2)); r_e = sub(sub(add(add(1,singularVectors_e),g_e),tmpe,tmpe2)); r_e = sub(sub(add(add(1,singularVectors_e),*g_e),tmpe),tmpe2); invVal_e = r_e; invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e); tmpe=add(32,sub(singularVectors_e,g_e)); // TODO: maybe the other way around?? tmp64=W_shr(W_deposit_h(g),tmpe); tmp64=W_shr(W_deposit32_h(g),tmpe); singularVectors_Left_64[currChannel][idx]=W_sub(singularVectors_Left_64[currChannel][idx],tmp64); // exponent +1 Loading @@ -1164,12 +1175,25 @@ static void biDiagonalReductionRight_64( f=Mpy_32_32(norm_x,invVal); f_e=add(invVal_e, sub(norm_x_e, r_e )); for (jCh=currChannel;jCh<nChannelsL; jCh++) for (jCh=idx;jCh<nChannelsL; jCh++) { tmp2=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe)); singularVectors_Left_64[jCh][iCh]=W_add(singularVectors_Left_64[jCh][iCh],W_mult0_32_32(f,tmp2)); // exponent +1 } } invVal_e = 0; move16(); tmpe = W_norm(abs_x); invVal = BASOP_Util_Divide3232_Scale_newton( W_extract_h(W_shl(abs_x,tmpe)), r, &invVal_e); invVal_e = add(invVal_e, sub(tmpe, r_e)); tmpe = add(1,singularVectors_e); for ( jCh = idx; jCh < nChannelsC ; jCh++) { secDiag[jCh] = Mpy_32_32( W_extract_l(W_shr(singularVectors_Left_64[currChannel][jCh],tmpe)), invVal); move32(); secDiag_e[jCh]=add(invVal_e, tmpe); move16(); } } } return; Loading Loading
lib_dec/ivas_svd_dec_fx.c +55 −31 Original line number Diff line number Diff line Loading @@ -88,7 +88,7 @@ static void biDiagonalReductionRight_fx( Word16 *g_e ); static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64, Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 singularValues_e[MAX_OUTPUT_CHANNELS], Loading @@ -98,7 +98,7 @@ static void biDiagonalReductionLeft_64( ); static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64, Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 secDiag_e[MAX_OUTPUT_CHANNELS], Loading Loading @@ -893,26 +893,22 @@ static void HouseholderReduction_fx( for (nCh=0;nCh<nChannelsC;nCh++) { biDiagonalReductionLeft_64( singularVectors_Left_64,currChannel, singularValues,singularValues_e, singularVectors_Left_64,nCh, singularValues_fx,singularValues_fx_e, nChannelsL, nChannelsC, currChannel nCh ); biDiagonalReductionRight_64( singularVectors_Left_64,currChannel, secDiag,secDiag_exp, singularVectors_Left_64,nCh, secDiag_fx,secDiag_fx_e, nChannelsL, nChannelsC, currChannel, nCh, &g_fx, &g_e ); } #endif FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { Loading Loading @@ -978,6 +974,7 @@ static void biDiagonalReductionLeft_64( Word64 r_64; Word32 tmp; Word16 tmpe; Word64 norm_64; norm_x=0; move32(); IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ Loading @@ -987,12 +984,12 @@ static void biDiagonalReductionLeft_64( tmpe=add(singularVectors_e,1); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { tmp=E_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe)); tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe)); norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp)); } tmpe=W_norm(norm_64); norm_x=W_extract_h(W_shl(norm_64, tmpe )); norm_x_e = add(sub(shl(singularVectors_e, 1), tmp3), 3 ); norm_x_e = add(sub(shl(singularVectors_e, 1), tmpe), 3 ); } IF ( norm_x ) { Loading @@ -1003,8 +1000,8 @@ static void biDiagonalReductionLeft_64( Word64 tmp64; L_temp_e = norm_x_e; move16(); L_temp = Sqrt( norm_x, &L_temp_e); IF ( GE_64( singularVectors_Left_64[currChannel][currChannel] ) ) L_temp = Sqrt32( norm_x, &L_temp_e); IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) { L_temp = L_negate( L_temp ); } Loading @@ -1021,13 +1018,13 @@ static void biDiagonalReductionLeft_64( tmpe2=W_norm(r_64); r=W_extract_h(W_shl(r_64,tmpe2)); r_e = sub(sub(add(add(1,singularVectors_e),g_e),tmpe,tmpe2)); r_e = sub(sub(add(add(1,singularVectors_e),g_e),tmpe),tmpe2); invVal_e = r_e; invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e); tmpe=add(32,sub(singularVectors_e,g_e)); // TODO: maybe the other way around?? tmp64=W_shr(W_deposit_h(g),tmpe); tmp64=W_shr(W_deposit32_h(g),tmpe); singularVectors_Left_64[currChannel][currChannel]=W_sub(singularVectors_Left_64[currChannel][currChannel],tmp64); // exponent +1 Loading Loading @@ -1062,7 +1059,7 @@ static void biDiagonalReductionLeft_64( } static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64, Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 secDiag_e[MAX_OUTPUT_CHANNELS], Loading @@ -1075,7 +1072,7 @@ static void biDiagonalReductionRight_64( { secDiag[currChannel] = ( *g ); move32(); secDiag_exp[currChannel] = ( *g_e ); secDiag_e[currChannel] = ( *g_e ); move16(); ( *g ) =0; Loading @@ -1088,6 +1085,11 @@ static void biDiagonalReductionRight_64( Word64 norm_64; Word64 abs_x; Word16 idx; Word32 tmp; Word16 tmpe; Word16 iCh,jCh; Word32 norm_x; Word16 norm_x_e; idx=add(currChannel,1); Loading @@ -1099,13 +1101,13 @@ static void biDiagonalReductionRight_64( tmpe=add(singularVectors_e,1); for (jCh=idx;jCh<nChannelsC; jCh++) { tmp=E_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe)); tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe)); norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp)); abs_x=W_add(abs_x, W_abs(singularVectors_Left_64[jCh][currChannel])); } tmpe=W_norm(norm_64); norm_x=W_extract_h(W_shl(norm_64, tmpe )); norm_x_e = add(sub(shl(singularVectors_e, 1), tmp3), 3 ); norm_x_e = add(sub(shl(singularVectors_e, 1), tmpe), 3 ); IF (norm_x) { Word16 invVal_e; Loading @@ -1113,33 +1115,42 @@ static void biDiagonalReductionRight_64( Word64 tmpmul; Word16 tmpe2; Word64 tmp64; Word32 f; Word16 f_e; Word32 L_temp; Word16 L_temp_e; Word64 r_64; Word32 r; Word16 r_e; L_temp_e = norm_x_e; move16(); L_temp = Sqrt( norm_x, &L_temp_e); IF ( GE_64( singularVectors_Left_64[currChannel][idx] ) ) L_temp = Sqrt32( norm_x, &L_temp_e); IF ( GE_64( singularVectors_Left_64[currChannel][idx],0 ) ) { L_temp = L_negate( L_temp ); } g=L_temp; *g=L_temp; move32(); g_e = L_temp_e; *g_e = L_temp_e; move16(); tmp=W_extract_l(W_shr(singularVectors_Left_64[currChannel][idx],singularVectors_e) ); tmpe=sub(sub(g_e, singularVectors_e),1); tmpmul=W_mult0_32_32(g,tmp); tmpe=sub(sub(*g_e, singularVectors_e),1); tmpmul=W_mult0_32_32(*g,tmp); tmpmul=W_shl(tmpmul,tmpe); r_64=W_sub(tmpmul, norm_64 ); tmpe2=W_norm(r_64); r=W_extract_h(W_shl(r_64,tmpe2)); r_e = sub(sub(add(add(1,singularVectors_e),g_e),tmpe,tmpe2)); r_e = sub(sub(add(add(1,singularVectors_e),*g_e),tmpe),tmpe2); invVal_e = r_e; invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e); tmpe=add(32,sub(singularVectors_e,g_e)); // TODO: maybe the other way around?? tmp64=W_shr(W_deposit_h(g),tmpe); tmp64=W_shr(W_deposit32_h(g),tmpe); singularVectors_Left_64[currChannel][idx]=W_sub(singularVectors_Left_64[currChannel][idx],tmp64); // exponent +1 Loading @@ -1164,12 +1175,25 @@ static void biDiagonalReductionRight_64( f=Mpy_32_32(norm_x,invVal); f_e=add(invVal_e, sub(norm_x_e, r_e )); for (jCh=currChannel;jCh<nChannelsL; jCh++) for (jCh=idx;jCh<nChannelsL; jCh++) { tmp2=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe)); singularVectors_Left_64[jCh][iCh]=W_add(singularVectors_Left_64[jCh][iCh],W_mult0_32_32(f,tmp2)); // exponent +1 } } invVal_e = 0; move16(); tmpe = W_norm(abs_x); invVal = BASOP_Util_Divide3232_Scale_newton( W_extract_h(W_shl(abs_x,tmpe)), r, &invVal_e); invVal_e = add(invVal_e, sub(tmpe, r_e)); tmpe = add(1,singularVectors_e); for ( jCh = idx; jCh < nChannelsC ; jCh++) { secDiag[jCh] = Mpy_32_32( W_extract_l(W_shr(singularVectors_Left_64[currChannel][jCh],tmpe)), invVal); move32(); secDiag_e[jCh]=add(invVal_e, tmpe); move16(); } } } return; Loading