brought experiment 5 to the same state as experiment 6. (a1cac2d7) · Commits · SA4 / Audio / IVAS BASOP

lib_dec/ivas_svd_dec_fx.c

+191 −196

Original line number	Diff line number	Diff line
		@@ -79,16 +79,13 @@ static void biDiagonalReductionLeft_64(
		static void biDiagonalReductionRight_64(
		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
		Word16 bitwindow,
		Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
		const Word16 nChannelsL, /* Q0 */
		const Word16 nChannelsC, /* Q0 */
		const Word16 currChannel, /* Q0 */
		Word32 *g, /* Q31 */
		Word16 *g_e
		);

		#endif
		#else
		static void biDiagonalReductionLeft_fx(
		Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
		Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		@@ -116,6 +113,7 @@ static void biDiagonalReductionRight_fx(
		Word16 *sig_x_e,
		Word32 *g /* Q31 */
		); // Q31
		#endif

		static void singularVectorsAccumulationLeft_fx(
		Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
		@@ -345,7 +343,11 @@ Word16 svd_fx(
		move16();
		FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ )
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		IF( LT_32( L_shl_sat( singularValues_fx[iCh], sub( singularValues_fx_e[iCh], singularValues_fx_e[iCh + 1] ) ), singularValues_fx[iCh + 1] ) )
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( BASOP_Util_Cmp_Mant32Exp( singularValues_fx[iCh], singularValues_fx_e[iCh], singularValues_fx[iCh + 1], singularValues_fx_e[iCh + 1] ) < 0 )
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		condition = 1;
		move16();
		@@ -450,14 +452,24 @@ static Word16 BidagonalDiagonalisation_fx(

		FOR( jCh = iCh; jCh >= 0; jCh-- )
		{
		split = sub( jCh, 1 ); /* Q0 */
		#ifdef OPT_MCH_DEC_V1_NBE
		Word16 com_e = s_max( secDiag_new_e[jCh], eps_x_e );
		IF( LE_32( L_shr( L_abs( secDiag_fx[jCh] ), sub( com_e, secDiag_new_e[jCh] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is secDiag[ch] vanishing compared to eps_x */
		#else
		split = sub( jCh, 1 ); /* Q0 */ /* OPT_MCH_DEC_V1_NBE */
		IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( secDiag_fx[jCh] ), secDiag_new_e[jCh], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is secDiag[ch] vanishing compared to eps_x */
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		found_split = 0;
		move16();
		BREAK;
		}
		#ifdef OPT_MCH_DEC_V1_NBE
		com_e = s_max( singularValues_new_e[jCh - 1], eps_x_e );
		IF( LE_32( L_shr( L_abs( singularValues_fx[jCh - 1] ), sub( com_e, singularValues_new_e[jCh - 1] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is singularValues[jCh - 1] vanishing compared to eps_x */
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( singularValues_fx[split] ), singularValues_new_e[split], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		BREAK;
		}
		@@ -485,14 +497,21 @@ static Word16 BidagonalDiagonalisation_fx(
		move32();
		c_e = 0;
		move16();

		#ifdef OPT_MCH_DEC_V1_NBE
		split = sub( jCh, 1 ); /* Q0 */
		#endif /* OPT_MCH_DEC_V1_NBE */
		FOR( kCh = jCh; kCh <= iCh; kCh++ )
		{
		g = Mpy_32_32( s, secDiag_fx[kCh] ); /* exp(s_e + secDiag_new_e) */
		g_e = add( s_e, secDiag_new_e[kCh] );
		secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */
		secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] );
		#ifdef OPT_MCH_DEC_V1_NBE
		Word16 com_e = s_max( g_e, eps_x_e );
		IF( LE_32( L_shr( L_abs( g ), sub( com_e, g_e ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) )
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( g ), g_e, Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		BREAK;
		}
		@@ -857,16 +876,13 @@ static void HouseholderReduction_fx(
		Word16 nCh;
		push_wmops("HouseholderReduction_fx");
		#ifdef MYCHANGES

		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		Word32 g_fx = 0;
		Word16 g_e = 0;
		move32();
		move16();

		Word32 sig_x_fx = 0;
		Word16 sig_x_fx_e = 0;
		move32();
		move16();
		#else

		// float g = 0.0f, sig_x = 0.0f;// to be removed
		@@ -879,7 +895,6 @@ static void HouseholderReduction_fx(

		Word16 iCh, jCh;
		Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		printf("\n");
		#ifdef MYCHANGES
		push_wmops("HouseholderReduction_fx 64");
		FOR( jCh = 0; jCh < nChannelsL; jCh++ )
		@@ -900,52 +915,47 @@ static void HouseholderReduction_fx(
		nChannelsC,
		nCh
		);
		{
		int i,j;
		printf("\ncompare%d, (%d)\x1b[1;32mstart\x1b[0m\n",nCh,singularVectors_Left_e);
		for (i=0;i<nChannelsL;i++)
		{
		printf("compare%d,%d: \x1b[1;32m",nCh,i);
		for (j=0;j<nChannelsC;j++)
		{
		Word16 n;
		n=W_norm(singularVectors_Left_64[i][j]);
		printf("%08X ",W_extract_h(W_shl(singularVectors_Left_64[i][j],n)));
		}
		printf("\x1b[0m\n");
		}
		printf("compare%d \x1b[1;32mend\x1b[0m\n",nCh);
		}

		// bitwindow=nCh+1;
		singularValues_fx_e[nCh]=add(singularVectors_Left_e,singularValues_fx_e[nCh]);
		secDiag_fx[nCh]=g_fx;
		move32();
		secDiag_fx_e[nCh]=add(singularVectors_Left_e,g_e);
		bitwindow=2;
		biDiagonalReductionRight_64(
		singularVectors_Left_64,bitwindow,
		secDiag_fx,secDiag_fx_e,
		nChannelsL,
		nChannelsC,
		nCh,
		&g_fx,
		&g_e
		);
		{
		Word16 L_temp_e;
		Word32 L_temp;
		L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
		IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
		{
		*eps_x_fx = L_temp; /* exp(L_temp_e) */
		move32();
		*eps_x_fx_e = L_temp_e;
		move32();
		}
		}
		}
		{
		int i,j;
		printf("\nCOMPARE%d, (%d)\x1b[1;32mstart\x1b[0m\n",nCh,singularVectors_Left_e);
		for (i=0;i<nChannelsL;i++)
		for (j=0;j<nChannelsL;j++)
		{
		printf("COMPARE%d,%d: \x1b[1;32m",nCh,i);
		for (j=0;j<nChannelsC;j++)
		for (i=0;i<nChannelsC;i++)
		{
		Word16 n;
		n=W_norm(singularVectors_Left_64[i][j]);
		printf("%08X<%3d ",W_extract_h(W_shl(singularVectors_Left_64[i][j],n)),n);
		}
		printf("\x1b[0m\n");
		n=W_norm(singularVectors_Left_64[j][i]);
		singularVectors_Left_fx[j][i]=W_extract_h(W_shl(singularVectors_Left_64[j][i],n));
		singularVectors_Left_fx_e[j][i]=sub(add(32,singularVectors_Left_e),n);
		}
		printf("COMPARE%d \x1b[1;32mend\x1b[0m\n",nCh);
		}
		}
		pop_wmops();
		#endif
		#else
		push_wmops("HouseholderReduction_fx 32");
		FOR( jCh = 0; jCh < nChannelsL; jCh++ )
		{
		@@ -960,35 +970,7 @@ static void HouseholderReduction_fx(
		FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
		{
		biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
		{
		int i,j;
		printf("compare%d, start\n",nCh);
		for (i=0;i<nChannelsL;i++)
		{
		printf("compare%d,%d: ",nCh,i);
		for (j=0;j<nChannelsC;j++)
		{
		printf("%08X ",singularVectors_Left_fx[i][j]);
		}
		printf("\n");
		}
		printf("compare%d, end\n",nCh);
		}
		biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
		{
		int i,j;
		printf("COMPARE%d, start\n",nCh);
		for (i=0;i<nChannelsL;i++)
		{
		printf("COMPARE%d,%d: ",nCh,i);
		for (j=0;j<nChannelsC;j++)
		{
		printf("%08X<%3d ",singularVectors_Left_fx[i][j],singularVectors_Left_fx_e[i][j]);
		}
		printf("\n");
		}
		printf("COMPARE%d, end\n",nCh);
		}

		Word16 L_temp_e;
		Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
		@@ -1000,57 +982,10 @@ static void HouseholderReduction_fx(
		move32();
		}
		}





		pop_wmops();
		{
		int i,j;
		static int replacecnt=0;
		static int bettercnt=0;
		static int totalcnt=0;

		for (i=0;i<nChannelsL;i++)
		{
		printf("EXPONENT%02d: IN%3d ",i,singularVectors_Left_e);
		for (j=0;j<nChannelsC;j++)
		{
		Word16 n;
		Word32 tmp;
		Word32 minemant;
		unsigned int x,y;
		int mine,theirs;
		n=norm_l(singularVectors_Left_fx[i][j]);
		minemant=singularVectors_Left_fx[i][j]<<n;
		x=(unsigned int)minemant;
		printf("[(%2d)",singularVectors_Left_fx_e[i][j]-n);
		theirs=singularVectors_Left_fx_e[i][j]-n;
		#endif

		n=W_norm(singularVectors_Left_64[i][j]);
		tmp=W_extract_h(W_shl(singularVectors_Left_64[i][j],n));
		y=(unsigned int)tmp;

		n=32+singularVectors_Left_e-n;
		mine=n;
		printf("MINE:%2d]",n);
		if (!((x^y)&0xff000000)) bettercnt++;
		if (mine==theirs || minemant==singularVectors_Left_fx[i][j])
		{
		singularVectors_Left_fx[i][j]=x;
		singularVectors_Left_fx_e[i][j]=mine;
		replacecnt++;
		printf("\x1b[0;32m");
		}
		printf("%08X/%08X ",x,y);
		printf("\x1b[0m");
		totalcnt++;
		}
		printf("\n");
		}
		printf("\nbetter %d replace:%d /%d\n",bettercnt,replacecnt,totalcnt);
		}

		/* Singular vector accumulation */
		singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
		@@ -1084,6 +1019,10 @@ static void biDiagonalReductionLeft_64(
		Word32 norm_x, g;
		Word16 norm_x_e, g_e;
		Word64 norm_64;
		g=0;
		g_e=0;
		move32();
		move16();
		norm_x=0;
		move32();
		IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
		@@ -1161,18 +1100,16 @@ static void biDiagonalReductionLeft_64(
		singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr(W_mult0_32_32( f, factor1 ), magic_shift ) );
		}
		}
		}
		singularValues[currChannel] = g;
		singularValues_e[currChannel] = g_e;
		move32();
		move16();
		}
		}

		static void biDiagonalReductionRight_64(
		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
		Word16 bitwindow,
		Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
		const Word16 nChannelsL, /* Q0 */
		const Word16 nChannelsC, /* Q0 */
		const Word16 currChannel, /* Q0 */
		@@ -1184,19 +1121,10 @@ static void biDiagonalReductionRight_64(
		Word32 norm_x;
		Word16 norm_x_e;
		Word64 norm_64;
		Word32 abs_x;
		Word16 abs_x_e;
		Word64 abs_64;
		Word16 idx;
		Word16 bitwindow0;



		secDiag[currChannel] = ( *g );
		secDiag_e[currChannel] = ( *g_e );
		move32();
		move16();
		bitwindow0=bitwindow;

		( *g ) =0;
		( *g_e ) = 0;
		@@ -1205,24 +1133,18 @@ static void biDiagonalReductionRight_64(
		IF ( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
		{
		norm_64=0;
		abs_64=0;
		move64();
		move64();
		idx = add( currChannel, 1);
		FOR ( jCh = idx; jCh < nChannelsC; jCh++ )
		{
		Word32 tmp;
		tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow+1) );
		tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
		norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp) );
		abs_64 = W_add( abs_64, W_abs( singularVectors_Left_64[currChannel][jCh]) );
		}
		norm_x_e = W_norm( norm_64);
		norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
		norm_x_e = add( sub( shl( bitwindow+1, 1), norm_x_e), 1);
		norm_x_e = add( sub( shl( bitwindow, 1), norm_x_e), 1);
		move16();
		abs_x_e = W_norm( abs_64);
		abs_x = W_extract_h( W_shl( abs_64, abs_x_e) );
		abs_x_e = add( sub( add( bitwindow, bitwindow), abs_x_e), 1);

		IF ( norm_x )
		{
		@@ -1237,7 +1159,6 @@ static void biDiagonalReductionRight_64(
		Word32 r;
		Word16 r_e;
		Word32 f;
		Word16 f_e;
		Word32 invVal;
		Word16 invVal_e;

		@@ -1252,17 +1173,15 @@ static void biDiagonalReductionRight_64(
		*g_e = tmp_g_e;
		move32();
		move16();
		factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow+1) );
		tmp_e = sub( tmp_g_e, bitwindow+1);
		factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow) );
		tmp_e = sub( tmp_g_e, bitwindow);
		tmpmul = W_mult0_32_32( tmp_g, factor2);
		tmpmul = W_shl(tmpmul, tmp_e);
		r_64 = W_sub( tmpmul, norm_64 );
		r_e = W_norm( r_64);
		r = W_extract_h( W_shl( r_64, r_e) );
		r_e = sub( add( shl( bitwindow, 1), 1), r_e );
		// r_e=2*bitwindow+1-r_e;

		invVal_e = r_e;
		invVal_e = 0;
		move16();
		invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e);

		@@ -1285,8 +1204,7 @@ static void biDiagonalReductionRight_64(
		norm_x_e = W_norm( norm_64);
		norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
		f = Mpy_32_32( norm_x, invVal);
		f_e = add( invVal_e, sub( norm_x_e, r_e) );
		magic_shift = 22-2*norm_x_e+4*r_e+3*f_e; // FIXME: HOW IS THIS WORKING?????!?!?!?!?!?!?!?!?!?
		magic_shift = 25+norm_x_e- r_e ; // FIXME: Why does this work?

		FOR( jCh = idx; jCh < nChannelsC; jCh++ )
		{
		@@ -1294,25 +1212,10 @@ static void biDiagonalReductionRight_64(
		singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2), magic_shift) );
		}
		}
		// FIXME BEGIN: The following code has not yet been tested
		invVal_e = 0;
		move16();
		invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e);
		invVal_e = add( invVal_e, sub( abs_x_e, r_e) );
		bitwindow = bitwindow0;
		move16();
		FOR ( jCh = idx; jCh < nChannelsL ; jCh++ )
		{
		secDiag[jCh] = Mpy_32_32( W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) ), invVal );
		move32();
		secDiag_e[jCh] = add(invVal_e, bitwindow);
		move16();
		}
		// FIXME END
		}
		}
		}
		#endif
		#else
		static void biDiagonalReductionLeft_fx(
		Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
		Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		@@ -1360,9 +1263,15 @@ static void biDiagonalReductionLeft_fx(
		Word16 invVal_e;
		Word32 invVal;
		invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
		#ifdef OPT_MCH_DEC_V1_NBE
		Word64 temp = 0;
		move64();
		Word16 max_e = MIN_16;
		#else /* OPT_MCH_DEC_V1_NBE */
		norm_x = 0;
		move32();
		norm_x_e = 0;
		#endif /* OPT_MCH_DEC_V1_NBE */
		move16();

		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		@@ -1372,8 +1281,25 @@ static void biDiagonalReductionLeft_fx(
		move32();
		singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
		move16();
		#ifdef OPT_MCH_DEC_V1_NBE
		max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] );
		#else /* OPT_MCH_DEC_V1_NBE */
		norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
		#endif /* OPT_MCH_DEC_V1_NBE */
		}

		#ifdef OPT_MCH_DEC_V1_NBE
		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors2_e[jCh][currChannel] ), 1 ) ) );
		}

		Word16 nrm = W_norm( temp );
		nrm = sub( nrm, 32 );
		norm_x = W_shl_sat_l( temp, nrm );
		norm_x_e = sub( add( max_e, max_e ), nrm );
		#endif /* OPT_MCH_DEC_V1_NBE */

		IF( GT_16( norm_x_e, 0 ) )
		{
		norm_x = MAX_32;
		@@ -1401,6 +1327,30 @@ static void biDiagonalReductionLeft_fx(

		FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		Word16 max2_e = MIN_16;
		max_e = MIN_16;
		move16();
		move16();
		temp = 0;
		move64();

		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] ); /* exp(norm_x_e) */
		max2_e = s_max( max2_e, singularVectors2_e[jCh][iCh] ); /* exp(norm_x_e) */
		}
		max_e = add( max_e, max2_e );

		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ) ) ) );
		}
		nrm = W_norm( temp );
		nrm = sub( nrm, 32 );
		norm_x = W_shl_sat_l( temp, nrm );
		norm_x_e = sub( max_e, nrm );
		#else /* OPT_MCH_DEC_V1_NBE */
		norm_x = 0;
		move32();
		norm_x_e = 0;
		@@ -1409,6 +1359,7 @@ static void biDiagonalReductionLeft_fx(
		{
		norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
		}
		#endif /* OPT_MCH_DEC_V1_NBE */

		f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
		f_e = add( invVal_e, sub( norm_x_e, r_e ) );
		@@ -1565,6 +1516,7 @@ static void biDiagonalReductionRight_fx(

		return;
		}
		#endif

		/*-------------------------------------------------------------------------
		* singularVectorsAccumulationLeft()
		@@ -1659,8 +1611,16 @@ static void singularVectorsAccumulationLeft_fx(
		move32();
		}
		}
		#ifdef OPT_MCH_DEC_V1_NBE
		Word16 exp = s_max( singularVectors_Left_e[nCh][nCh], 1 );
		singularVectors_Left[nCh][nCh] = L_sub( L_shr( singularVectors_Left[nCh][nCh], sub( exp, singularVectors_Left_e[nCh][nCh] ) ), L_shr( MINUS_ONE_IN_Q31, exp ) ); /* exp(sing_exp2) */
		move32();
		singularVectors_Left_e[nCh][nCh] = exp;
		move16();
		#else /* OPT_MCH_DEC_V1_NBE */
		singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */
		move32();
		#endif /* OPT_MCH_DEC_V1_NBE */
		}
		// fclose(fp);
		FOR( nCh = 0; nCh < nChannelsL; nCh++ )
		@@ -1723,21 +1683,56 @@ static void singularVectorsAccumulationRight_fx(

		FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		Word64 norm_val = 0;
		move64();
		Word16 maxL_e = MIN_16;
		Word16 maxR_e = MIN_16;
		Word16 maxR2_e = MIN_16;
		move16();
		move16();
		move16();
		FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
		{
		maxL_e = s_max( maxL_e, singularVectors_Left_e[nCh][k] );
		maxR_e = s_max( maxR_e, sing_right_exp[k][iCh] );
		maxR2_e = s_max( maxR2_e, sing_right_exp[k][nCh] );
		}
		#else /* OPT_MCH_DEC_V1_NBE */
		norm_y = 0;
		move32();
		norm_y_e = 0;
		move16();
		#endif /* OPT_MCH_DEC_V1_NBE */

		FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		norm_val = W_mac_32_32( norm_val, L_shr( singularVectors_Left[nCh][k], sub( maxL_e, singularVectors_Left_e[nCh][k] ) ), L_shr( singularVectors_Right[k][iCh], sub( maxR_e, sing_right_exp[k][iCh] ) ) );
		#else /* OPT_MCH_DEC_V1_NBE */
		norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
		#endif /* OPT_MCH_DEC_V1_NBE */
		}
		#ifdef OPT_MCH_DEC_V1_NBE
		norm_y_e = W_norm( norm_val );
		norm_y = W_extract_h( W_shl( norm_val, norm_y_e ) );
		norm_y_e = sub( add( maxL_e, maxR_e ), norm_y_e );

		Word16 max_new = s_max( maxR_e, add( maxR2_e, norm_y_e ) );
		#endif /* OPT_MCH_DEC_V1_NBE */
		FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		Word32 temp = Mpy_32_32( norm_y, singularVectors_Right[k][nCh] );
		Word32 op2 = L_shr( temp, sub( max_new, add( norm_y_e, sing_right_exp[k][nCh] ) ) );
		singularVectors_Right[k][iCh] = L_add_sat( L_shr( singularVectors_Right[k][iCh], sub( max_new, sing_right_exp[k][iCh] ) ), op2 ); /* exp(sing_right_exp) */
		move32();
		singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], max_new ); /* Q31 */
		#else /* OPT_MCH_DEC_V1_NBE */
		singularVectors_Right[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Right[k][iCh], sing_right_exp[k][iCh], Mpy_32_32( norm_y, singularVectors_Right[k][nCh] ), add( norm_y_e, sing_right_exp[k][nCh] ), &sing_right_exp[k][iCh] ); /* exp(sing_right_exp) */
		move32();
		singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], sing_right_exp[k][iCh] ); /* Q31 */
		#endif /* OPT_MCH_DEC_V1_NBE */
		move32();
		sing_right_exp[k][iCh] = 0;
		move16();