Commit 14ec31bf authored by thomas dettbarn
Browse files

closer to the svd-optimizations-float.

parent 98b25ef6
Loading
Loading
Loading
Loading
+24 −27
Original line number Diff line number Diff line
@@ -917,7 +917,7 @@ static void biDiagonalReductionLeft_fx(
    const Word16 currChannel  /* Q0 */
)
{
    Word16 iCh, jCh, idx;
    Word16 iCh, jCh;
    Word32 norm_x, f, r, g;
    Word16 norm_x_e, f_e, r_e, g_e;
    Word32 L_temp;
@@ -931,14 +931,13 @@ static void biDiagonalReductionLeft_fx(

    IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
    {
        idx = currChannel;
        move16();

        norm_x = 0;
        move32();
        norm_x_e = 0;
        move16();
        FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
        FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
        {
            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
        }
@@ -951,8 +950,8 @@ static void biDiagonalReductionLeft_fx(
            move16();
            L_temp = Sqrt32( norm_x, &L_temp_e );
//            L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
                                                  //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
            if ( singularVectors[currChannel][idx] >= 0 )
                                                  //( *g ) = L_negate( GE_32( singularVectors[currChannel][currChannel], 0 ) ? L_temp : L_negate( L_temp ) );
            if ( singularVectors[currChannel][currChannel] >= 0 )
            {
                L_temp = L_negate( L_temp );
            }
@@ -961,8 +960,8 @@ static void biDiagonalReductionLeft_fx(
            g_e = L_temp_e;
            move16();

            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( g ), singularVectors[currChannel][idx] ), add( singularVectors2_e[currChannel][idx], g_e), -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
            singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( g ), g_e, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( g ), singularVectors[currChannel][currChannel] ), add( singularVectors2_e[currChannel][currChannel], g_e), -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
            singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors2_e[currChannel][currChannel], -g, g_e, &singularVectors2_e[currChannel][currChannel] ); /* sing_exp */
            move32();

            invVal_e = r_e;
@@ -975,7 +974,7 @@ static void biDiagonalReductionLeft_fx(
                move32();
                norm_x_e = 0;
                move16();
                FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
                FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
                {
                    norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
                }
@@ -983,13 +982,12 @@ static void biDiagonalReductionLeft_fx(
                f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
                f_e = add( invVal_e, sub( norm_x_e, r_e ) );

                FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
                FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
                {
                    singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
                    move32();
                }
            }

        }

        // rescaling block
@@ -1199,7 +1197,7 @@ static void biDiagonalReductionRight_fx(
            }
            ELSE
            {
                ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */
                ( *g ) = L_temp; /* exp(L_temp_e) */
                move32();
            }
            *g_e = L_temp_e;
@@ -1211,18 +1209,6 @@ static void biDiagonalReductionRight_fx(
            move32();


            invVal_e = 0;
            move16();
            invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e );
            invVal_e = add(invVal_e, sub( abs_x_e,r_e ) );
            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
            {
                secDiag[jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                move32();
                secDiag_exp[jCh] = add( invVal_e, singularVectors2_e[currChannel][jCh] );
                move16();
			
            }
            FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /*  nChannelsL */
            {
                norm_x = 0;
@@ -1233,17 +1219,28 @@ static void biDiagonalReductionRight_fx(
                {
                    norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
                }
                norm_x = BASOP_Util_Divide3232_Scale_newton( norm_x, abs_x, &invVal_e );
                norm_x_e = add( invVal_e, sub(norm_x_e, abs_x_e ) );
                norm_x = BASOP_Util_Divide3232_Scale_newton( norm_x, r, &invVal_e );
                norm_x_e = add( invVal_e, sub(norm_x_e, r_e ) );
                FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
                {
                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors2_e[currChannel][jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
                    move32();
                }
            }
            invVal_e = 0;
            move16();
            invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e );
            invVal_e = add(invVal_e, sub( abs_x_e,r_e ) );
            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
            {
                secDiag[jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                move32();
                secDiag_exp[jCh] = add( invVal_e, singularVectors2_e[currChannel][jCh] );
                move16();
			
            }
        }
    }
    return;
}
#else