Commit 8e5931aa authored by thomas dettbarn
Browse files

biDiagonalReductionLeft_64() and biDiagonalReductionRight_64() are ready for a non-draft merge request.

biDiagonalReductionLeft_64() and biDiagonalReductionRight_64() are ready for a non-draft merge request.
parent 31bfe505
Loading
Loading
Loading
Loading
Loading
+19 −11
Original line number Diff line number Diff line
@@ -1004,9 +1004,9 @@ static void biDiagonalReductionLeft_64(
{
/* TODO: For some reason, this choice is optimal, but why? Why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ), for example? */
#define MAGIC_HEADROOM_1 2
#define MAGIC_HEADROOM_2 ( 16 - norm_x_e0 / 4 )
#define MAGIC_HEADROOM_3 ( 16 - norm_x_e0 / 4 )
#define MAGIC_HEADROOM_4 ( 16 - norm_x_e0 / 4 )
#define MAGIC_HEADROOM_2 ( sub( 16, shr( norm_x_e0, 2 ) ) )
#define MAGIC_HEADROOM_3 ( sub( 16, shr( norm_x_e0, 2 ) ) )
#define MAGIC_HEADROOM_4 ( sub( 16, shr( norm_x_e0, 2 ) ) )

    Word16 iCh, jCh;
    Word32 norm_x;
@@ -1077,17 +1077,21 @@ static void biDiagonalReductionLeft_64(
            norm_64 = 0;
            for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
            {
                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3
                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - MAGIC_HEADROOM_3 ) );         // q(factor2) = q(sing)-H3
                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32 , MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3
                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], sub( 32, MAGIC_HEADROOM_3 ) ) );         // q(factor2) = q(sing)-H3
                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                      // q(norm)=2*q(sing)-2*H3
            }
            norm_x_e = W_norm( norm_64 );
            norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm
            f = Mpy_32_32( norm_x, invVal );                    // q(f)=q(norm_x)-q(invVal)
            magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
//            magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
            magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1) );
            magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) );
            magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) );
            magic_shift = sub( magic_shift, shl( invVal_e, 1 ) );
            FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
            {
                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_4 ) );
                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_4 ) ) );
                singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
            }
        }
@@ -1180,19 +1184,23 @@ static void biDiagonalReductionRight_64(
                move64();
                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                {
                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - MAGIC_HEADROOM_3 ) );         // q(factor1) = q(sing)-H3
                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3
                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) );         // q(factor1) = q(sing)-H3
                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3
                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                      // q(norm)=2*q(sing)-2*H3
                }

                norm_x_e = W_norm( norm_64 );
                norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm
                f = Mpy_32_32( norm_x, invVal );                    // q(f)=q(norm_x)-q(invVal)
                magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
                // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
                magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1) );
                magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) );
                magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) );
                magic_shift = sub( magic_shift, shl( invVal_e, 1 ) );

                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                {
                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_4 ) );
                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_4 ) ) );
                    singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
                }
            }