Commit 0cc97443 authored by Nicolas Roussin
Browse files

Optimise matrix function.

parent 5f95f44e
Loading
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -109,6 +109,7 @@
#define OPT_2239_IVAS_FILTER_PROCESS               /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */
#define NONBE_OPT_2193_EIG2X2                      /* Dolby: Issue 2193, optimize eig2x2_fx. */
#define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST        /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */
#define OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS /* Dolby: Issue 2269, optimize IVAS DIRAC DEC binaural functions. */

/* #################### End BASOP optimization switches ############################ */

+308 −8
Original line number Diff line number Diff line
@@ -4185,9 +4185,39 @@ static void matrixDiagMul_fx(
    Word32 imOut_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_Out*/
    Word16 *q_Out )
{
    Word16 chA, chB;
    Word32 not_zero = 0;

#ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS
    reOut_fx[0][0] = Mpy_32_32( reIn_fx[0][0], D_fx[0] );
    imOut_fx[0][0] = Mpy_32_32( imIn_fx[0][0], D_fx[0] );
    move32();
    move32();
    not_zero = L_or( not_zero, reOut_fx[0][0] );
    not_zero = L_or( not_zero, imOut_fx[0][0] );

    reOut_fx[0][1] = Mpy_32_32( reIn_fx[0][1], D_fx[1] );
    imOut_fx[0][1] = Mpy_32_32( imIn_fx[0][1], D_fx[1] );
    move32();
    move32();
    not_zero = L_or( not_zero, reOut_fx[0][1] );
    not_zero = L_or( not_zero, imOut_fx[0][1] );

    reOut_fx[1][0] = Mpy_32_32( reIn_fx[1][0], D_fx[0] );
    imOut_fx[1][0] = Mpy_32_32( imIn_fx[1][0], D_fx[0] );
    move32();
    move32();
    not_zero = L_or( not_zero, reOut_fx[1][0] );
    not_zero = L_or( not_zero, imOut_fx[1][0] );

    reOut_fx[1][1] = Mpy_32_32( reIn_fx[1][1], D_fx[1] );
    imOut_fx[1][1] = Mpy_32_32( imIn_fx[1][1], D_fx[1] );
    move32();
    move32();
    not_zero = L_or( not_zero, reOut_fx[1][1] );
    not_zero = L_or( not_zero, imOut_fx[1][1] );
#else
    Word16 chA, chB;

    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
@@ -4200,7 +4230,7 @@ static void matrixDiagMul_fx(
            not_zero = L_or( not_zero, imOut_fx[chA][chB] );
        }
    }

#endif
    *q_Out = sub( add( q_In, q_D ), 31 );
    move16();
    if ( !not_zero )
@@ -4217,6 +4247,78 @@ static void matrixScale_fx(
    Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
    Word16 *q_A )
{
#ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS
    Word16 shift_tmp;
    Word16 shift = 31;
    move16();

    shift_tmp = norm_l( Are_fx[0][0] );
    if ( Are_fx[0][0] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Are_fx[0][1] );
    if ( Are_fx[0][1] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Are_fx[1][0] );
    if ( Are_fx[1][0] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Are_fx[1][1] );
    if ( Are_fx[1][1] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }

    shift_tmp = norm_l( Aim_fx[0][0] );
    if ( Aim_fx[0][0] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Aim_fx[0][1] );
    if ( Aim_fx[0][1] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Aim_fx[1][0] );
    if ( Aim_fx[1][0] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Aim_fx[1][1] );
    if ( Aim_fx[1][1] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }

    shift = sub( shift, 1 );

    IF( shift != 0 )
    {
        Are_fx[0][0] = L_shl( Are_fx[0][0], shift );
        Are_fx[0][1] = L_shl( Are_fx[0][1], shift );
        Are_fx[1][0] = L_shl( Are_fx[1][0], shift );
        Are_fx[1][1] = L_shl( Are_fx[1][1], shift );
        Aim_fx[0][0] = L_shl( Aim_fx[0][0], shift );
        Aim_fx[0][1] = L_shl( Aim_fx[0][1], shift );
        Aim_fx[1][0] = L_shl( Aim_fx[1][0], shift );
        Aim_fx[1][1] = L_shl( Aim_fx[1][1], shift );
        move32();
        move32();
        move32();
        move32();
        move32();
        move32();
        move32();
        move32();
    }

    *q_A = add( *q_A, shift );
    move16();
#else
    Word16 shift;
    Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS;
    shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 );
@@ -4224,6 +4326,7 @@ static void matrixScale_fx(
    scale_sig32( Aim_fx[0], size, shift );
    *q_A = add( *q_A, shift );
    move16();
#endif
}

static void matrixMul_fx(
@@ -4237,8 +4340,61 @@ static void matrixMul_fx(
    Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
    Word16 *q_out )
{
    Word16 chA, chB;
    Word32 not_zero = 0;
#if defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES )
    outRe_fx[0][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][0] ),
                                                         Are_fx[0][1], Bre_fx[1][0] ),
                                             Aim_fx[0][0], Bim_fx[0][0] ),
                                 Aim_fx[0][1], Bim_fx[1][0] );
    move32();
    outIm_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][0] ),
                                                         Aim_fx[0][1], Bre_fx[1][0] ),
                                             Are_fx[0][0], Bim_fx[0][0] ),
                                 Are_fx[0][1], Bim_fx[1][0] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][0] );
    not_zero = L_or( not_zero, outIm_fx[0][0] );

    outRe_fx[0][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][1] ),
                                                         Are_fx[0][1], Bre_fx[1][1] ),
                                             Aim_fx[0][0], Bim_fx[0][1] ),
                                 Aim_fx[0][1], Bim_fx[1][1] );
    move32();
    outIm_fx[0][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][1] ),
                                                         Aim_fx[0][1], Bre_fx[1][1] ),
                                             Are_fx[0][0], Bim_fx[0][1] ),
                                 Are_fx[0][1], Bim_fx[1][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][1] );
    not_zero = L_or( not_zero, outIm_fx[0][1] );

    outRe_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][0] ),
                                                         Are_fx[1][1], Bre_fx[1][0] ),
                                             Aim_fx[1][0], Bim_fx[0][0] ),
                                 Aim_fx[1][1], Bim_fx[1][0] );
    move32();
    outIm_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][0] ),
                                                         Aim_fx[1][1], Bre_fx[1][0] ),
                                             Are_fx[1][0], Bim_fx[0][0] ),
                                 Are_fx[1][1], Bim_fx[1][0] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][0] );
    not_zero = L_or( not_zero, outIm_fx[1][0] );

    outRe_fx[1][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][1] ),
                                                         Are_fx[1][1], Bre_fx[1][1] ),
                                             Aim_fx[1][0], Bim_fx[0][1] ),
                                 Aim_fx[1][1], Bim_fx[1][1] );
    move32();
    outIm_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][1] ),
                                                         Aim_fx[1][1], Bre_fx[1][1] ),
                                             Are_fx[1][0], Bim_fx[0][1] ),
                                 Are_fx[1][1], Bim_fx[1][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][1] );
    not_zero = L_or( not_zero, outIm_fx[1][1] );
#else
    Word16 chA, chB;

    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
@@ -4269,6 +4425,7 @@ static void matrixMul_fx(
            not_zero = L_or( not_zero, outIm_fx[chA][chB] );
        }
    }
#endif

    *q_out = sub( add( *q_A, *q_B ), 31 );
    move16();
@@ -4292,8 +4449,6 @@ static void matrixTransp1Mul_fx(
    Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
    Word16 *q_out )
{
    Word16 chA, chB;

    Word64 tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
    Word64 tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
    Word64 tmp64;
@@ -4302,6 +4457,96 @@ static void matrixTransp1Mul_fx(
    common_lsh = sub( 63, q );
    move16();

    Word32 not_zero = 0;
    move32();

#ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS
    tmp64 = W_mult_32_32( Are_fx[0][0], Bre_fx[0][0] );
    tmp64 = W_mac_32_32( tmp64, Are_fx[1][0], Bre_fx[1][0] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[0][0], Bim_fx[0][0] );
    tmp_outRe_fx[0][0] = W_mac_32_32( tmp64, Aim_fx[1][0], Bim_fx[1][0] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[0][0] ) );

    tmp64 = W_mult_32_32( Aim_fx[0][0], Bre_fx[0][0] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[1][0], Bre_fx[1][0] );
    tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][0], Bim_fx[0][0] );
    tmp_outIm_fx[0][0] = W_mac_32_32( tmp64, Are_fx[1][0], Bim_fx[1][0] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[0][0] ) );

    tmp64 = W_mult_32_32( Are_fx[0][0], Bre_fx[0][1] );
    tmp64 = W_mac_32_32( tmp64, Are_fx[1][0], Bre_fx[1][1] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[0][0], Bim_fx[0][1] );
    tmp_outRe_fx[0][1] = W_mac_32_32( tmp64, Aim_fx[1][0], Bim_fx[1][1] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[0][1] ) );

    tmp64 = W_mult_32_32( Aim_fx[0][0], Bre_fx[0][1] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[1][0], Bre_fx[1][1] );
    tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][0], Bim_fx[0][1] );
    tmp_outIm_fx[0][1] = W_mac_32_32( tmp64, Are_fx[1][0], Bim_fx[1][1] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[0][1] ) );

    tmp64 = W_mult_32_32( Are_fx[0][1], Bre_fx[0][0] );
    tmp64 = W_mac_32_32( tmp64, Are_fx[1][1], Bre_fx[1][0] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[0][1], Bim_fx[0][0] );
    tmp_outRe_fx[1][0] = W_mac_32_32( tmp64, Aim_fx[1][1], Bim_fx[1][0] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[1][0] ) );

    tmp64 = W_mult_32_32( Aim_fx[0][1], Bre_fx[0][0] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[1][1], Bre_fx[1][0] );
    tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][1], Bim_fx[0][0] );
    tmp_outIm_fx[1][0] = W_mac_32_32( tmp64, Are_fx[1][1], Bim_fx[1][0] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[1][0] ) );

    tmp64 = W_mult_32_32( Are_fx[0][1], Bre_fx[0][1] );
    tmp64 = W_mac_32_32( tmp64, Are_fx[1][1], Bre_fx[1][1] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[0][1], Bim_fx[0][1] );
    tmp_outRe_fx[1][1] = W_mac_32_32( tmp64, Aim_fx[1][1], Bim_fx[1][1] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[1][1] ) );

    tmp64 = W_mult_32_32( Aim_fx[0][1], Bre_fx[0][1] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[1][1], Bre_fx[1][1] );
    tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][1], Bim_fx[0][1] );
    tmp_outIm_fx[1][1] = W_mac_32_32( tmp64, Are_fx[1][1], Bim_fx[1][1] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[1][1] ) );

    outRe_fx[0][0] = W_extract_h( W_shl( tmp_outRe_fx[0][0], common_lsh ) );
    move32();
    outIm_fx[0][0] = W_extract_h( W_shl( tmp_outIm_fx[0][0], common_lsh ) );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][0] );
    not_zero = L_or( not_zero, outIm_fx[0][0] );

    outRe_fx[0][1] = W_extract_h( W_shl( tmp_outRe_fx[0][1], common_lsh ) );
    move32();
    outIm_fx[0][1] = W_extract_h( W_shl( tmp_outIm_fx[0][1], common_lsh ) );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][1] );
    not_zero = L_or( not_zero, outIm_fx[0][1] );

    outRe_fx[1][0] = W_extract_h( W_shl( tmp_outRe_fx[1][0], common_lsh ) );
    move32();
    outIm_fx[1][0] = W_extract_h( W_shl( tmp_outIm_fx[1][0], common_lsh ) );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][0] );
    not_zero = L_or( not_zero, outIm_fx[1][0] );

    outRe_fx[1][1] = W_extract_h( W_shl( tmp_outRe_fx[1][1], common_lsh ) );
    move32();
    outIm_fx[1][1] = W_extract_h( W_shl( tmp_outIm_fx[1][1], common_lsh ) );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][1] );
    not_zero = L_or( not_zero, outIm_fx[1][1] );
#else
    Word16 chA, chB;

    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
@@ -4321,7 +4566,7 @@ static void matrixTransp1Mul_fx(
            common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[chA][chB] ) );
        }
    }
    Word32 not_zero = 0;

    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
@@ -4334,6 +4579,8 @@ static void matrixTransp1Mul_fx(
            not_zero = L_or( not_zero, outIm_fx[chA][chB] );
        }
    }
#endif

    *q_out = sub( add( q, common_lsh ), 32 );
    move16();
    if ( !not_zero )
@@ -4356,9 +4603,61 @@ static void matrixTransp2Mul_fx(
    Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
    Word16 *q_out )
{
    Word16 chA, chB;
    // Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS;
    Word32 not_zero = 0;
#if defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES )
    outRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][0] ),
                                                         Are_fx[0][1], Bre_fx[0][1] ),
                                             Aim_fx[0][0], Bim_fx[0][0] ),
                                 Aim_fx[0][1], Bim_fx[0][1] );
    move32();
    outIm_fx[0][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][0] ),
                                                         Aim_fx[0][1], Bre_fx[0][1] ),
                                             Are_fx[0][0], Bim_fx[0][0] ),
                                 Are_fx[0][1], Bim_fx[0][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][0] );
    not_zero = L_or( not_zero, outIm_fx[0][0] );

    outRe_fx[0][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[1][0] ),
                                                         Are_fx[0][1], Bre_fx[1][1] ),
                                             Aim_fx[0][0], Bim_fx[1][0] ),
                                 Aim_fx[0][1], Bim_fx[1][1] );
    move32();
    outIm_fx[0][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[1][0] ),
                                                         Aim_fx[0][1], Bre_fx[1][1] ),
                                             Are_fx[0][0], Bim_fx[1][0] ),
                                 Are_fx[0][1], Bim_fx[1][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][1] );
    not_zero = L_or( not_zero, outIm_fx[0][1] );

    outRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][0] ),
                                                         Are_fx[1][1], Bre_fx[0][1] ),
                                             Aim_fx[1][0], Bim_fx[0][0] ),
                                 Aim_fx[1][1], Bim_fx[0][1] );
    move32();
    outIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][0] ),
                                                         Aim_fx[1][1], Bre_fx[0][1] ),
                                             Are_fx[1][0], Bim_fx[0][0] ),
                                 Are_fx[1][1], Bim_fx[0][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][0] );
    not_zero = L_or( not_zero, outIm_fx[1][0] );

    outRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[1][0] ),
                                                         Are_fx[1][1], Bre_fx[1][1] ),
                                             Aim_fx[1][0], Bim_fx[1][0] ),
                                 Aim_fx[1][1], Bim_fx[1][1] );
    move32();
    outIm_fx[1][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[1][0] ),
                                                         Aim_fx[1][1], Bre_fx[1][1] ),
                                             Are_fx[1][0], Bim_fx[1][0] ),
                                 Are_fx[1][1], Bim_fx[1][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][1] );
    not_zero = L_or( not_zero, outIm_fx[1][1] );
#else
    Word16 chA, chB;
    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
@@ -4388,6 +4687,7 @@ static void matrixTransp2Mul_fx(
            not_zero = L_or( not_zero, outIm_fx[chA][chB] );
        }
    }
#endif

    *q_out = sub( add( *q_A, *q_B ), 31 );
    move16();