Commit 2304d459 authored by Nicolas Roussin
Browse files

Complete optimisation.

parent 6ce068ce
Loading
Loading
Loading
Loading
Loading
+123 −241
Original line number Diff line number Diff line
@@ -4250,75 +4250,54 @@ static void matrixScale_fx(
    Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
    Word16 *q_A )
{
#if 0 // def OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS
#ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS
    Word16 shift_tmp;
    Word16 shift = 31;
    move16();

    shift_tmp = norm_l( Are_fx[0][0] );
    if ( Are_fx[0][0] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Are_fx[0][1] );
    if ( Are_fx[0][1] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Are_fx[1][0] );
    if ( Are_fx[1][0] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Are_fx[1][1] );
    if ( Are_fx[1][1] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
/* Folds the norm_l() of mat[chA][chB] into the running minimum `shift`.
 * Elements equal to zero are skipped, so they never lower `shift`
 * (presumably because norm_l() of zero carries no headroom information -
 * confirm against the basic-op definition).
 * Uses the enclosing function's locals `shift` and `shift_tmp`. */
#define MATRIX_NORM_FX( mat, chA, chB )        \
    do                                         \
    {                                          \
        shift_tmp = norm_l( mat[chA][chB] );   \
        if ( mat[chA][chB] != 0 )              \
        {                                      \
            shift = s_min( shift, shift_tmp ); \
        }                                      \
    } while ( 0 )

    shift_tmp = norm_l( Aim_fx[0][0] );
    if ( Aim_fx[0][0] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Aim_fx[0][1] );
    if ( Aim_fx[0][1] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Aim_fx[1][0] );
    if ( Aim_fx[1][0] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    shift_tmp = norm_l( Aim_fx[1][1] );
    if ( Aim_fx[1][1] != 0 )
    {
        shift = s_min( shift, shift_tmp );
    }
    MATRIX_NORM_FX( Are_fx, 0, 0 );
    MATRIX_NORM_FX( Are_fx, 0, 1 );
    MATRIX_NORM_FX( Are_fx, 1, 0 );
    MATRIX_NORM_FX( Are_fx, 1, 1 );
    MATRIX_NORM_FX( Aim_fx, 0, 0 );
    MATRIX_NORM_FX( Aim_fx, 0, 1 );
    MATRIX_NORM_FX( Aim_fx, 1, 0 );
    MATRIX_NORM_FX( Aim_fx, 1, 1 );

    shift = sub( shift, 1 );
#undef MATRIX_NORM_FX

/* Shifts mat[chA][chB] by `shift` with L_shl() and stores the result back
 * in place; each store is followed by one move32() call.
 * Uses the enclosing function's local `shift`. */
#define MATRIX_SHL_FX( mat, chA, chB )                 \
    do                                                 \
    {                                                  \
        mat[chA][chB] = L_shl( mat[chA][chB], shift ); \
        move32();                                      \
    } while ( 0 )

    shift = sub( shift, 1 );
    IF( shift != 0 )
    {
        Are_fx[0][0] = L_shl( Are_fx[0][0], shift );
        Are_fx[0][1] = L_shl( Are_fx[0][1], shift );
        Are_fx[1][0] = L_shl( Are_fx[1][0], shift );
        Are_fx[1][1] = L_shl( Are_fx[1][1], shift );
        Aim_fx[0][0] = L_shl( Aim_fx[0][0], shift );
        Aim_fx[0][1] = L_shl( Aim_fx[0][1], shift );
        Aim_fx[1][0] = L_shl( Aim_fx[1][0], shift );
        Aim_fx[1][1] = L_shl( Aim_fx[1][1], shift );
        move32();
        move32();
        move32();
        move32();
        move32();
        move32();
        move32();
        move32();
        MATRIX_SHL_FX( Are_fx, 0, 0 );
        MATRIX_SHL_FX( Are_fx, 0, 1 );
        MATRIX_SHL_FX( Are_fx, 1, 0 );
        MATRIX_SHL_FX( Are_fx, 1, 1 );
        MATRIX_SHL_FX( Aim_fx, 0, 0 );
        MATRIX_SHL_FX( Aim_fx, 0, 1 );
        MATRIX_SHL_FX( Aim_fx, 1, 0 );
        MATRIX_SHL_FX( Aim_fx, 1, 1 );
    }

#undef MATRIX_SHL_FX

    *q_A = add( *q_A, shift );
    move16();
#else
@@ -4344,58 +4323,30 @@ static void matrixMul_fx(
    Word16 *q_out )
{
    Word32 not_zero = 0;
#if 0 // defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES )
    outRe_fx[0][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][0] ),
                                                         Are_fx[0][1], Bre_fx[1][0] ),
                                             Aim_fx[0][0], Bim_fx[0][0] ),
                                 Aim_fx[0][1], Bim_fx[1][0] );
    move32();
    outIm_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][0] ),
                                                         Aim_fx[0][1], Bre_fx[1][0] ),
                                             Are_fx[0][0], Bim_fx[0][0] ),
                                 Are_fx[0][1], Bim_fx[1][0] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][0] );
    not_zero = L_or( not_zero, outIm_fx[0][0] );

    outRe_fx[0][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][1] ),
                                                         Are_fx[0][1], Bre_fx[1][1] ),
                                             Aim_fx[0][0], Bim_fx[0][1] ),
                                 Aim_fx[0][1], Bim_fx[1][1] );
    move32();
    outIm_fx[0][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][1] ),
                                                         Aim_fx[0][1], Bre_fx[1][1] ),
                                             Are_fx[0][0], Bim_fx[0][1] ),
                                 Are_fx[0][1], Bim_fx[1][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][1] );
    not_zero = L_or( not_zero, outIm_fx[0][1] );

    outRe_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][0] ),
                                                         Are_fx[1][1], Bre_fx[1][0] ),
                                             Aim_fx[1][0], Bim_fx[0][0] ),
                                 Aim_fx[1][1], Bim_fx[1][0] );
    move32();
    outIm_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][0] ),
                                                         Aim_fx[1][1], Bre_fx[1][0] ),
                                             Are_fx[1][0], Bim_fx[0][0] ),
                                 Are_fx[1][1], Bim_fx[1][0] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][0] );
    not_zero = L_or( not_zero, outIm_fx[1][0] );

    outRe_fx[1][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][1] ),
                                                         Are_fx[1][1], Bre_fx[1][1] ),
                                             Aim_fx[1][0], Bim_fx[0][1] ),
                                 Aim_fx[1][1], Bim_fx[1][1] );
    move32();
    outIm_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][1] ),
                                                         Aim_fx[1][1], Bre_fx[1][1] ),
                                             Are_fx[1][0], Bim_fx[0][1] ),
                                 Are_fx[1][1], Bim_fx[1][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][1] );
    not_zero = L_or( not_zero, outIm_fx[1][1] );
#if defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES )
/* Computes output element [chA][chB] from row chA of A and column chB of B,
 * with the inner index k taking the values 0 and 1:
 *   outRe = sum_k Are[chA][k]*Bre[k][chB] - sum_k Aim[chA][k]*Bim[k][chB]
 *   outIm = sum_k Aim[chA][k]*Bre[k][chB] + sum_k Are[chA][k]*Bim[k][chB]
 * i.e. one element of the complex product A * B.
 * NOTE(review): this reads Madd_32_32( acc, x, y ) as acc + x*y and
 * Msub_32_32( acc, x, y ) as acc - x*y - confirm against the basic-op
 * definitions.
 * Both results are OR-ed into `not_zero`. The macro reads Are_fx, Aim_fx,
 * Bre_fx, Bim_fx and writes outRe_fx, outIm_fx and not_zero from the
 * enclosing function. */
#define MATRIX_MUL_FX( chA, chB )                                                                             \
    do                                                                                                        \
    {                                                                                                         \
        outRe_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] ), \
                                                                 Are_fx[chA][1], Bre_fx[1][chB] ),            \
                                                     Aim_fx[chA][0], Bim_fx[0][chB] ),                        \
                                         Aim_fx[chA][1], Bim_fx[1][chB] );                                    \
        move32();                                                                                             \
        outIm_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] ), \
                                                                 Aim_fx[chA][1], Bre_fx[1][chB] ),            \
                                                     Are_fx[chA][0], Bim_fx[0][chB] ),                        \
                                         Are_fx[chA][1], Bim_fx[1][chB] );                                    \
        move32();                                                                                             \
        not_zero = L_or( not_zero, outRe_fx[chA][chB] );                                                      \
        not_zero = L_or( not_zero, outIm_fx[chA][chB] );                                                      \
    } while ( 0 )

    MATRIX_MUL_FX( 0, 0 );
    MATRIX_MUL_FX( 0, 1 );
    MATRIX_MUL_FX( 1, 0 );
    MATRIX_MUL_FX( 1, 1 );

#undef MATRIX_MUL_FX
#else
    Word16 chA, chB;

@@ -4463,90 +4414,49 @@ static void matrixTransp1Mul_fx(
    Word32 not_zero = 0;
    move32();

#if 0 // def OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS
    tmp64 = W_mult_32_32( Are_fx[0][0], Bre_fx[0][0] );
    tmp64 = W_mac_32_32( tmp64, Are_fx[1][0], Bre_fx[1][0] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[0][0], Bim_fx[0][0] );
    tmp_outRe_fx[0][0] = W_mac_32_32( tmp64, Aim_fx[1][0], Bim_fx[1][0] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[0][0] ) );

    tmp64 = W_mult_32_32( Aim_fx[0][0], Bre_fx[0][0] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[1][0], Bre_fx[1][0] );
    tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][0], Bim_fx[0][0] );
    tmp_outIm_fx[0][0] = W_mac_32_32( tmp64, Are_fx[1][0], Bim_fx[1][0] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[0][0] ) );

    tmp64 = W_mult_32_32( Are_fx[0][0], Bre_fx[0][1] );
    tmp64 = W_mac_32_32( tmp64, Are_fx[1][0], Bre_fx[1][1] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[0][0], Bim_fx[0][1] );
    tmp_outRe_fx[0][1] = W_mac_32_32( tmp64, Aim_fx[1][0], Bim_fx[1][1] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[0][1] ) );

    tmp64 = W_mult_32_32( Aim_fx[0][0], Bre_fx[0][1] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[1][0], Bre_fx[1][1] );
    tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][0], Bim_fx[0][1] );
    tmp_outIm_fx[0][1] = W_mac_32_32( tmp64, Are_fx[1][0], Bim_fx[1][1] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[0][1] ) );

    tmp64 = W_mult_32_32( Are_fx[0][1], Bre_fx[0][0] );
    tmp64 = W_mac_32_32( tmp64, Are_fx[1][1], Bre_fx[1][0] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[0][1], Bim_fx[0][0] );
    tmp_outRe_fx[1][0] = W_mac_32_32( tmp64, Aim_fx[1][1], Bim_fx[1][0] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[1][0] ) );

    tmp64 = W_mult_32_32( Aim_fx[0][1], Bre_fx[0][0] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[1][1], Bre_fx[1][0] );
    tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][1], Bim_fx[0][0] );
    tmp_outIm_fx[1][0] = W_mac_32_32( tmp64, Are_fx[1][1], Bim_fx[1][0] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[1][0] ) );
#ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS
/* Computes 64-bit temporary element [chA][chB] with A indexed as [k][chA]
 * (i.e. transposed) and B indexed as [k][chB], k = 0 and 1:
 *   tmp_outRe =    sum_k Are[k][chA]*Bre[k][chB] + sum_k Aim[k][chA]*Bim[k][chB]
 *   tmp_outIm = -( sum_k Aim[k][chA]*Bre[k][chB] ) + sum_k Are[k][chA]*Bim[k][chB]
 * The imaginary part negates the partial sum with W_neg() before the
 * remaining terms are accumulated.
 * NOTE(review): this reads W_mac_32_32( acc, x, y ) as acc + x*y - confirm
 * against the basic-op definitions.
 * After each result is stored, `common_lsh` is lowered to the minimum of its
 * current value and W_norm() of that result. The macro uses tmp64,
 * tmp_outRe_fx, tmp_outIm_fx and common_lsh from the enclosing function. */
#define MATRIX_TRANSP1_MUL_FX( chA, chB )                                              \
    do                                                                                 \
    {                                                                                  \
        tmp64 = W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] );                        \
        tmp64 = W_mac_32_32( tmp64, Are_fx[1][chA], Bre_fx[1][chB] );                  \
        tmp64 = W_mac_32_32( tmp64, Aim_fx[0][chA], Bim_fx[0][chB] );                  \
        tmp_outRe_fx[chA][chB] = W_mac_32_32( tmp64, Aim_fx[1][chA], Bim_fx[1][chB] ); \
        move64();                                                                      \
        common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[chA][chB] ) );            \
                                                                                       \
        tmp64 = W_mult_32_32( Aim_fx[0][chA], Bre_fx[0][chB] );                        \
        tmp64 = W_mac_32_32( tmp64, Aim_fx[1][chA], Bre_fx[1][chB] );                  \
        tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][chA], Bim_fx[0][chB] );         \
        tmp_outIm_fx[chA][chB] = W_mac_32_32( tmp64, Are_fx[1][chA], Bim_fx[1][chB] ); \
        move64();                                                                      \
        common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[chA][chB] ) );            \
    } while ( 0 )

    tmp64 = W_mult_32_32( Are_fx[0][1], Bre_fx[0][1] );
    tmp64 = W_mac_32_32( tmp64, Are_fx[1][1], Bre_fx[1][1] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[0][1], Bim_fx[0][1] );
    tmp_outRe_fx[1][1] = W_mac_32_32( tmp64, Aim_fx[1][1], Bim_fx[1][1] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[1][1] ) );
    MATRIX_TRANSP1_MUL_FX( 0, 0 );
    MATRIX_TRANSP1_MUL_FX( 0, 1 );
    MATRIX_TRANSP1_MUL_FX( 1, 0 );
    MATRIX_TRANSP1_MUL_FX( 1, 1 );

    tmp64 = W_mult_32_32( Aim_fx[0][1], Bre_fx[0][1] );
    tmp64 = W_mac_32_32( tmp64, Aim_fx[1][1], Bre_fx[1][1] );
    tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][1], Bim_fx[0][1] );
    tmp_outIm_fx[1][1] = W_mac_32_32( tmp64, Are_fx[1][1], Bim_fx[1][1] );
    move64();
    common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[1][1] ) );
#undef MATRIX_TRANSP1_MUL_FX

    outRe_fx[0][0] = W_extract_h( W_shl( tmp_outRe_fx[0][0], common_lsh ) );
    move32();
    outIm_fx[0][0] = W_extract_h( W_shl( tmp_outIm_fx[0][0], common_lsh ) );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][0] );
    not_zero = L_or( not_zero, outIm_fx[0][0] );

    outRe_fx[0][1] = W_extract_h( W_shl( tmp_outRe_fx[0][1], common_lsh ) );
    move32();
    outIm_fx[0][1] = W_extract_h( W_shl( tmp_outIm_fx[0][1], common_lsh ) );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][1] );
    not_zero = L_or( not_zero, outIm_fx[0][1] );
/* Converts the 64-bit temporaries for element [chA][chB] into the 32-bit
 * outputs: each temporary is shifted by `common_lsh` with W_shl() and
 * W_extract_h() of the shifted value is stored in outRe_fx / outIm_fx.
 * Both stored values are then OR-ed into `not_zero`.
 * Uses tmp_outRe_fx, tmp_outIm_fx, common_lsh and not_zero from the
 * enclosing function. */
#define MATRIX_TRANSP1_SCALE_FX( chA, chB )                                              \
    do                                                                                   \
    {                                                                                    \
        outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], common_lsh ) ); \
        move32();                                                                        \
        outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], common_lsh ) ); \
        move32();                                                                        \
        not_zero = L_or( not_zero, outRe_fx[chA][chB] );                                 \
        not_zero = L_or( not_zero, outIm_fx[chA][chB] );                                 \
    } while ( 0 )

    outRe_fx[1][0] = W_extract_h( W_shl( tmp_outRe_fx[1][0], common_lsh ) );
    move32();
    outIm_fx[1][0] = W_extract_h( W_shl( tmp_outIm_fx[1][0], common_lsh ) );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][0] );
    not_zero = L_or( not_zero, outIm_fx[1][0] );
    MATRIX_TRANSP1_SCALE_FX( 0, 0 );
    MATRIX_TRANSP1_SCALE_FX( 0, 1 );
    MATRIX_TRANSP1_SCALE_FX( 1, 0 );
    MATRIX_TRANSP1_SCALE_FX( 1, 1 );

    outRe_fx[1][1] = W_extract_h( W_shl( tmp_outRe_fx[1][1], common_lsh ) );
    move32();
    outIm_fx[1][1] = W_extract_h( W_shl( tmp_outIm_fx[1][1], common_lsh ) );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][1] );
    not_zero = L_or( not_zero, outIm_fx[1][1] );
#undef MATRIX_TRANSP1_SCALE_FX
#else
    Word16 chA, chB;

@@ -4607,58 +4517,30 @@ static void matrixTransp2Mul_fx(
    Word16 *q_out )
{
    Word32 not_zero = 0;
#if 0 // defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES )
    outRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][0] ),
                                                         Are_fx[0][1], Bre_fx[0][1] ),
                                             Aim_fx[0][0], Bim_fx[0][0] ),
                                 Aim_fx[0][1], Bim_fx[0][1] );
    move32();
    outIm_fx[0][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][0] ),
                                                         Aim_fx[0][1], Bre_fx[0][1] ),
                                             Are_fx[0][0], Bim_fx[0][0] ),
                                 Are_fx[0][1], Bim_fx[0][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][0] );
    not_zero = L_or( not_zero, outIm_fx[0][0] );

    outRe_fx[0][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[1][0] ),
                                                         Are_fx[0][1], Bre_fx[1][1] ),
                                             Aim_fx[0][0], Bim_fx[1][0] ),
                                 Aim_fx[0][1], Bim_fx[1][1] );
    move32();
    outIm_fx[0][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[1][0] ),
                                                         Aim_fx[0][1], Bre_fx[1][1] ),
                                             Are_fx[0][0], Bim_fx[1][0] ),
                                 Are_fx[0][1], Bim_fx[1][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[0][1] );
    not_zero = L_or( not_zero, outIm_fx[0][1] );

    outRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][0] ),
                                                         Are_fx[1][1], Bre_fx[0][1] ),
                                             Aim_fx[1][0], Bim_fx[0][0] ),
                                 Aim_fx[1][1], Bim_fx[0][1] );
    move32();
    outIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][0] ),
                                                         Aim_fx[1][1], Bre_fx[0][1] ),
                                             Are_fx[1][0], Bim_fx[0][0] ),
                                 Are_fx[1][1], Bim_fx[0][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][0] );
    not_zero = L_or( not_zero, outIm_fx[1][0] );

    outRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[1][0] ),
                                                         Are_fx[1][1], Bre_fx[1][1] ),
                                             Aim_fx[1][0], Bim_fx[1][0] ),
                                 Aim_fx[1][1], Bim_fx[1][1] );
    move32();
    outIm_fx[1][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[1][0] ),
                                                         Aim_fx[1][1], Bre_fx[1][1] ),
                                             Are_fx[1][0], Bim_fx[1][0] ),
                                 Are_fx[1][1], Bim_fx[1][1] );
    move32();
    not_zero = L_or( not_zero, outRe_fx[1][1] );
    not_zero = L_or( not_zero, outIm_fx[1][1] );
#if defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES )
/* Computes output element [chA][chB] from row chA of A and row chB of B
 * (B is indexed as [chB][k], i.e. transposed), k = 0 and 1:
 *   outRe = sum_k Are[chA][k]*Bre[chB][k] + sum_k Aim[chA][k]*Bim[chB][k]
 *   outIm = sum_k Aim[chA][k]*Bre[chB][k] - sum_k Are[chA][k]*Bim[chB][k]
 * NOTE(review): this reads Madd_32_32( acc, x, y ) as acc + x*y and
 * Msub_32_32( acc, x, y ) as acc - x*y - confirm against the basic-op
 * definitions.
 * Both results are OR-ed into `not_zero`. The macro reads Are_fx, Aim_fx,
 * Bre_fx, Bim_fx and writes outRe_fx, outIm_fx and not_zero from the
 * enclosing function. */
#define MATRIX_TRANSP2_MUL_FX( chA, chB )                                                                     \
    do                                                                                                        \
    {                                                                                                         \
        outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] ), \
                                                                 Are_fx[chA][1], Bre_fx[chB][1] ),            \
                                                     Aim_fx[chA][0], Bim_fx[chB][0] ),                        \
                                         Aim_fx[chA][1], Bim_fx[chB][1] );                                    \
        move32();                                                                                             \
        outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[chB][0] ), \
                                                                 Aim_fx[chA][1], Bre_fx[chB][1] ),            \
                                                     Are_fx[chA][0], Bim_fx[chB][0] ),                        \
                                         Are_fx[chA][1], Bim_fx[chB][1] );                                    \
        move32();                                                                                             \
        not_zero = L_or( not_zero, outRe_fx[chA][chB] );                                                      \
        not_zero = L_or( not_zero, outIm_fx[chA][chB] );                                                      \
    } while ( 0 )

    MATRIX_TRANSP2_MUL_FX( 0, 0 );
    MATRIX_TRANSP2_MUL_FX( 0, 1 );
    MATRIX_TRANSP2_MUL_FX( 1, 0 );
    MATRIX_TRANSP2_MUL_FX( 1, 1 );

#undef MATRIX_TRANSP2_MUL_FX
#else
    Word16 chA, chB;
    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )