Commit 2d98600e authored by Sandesh Venkatesh
Browse files

Merge branch...

Merge branch '1310-complexity-remaining-complexity-overhead-for-osba-decoding-follow-up-issue-to-1072' into 'main'

Resolve "Complexity: Remaining complexity overhead for OSBA Decoding - follow-up Issue to #1072"

Closes #1310

See merge request !1154
parents fb81980b 12d2affe
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -4984,6 +4984,15 @@ void ivas_dirac_dec_get_response_fx(
    const Word16 ambisonics_order,
    Word16 Q_out );

#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx
/* Variant of ivas_dirac_dec_get_response_fx() with the output format fixed to Q29 (no Q_out argument). */
/* Writes ( ambisonics_order + 1 )^2 coefficients to response_fx[].                                     */
void ivas_dirac_dec_get_response_fx_29(
    const Word16 azimuth,          /* i  : azimuth; offset by +180 and wrapped modulo 360 internally (presumably degrees) */
    const Word16 elevation,        /* i  : elevation; offset by +90 internally (presumably degrees)                       */
    Word32 *response_fx,           /* o  : response coefficients, Q29                                                     */
    const Word16 ambisonics_order ); /* i  : highest order l for which coefficients are written                           */
#endif

void calculate_hodirac_sector_parameters_fx(
    DIRAC_ENC_HANDLE hDirAC,                                          /* i  : DirAC handle                                  */
    Word32 RealBuffer_fx[DIRAC_MAX_ANA_CHANS][DIRAC_NO_FB_BANDS_MAX], /* i  : signal vector (L+1)^2 x N_bins, real part     */
+127 −0
Original line number Diff line number Diff line
@@ -7164,6 +7164,133 @@ void ivas_dirac_dec_get_response_fx(
    return;
}

#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx
/*-----------------------------------------------------------------------------------------*
 * Function ivas_dirac_dec_get_response_fx_29()
 *
 * Variant of ivas_dirac_dec_get_response_fx() with the output format fixed to Q29.
 * Fills response_fx[0 .. ( ambisonics_order + 1 )^2 - 1].
 *-----------------------------------------------------------------------------------------*/
void ivas_dirac_dec_get_response_fx_29(
    const Word16 azimuth,          /* i  : azimuth; offset by +180 and wrapped modulo 360 below (presumably degrees) */
    const Word16 elevation,        /* i  : elevation; offset by +90 below (presumably degrees)                       */
    Word32 *response_fx,           /* o  : response coefficients, Q29                                                */
    const Word16 ambisonics_order  /* i  : highest order l evaluated by the loop below                               */
)
{
    Word16 index_azimuth, index_elevation;
    Word16 el, az;
    Word32 cos_1_fx, cos_2_fx, sin_1_fx, cos_az_fx[3];
    Word32 sin_az_fx[3];
    Word32 f_fx;
    Word32 c_fx_better;
    Word32 e_fac;
    Word16 l, m;
    Word16 b, b1, b_2, b1_2;

    index_azimuth = add( azimuth, 180 ) % 360;
    move16();
    index_elevation = add( elevation, 90 );

    /* e_fac is a sign factor applied through Mpy_32_32(): largest positive value (~ +1.0)
       by default, most negative value (-1.0) when index_elevation exceeds 90 */
    e_fac = L_add( 0x7FFFFFFF, 0 );

    if ( GT_16( index_elevation, 90 ) )
    {
        e_fac = L_add( 0x80000000, 0 );
    }

    /* fold the elevation index into 0..90 for the table look-up */
    el = index_elevation;
    move16();

    if ( GT_16( index_elevation, 90 ) )
    {
        el = sub( 180, index_elevation );
    }

    /* fold the azimuth index into 0..180 for the table look-up */
    az = index_azimuth;
    move16();

    if ( GT_16( index_azimuth, 180 ) )
    {
        az = sub( 360, index_azimuth );
    }

    /* f_fx = -1 marks the folded half of the azimuth range; it flips the sign of the sine term */
    f_fx = 1;
    move32(); /* f_fx is a Word32, hence a 32-bit move */

    if ( GT_16( index_azimuth, 180 ) )
    {
        f_fx = -1;
    }

    cos_1_fx = L_shr( dirac_gains_trg_term_fx[az][0], 1 );  // q30
    cos_2_fx = L_shl( Mpy_32_32( cos_1_fx, cos_1_fx ), 1 ); // q30
    sin_1_fx = L_shr( dirac_gains_trg_term_fx[az][1], 1 );  // q30

    if ( EQ_32( f_fx, -1 ) )
    {
        sin_1_fx = L_negate( sin_1_fx ); // q30
    }

    /* cos_az_fx[n] / sin_az_fx[n] hold the ( n + 1 )-fold angle terms, built from the single-angle values */
    cos_az_fx[0] = cos_1_fx; // q30
    move32();
    cos_az_fx[1] = L_shl( L_sub( cos_2_fx, ONE_IN_Q29 /*0.5 q30*/ ), 1 ); /* 2*cos^2 - 1, q30 */
    move32();
    cos_az_fx[2] = L_sub( L_shl( Mpy_32_32( cos_1_fx, cos_az_fx[1] ), 2 ), cos_az_fx[0] /* cos_az_fx[0] q30*/ ); /* 2*cos*cos_az[1] - cos, q30 */
    move32();
    sin_az_fx[0] = sin_1_fx; /*q30*/
    move32();
    sin_az_fx[1] = L_shl( Mpy_32_32( sin_1_fx, cos_1_fx ), 2 ); /* 2*sin*cos, q30 */
    move32();
    sin_az_fx[2] = L_shl( Mpy_32_32( sin_1_fx, L_sub( cos_2_fx, ONE_IN_Q28 /*1/4 q30*/ ) ), 3 ); /* 4*sin*(cos^2 - 1/4), q30 */
    move32();

    response_fx[0] = 0x20000000; /* 1.0 in Q29 */
    move32();

    FOR( l = 1; l <= ambisonics_order; l++ )
    {
        Word16 a;
        b_2 = imult1616( l, l );         /* first index of order l */
        b1_2 = add( b_2, shl( l, 1 ) ); /* last index of order l  */

        /* even m: no elevation sign factor */
        FOR( m = 0; m < l; m += 2 )
        {
            b = b_2 + m;
            a = dirac_gains_P_idx[b];

            c_fx_better = local_result_table[el][a]; // q30
            move32();
            response_fx[b] = Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ); // q30 * q30 -> Q29
            move32();

            b1 = b1_2 - m;
            response_fx[b1] = Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ); // q30 * q30 -> Q29
            move32();
        }

        /* odd m: elevation sign factor e_fac is applied */
        FOR( m = 1; m < l; m += 2 )
        {
            b = b_2 + m;
            a = dirac_gains_P_idx[b];
            c_fx_better = local_result_table[el][a]; // q30
            move32();
            c_fx_better = Mpy_32_32( c_fx_better, e_fac );                   // q30
            response_fx[b] = Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ); // q30 * q30 -> Q29
            move32();

            b1 = b1_2 - m;
            response_fx[b1] = Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ); // q30 * q30 -> Q29
            move32();
        }

        /* centre coefficient of order l (index l*l + l): taken from the second table,
           sign factor applied for odd l only */
        b = add( b_2, l );
        a = dirac_gains_P_idx[b];
        c_fx_better = local_result_table_2[el][a]; // q30
        move32();
        if ( s_and( l, 0x01 ) )
        {
            c_fx_better = Mpy_32_32( c_fx_better, e_fac ); // q30
        }
        response_fx[b] = L_shr( c_fx_better, 1 ); // q30 -> Q29
        move32();
    }
    return;
}
#endif /*FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx*/
/*-----------------------------------------------------------------------------------------*
 * Function ivas_get_bits_to_encode
 *
+2 −0
Original line number Diff line number Diff line
@@ -69,4 +69,6 @@

/* Note: each compile switch (FIX_1101_...) is independent from the other ones */
//#define OPT_STEREO_32KBPS_V1                    /* Optimization made in stereo decoding path for 32kbps decoding */
#define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx                 /*FhG: WMOPS tuning, nonbe*/
#define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot   /*FhG: WMOPS tuning, nonbe*/
#endif
+78 −1
Original line number Diff line number Diff line
@@ -904,6 +904,74 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                        }
                    }
                    c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0  = 5461 in Q15*/ /*Q27*/
#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot
                    FOR( ; k < num_freq_bands; k++ )
                    {
                        a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses
                        move32();
                        IF( reference_power[k + num_freq_bands] == 0 )
                        {
                            sqr_inp = Mpy_32_32( diffuseness[k], c );
                            sqr_exp = sub( 31 + 4, q_diffuseness );
                        }
                        ELSE
                        {
                            Word16 diff_c_exp;
                            Word16 diff_aab_exp;
                            IF( reference_power[k + ( ch_idx + 1 ) * num_freq_bands] == 0 )
                            {
                                mpy_a_a_b = Mpy_32_32( a, a );                                                             // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31
                                mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31
                                mpy_diff_c = Mpy_32_32( diffuseness[k], c );                                               // Q = q_diffuseness - 4
                                diff_aab_exp = sub( 31 + 62, add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ) );
                                diff_c_exp = sub( 31 + 4, q_diffuseness );

                                sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/
                            }
                            ELSE
                            {
                                b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/

                                mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) );                                           // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31
                                mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31
                                mpy_diff_c = Mpy_32_32( diffuseness[k], c );                                               // Q = q_diffuseness - 4
                                diff_aab_exp = sub( sub( add( sub( 31 + 62, h_dirac_output_synthesis_state->direct_responses_q ), b_exp ), h_dirac_output_synthesis_state->direct_responses_q ), q_diffuseness );
                                diff_c_exp = sub( 31 + 4, q_diffuseness );

                                sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/
                            }
                        }
                        sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/
                        sqr = L_shr( sqr, 2 );             /*Q(31-sqr_exp)*/


                        IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 )
                        {
                            IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) )
                            {
                                h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_shr( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, sub( 31, sqr_exp ) ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth->Q( 31- sqr_exp )*/
                                move32();
                                Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp );
                                move16();
                            }
                            ELSE
                            {
                                sqr = L_shr( sqr, sub( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*Q(31- sqr_exp)->h_dirac_output_synthesis_state->q_cy_cross_dir_smooth*/
                                Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth;
                                move16();
                            }
                            h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k]*/
                            move32();
                        }
                        ELSE
                        {
                            h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q(31- sqr_exp)*/
                            move32();
                            Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp );
                            move16();
                        }
                    }
#else
                    FOR( ; k < num_freq_bands; k++ )
                    {
                        a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses
@@ -934,7 +1002,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                        mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31
                        mpy_diff_c = Mpy_32_32( diffuseness[k], c );                                               // Q = q_diffuseness - 4

                        q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) );
                        q_diff_aab = add( add( h_dirac_output_synthesis_state->direct_responses_q, sub( sub( 15, b_exp ), 15 ) ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) );
                        q_diff_c = sub( q_diffuseness, 4 );

                        test();
@@ -985,6 +1053,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                            move16();
                        }
                    }
#endif
                }
                ELSE
                {
@@ -3011,11 +3080,19 @@ void ivas_dirac_dec_compute_directional_responses_fx(
                }
                ELSE
                {
#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx
                    ivas_dirac_dec_get_response_fx_29( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order );
#else
                    ivas_dirac_dec_get_response_fx( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_hoa );
#endif

                    IF( hodirac_flag )
                    {
#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx
                        ivas_dirac_dec_get_response_fx_29( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order );
#else
                        ivas_dirac_dec_get_response_fx( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_dir2 );
#endif
                    }
                }