Commit 1bb7dd5c authored by Fabian Bauer's avatar Fabian Bauer
Browse files

added/changed FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot &...

added/changed FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot & FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx
parent 9ab12acf
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -4994,6 +4994,14 @@ void ivas_dirac_dec_get_response_fx(
    const Word16 ambisonics_order,
    Word16 Q_out );

#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx
/* Variant of ivas_dirac_dec_get_response_fx() without the Q_out argument:
 * the response is always produced in Q29 (response_fx[0] is written as 0x20000000). */
void ivas_dirac_dec_get_response_fx_29(
    const Word16 azimuth,     /* i  : azimuth; offset by 180 and wrapped modulo 360 internally (presumably degrees - confirm)  */
    const Word16 elevation,   /* i  : elevation; offset by 90 internally (presumably degrees - confirm)                        */
    Word32 *response_fx,      /* o  : response coefficients, indices 0 .. (ambisonics_order+1)^2 - 1 are written, Q29          */
    const Word16 ambisonics_order); /* i  : highest order l processed by the definition's loop                                  */
  #endif

void calculate_hodirac_sector_parameters_fx(
    DIRAC_ENC_HANDLE hDirAC,                                          /* i  : DirAC handle                                  */
    Word32 RealBuffer_fx[DIRAC_MAX_ANA_CHANS][DIRAC_NO_FB_BANDS_MAX], /* i  : signal vector (L+1)^2 x N_bins, real part     */
+135 −0
Original line number Diff line number Diff line
@@ -7173,6 +7173,141 @@ void ivas_dirac_dec_get_response_fx(
    return;
}

#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx
/*-----------------------------------------------------------------------------------------*
 * Function ivas_dirac_dec_get_response_fx_29
 *
 * Variant of ivas_dirac_dec_get_response_fx() that takes no Q_out argument: the output
 * format is fixed, response_fx[0] is written as 0x20000000 (1.0 in Q29) and no run-time
 * re-scaling to a caller-selected Q is performed.
 *
 * The coefficients are read from local_result_table / local_result_table_2 (row selected
 * by the folded elevation index, column by dirac_gains_P_idx[]) and combined with
 * sin/cos of 1x, 2x and 3x the folded azimuth.
 *
 * Constraints visible in the code:
 *  - sin_az_fx[] / cos_az_fx[] hold 3 entries and are indexed with (l - m - 1), so
 *    ambisonics_order must not exceed 3.
 *  - response_fx[] is written at indices 0 .. (ambisonics_order + 1)^2 - 1.
 *-----------------------------------------------------------------------------------------*/
void ivas_dirac_dec_get_response_fx_29(
    const Word16 azimuth,   /* i  : azimuth; offset by 180 and wrapped modulo 360 below (presumably degrees - confirm) */
    const Word16 elevation, /* i  : elevation; offset by 90 below (presumably degrees - confirm)                       */
    Word32 *response_fx,    /* o  : response coefficients, Q29                                                         */
    const Word16 ambisonics_order) /* i  : highest order l processed by the loop below                                 */
{
    Word16 index_azimuth, index_elevation;
    Word16 el, az;
    Word32 cos_1_fx, cos_2_fx, sin_1_fx, cos_az_fx[3];
    Word32 sin_az_fx[3];
    Word32 f_fx;
    Word32 c_fx_better;
    Word16 l, m;
    Word16 b, b1, b_2, b1_2;
    /* The output Q is fixed to 29 in this variant; there is no Q_out variable. */

    push_wmops( "ivas_dirac_dec_get_response_fx_29" );
    /* Shift the inputs to non-negative table indices. */
    index_azimuth = add( azimuth, 180 ) % 360;
    move16();
    index_elevation = add( elevation, 90 );

    /* Factor multiplied (via Mpy_32_32) into the odd-m terms below:
     * 0x7FFFFFFF when index_elevation <= 90, 0x80000000 otherwise
     * (presumably ~+1.0 / -1.0 in Q31 - confirm against Mpy_32_32 scaling).
     * NOTE(review): 0x7FFFFFFF is not exactly 1.0 in Q31, so the multiply may not be a
     * bit-exact identity for index_elevation <= 90 - confirm this is acceptable. */
    Word32 e_fac = L_add(0x7FFFFFFF, 0);

    if ( GT_16( index_elevation, 90 ) )
    {
        e_fac = L_add(0x80000000, 0);
    }


    /* Fold the elevation index into 0..90: values above 90 are mirrored (180 - index). */
    el = index_elevation;
    move16();

    if ( GT_16( index_elevation, 90 ) )
    {
        el = sub( 180, index_elevation );
    }

    /* Fold the azimuth index into 0..180: values above 180 are mirrored (360 - index). */
    az = index_azimuth;
    move16();

    if ( GT_16( index_azimuth, 180 ) )
    {
        az = sub( 360, index_azimuth );
    }

    /* f_fx records whether the azimuth was mirrored; it only selects the sign of sin_1_fx below.
     * NOTE(review): f_fx is a Word32 but the store is followed by move16(), and the -1 store
     * has no move at all - confirm the intended complexity instrumentation. */
    f_fx = 1;
    move16();

    if ( GT_16( index_azimuth, 180 ) )
    {
        f_fx = -1;
    }

    /* Table entry [az][0] is used as the cosine and [az][1] as the sine of the folded azimuth. */
    cos_1_fx = L_shr( dirac_gains_trg_term_fx[az][0], 1 );  // q30
    cos_2_fx = L_shl( Mpy_32_32( cos_1_fx, cos_1_fx ), 1 ); // q30, cos^2
    sin_1_fx = L_shr( dirac_gains_trg_term_fx[az][1], 1 );  // q30

    /* Restore the sign of the sine when the azimuth was mirrored. */
    if ( EQ_32( f_fx, -1 ) )
    {
        sin_1_fx = L_negate( sin_1_fx ); // q30
    }
    /* Multiple-angle terms: index i holds cos / sin of (i + 1) times the azimuth. */
    cos_az_fx[0] = cos_1_fx; // q30
    move32();
    cos_az_fx[1] = L_shl( L_sub( cos_2_fx, ONE_IN_Q29 /*0.5 q30*/ ), 1 ); /*q30, 2*cos^2 - 1*/
    move32();
    cos_az_fx[2] = L_sub( L_shl( Mpy_32_32( cos_1_fx, cos_az_fx[1] ), 2 ), cos_az_fx[0] /* cos_az_fx[0] q30*/ ); /*q30, 2*cos*cos(2x) - cos*/
    move32();
    sin_az_fx[0] = sin_1_fx; /*q30*/
    move32();
    sin_az_fx[1] = L_shl( Mpy_32_32( sin_1_fx, cos_1_fx ), 2 ); /*q30, 2*sin*cos*/
    move32();
    sin_az_fx[2] = L_shl( Mpy_32_32( sin_1_fx, L_sub( cos_2_fx, ONE_IN_Q28 /*1/4 q30*/ ) ), 3 ); /*q30, 4*sin*(cos^2 - 1/4)*/
    move32();

    /* Order-0 coefficient: constant 1.0 in Q29 (replaces L_shl_sat( 1, Q_out ) of the generic variant). */
    response_fx[0] = 0x20000000;
    move32();

    /* No q_diff re-scaling step: the output stays in Q29. */

    push_wmops( "ivas_dirac_dec_get_response_fx_29_LOOPS" );

FOR( l = 1; l <= ambisonics_order; l++ )
    {
        Word16 a;
        b_2 = imult1616( l, l );         /* first output index of order l: l^2 */
        b1_2 = add( b_2, shl( l, 1 ) );  /* last output index of order l: l^2 + 2*l */
        /* Even m: table value is used as is (no e_fac). */
        FOR( m = 0; m < l; m += 2 )
        {
            b = b_2 + m;
            a = dirac_gains_P_idx[b];

            c_fx_better = local_result_table[el][a]; // q30
            move32();
            response_fx[b] = Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ); // q30 * q30 -> Q29
            move32();

            b1 = b1_2 - m; /* mirrored index, counted back from the end of order l */
            response_fx[b1] = Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ); // q30 * q30 -> Q29
            move32();
        }

        /* Odd m: table value is additionally multiplied by e_fac. */
        FOR( m = 1; m < l; m += 2 )
        {
            b = b_2 + m;
            a = dirac_gains_P_idx[b];
            c_fx_better = local_result_table[el][a]; // q30
            move32();
            c_fx_better = Mpy_32_32( c_fx_better, e_fac ); // q30
            response_fx[b] = Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ); // q30 * q30 -> Q29
            move32();

            b1 = b1_2 - m;
            response_fx[b1] = Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ); // q30 * q30 -> Q29
            move32();
        }

        /* Centre coefficient of order l (index l^2 + l): taken from local_result_table_2,
         * no azimuth term; e_fac is applied only for odd l. */
        b = add( b_2, l );
        a = dirac_gains_P_idx[b];
        c_fx_better = local_result_table_2[el][a]; // q30
        move32();
        if ( s_and( l, 0x01 ) )
        {
          c_fx_better = Mpy_32_32( c_fx_better, e_fac ); // q30
        }
        /* NOTE(review): shift count is -1, i.e. presumably a right shift by one (q30 -> Q29) -
         * confirm L_shl semantics for negative counts. */
        response_fx[b] = L_shl( c_fx_better, -1 ); // Q29
        move32();
    }

    pop_wmops(); /*push_wmops( "ivas_dirac_dec_get_response_fx_29_LOOPS" );*/
    pop_wmops(); /*push_wmops( "ivas_dirac_dec_get_response_fx_29" );*/
    return;
}
#endif /*FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx*/
/*-----------------------------------------------------------------------------------------*
 * Function ivas_get_bits_to_encode
 *
+4 −3
Original line number Diff line number Diff line
@@ -164,11 +164,12 @@
#define FIX_ISSUE_1247
#define NONBE_FIX_1087_OOB_SBA_DTX_RS                   /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */
#define FIX_1285_DECODER_CRASH

#define FIX_MINOR_SVD_WMOPS_MR1010X             /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */
#define FIX_1072_SPEEDUP_gainpanning            /* FhG: Minor WMOPS tuning, in development*/
#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESS      /* "-" */


#define FIX_1072_SPEEDUP_gainpanning            /* FhG: WMOPS tuning, in development*/
#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESS     /* "-" */
#define FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, in development*/
#define FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, in development*/

#endif
 No newline at end of file
+90 −27
Original line number Diff line number Diff line
@@ -821,7 +821,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                Word16 k;
                IF( ch_idx != 0 )
                {
                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF <<<<<-|" );
                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop A <<<<<-|" );
                    ;
                    Word32 a, c;
                    Word16 b, b_exp, sqr_exp, q_diff_aab, q_diff_c;
@@ -912,8 +912,50 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                        }
                    }
                    c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0  = 5461 in Q15*/ /*Q27*/
                    pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop A <<<<<-|" );*/
                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop B <<<<<-|" );
#ifdef FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot
                    FOR( ; k < num_freq_bands; k++ )
                    {
                        a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses
                        move32();
                        IF( reference_power[k + num_freq_bands] == 0 )
                        {
                            sqr_inp = Mpy_32_32( diffuseness[k], c );
                            sqr_exp = sub( 31 - 4, q_diffuseness );
                        }
                        ELSE
                        {
                            IF( reference_power[k + ( ch_idx + 1 ) * num_freq_bands] == 0 )
                            {
                                mpy_a_a_b = Mpy_32_32( a, a );                                                             // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31
                                mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31
                                mpy_diff_c = Mpy_32_32( diffuseness[k], c );                                               // Q = q_diffuseness - 4
                                //q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q, add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) );
                                q_diff_aab = sub( add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ), 62 );
                                q_diff_c = sub( q_diffuseness, 4 );

                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF nfreqbds <<<<<<-|" );
                                sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/
                            }
                            ELSE
                            {
                                b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/

                                mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) );                                           // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31
                                mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31
                                mpy_diff_c = Mpy_32_32( diffuseness[k], c );                                               // Q = q_diffuseness - 4
                                //q_diff_aab = add( add(h_dirac_output_synthesis_state->direct_responses_q , sub( sub( 15, b_exp ), 15 )), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) );
                                q_diff_aab = add( sub( h_dirac_output_synthesis_state->direct_responses_q, b_exp ), ( sub( add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ), 62 ) ) );
                                q_diff_c = sub( q_diffuseness, 4 );

                                sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/

                            }
                        }
                        sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/
                        sqr = L_shr( sqr, 2 );             /*Q(31-sqr_exp)*/
                    }
#else
                    FOR( ; k < num_freq_bands; k++ )
                    {
                        a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses
@@ -942,7 +984,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                        mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) );                                           // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31
                        mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31
                        mpy_diff_c = Mpy_32_32( diffuseness[k], c );                                               // Q = q_diffuseness - 4
                        q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) );
                        q_diff_aab = add( add( h_dirac_output_synthesis_state->direct_responses_q, sub( sub( 15, b_exp ), 15 ) ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) );
                        q_diff_c = sub( q_diffuseness, 4 );

                        test();
@@ -968,6 +1010,9 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(

                        sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/
                        sqr = L_shr( sqr, 2 );             /*Q(31-sqr_exp)*/
                    }
#endif

                    IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 )
                    {
                        IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) )
@@ -993,13 +1038,12 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                        Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp );
                        move16();
                    }
                    }
                    pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF nfreqbds <<<<<<-|" );*/
                    pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF <<<<<-|" );*/

                    pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop B <<<<<-|" );*/
                }
                ELSE
                {
                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" );
                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop C <<<<<-|" );
                    Word32 sqr_inp, mpy_diff, sqr;
                    Word16 sqr_exp;
                    /*Diffuseness modellling nrg compensation*/
@@ -1037,7 +1081,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                            move16();
                        }
                    }
                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" );
                    pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop C <<<<<<-|" );*/
                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop D <<<<<<-|" );
                    FOR( ; k < num_freq_bands; k++ )
                    {
                        mpy_diff = Mpy_32_32( diffuseness[k], L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_decorr_fx, ONE_IN_Q29 /*1 Q29*/ ) ); // Q = q_diffuseness - 1
@@ -1071,8 +1116,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                            move16();
                        }
                    }
                    pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" );*/
                    pop_wmops();/*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" );/*/

                    pop_wmops();/*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop D <<<<<-|" );/*/
                }
            }
            pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" );/*/
@@ -3066,24 +3111,38 @@ void ivas_dirac_dec_compute_directional_responses_fx(
                exp_direct_response_dir2 = 0;
                move16();

                push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse" );
                IF( p_Rmat != 0 )
                {
                    push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse_SPLIT_FX" );
                    ivas_dirac_dec_get_response_split_order_fx( azimuth[k], elevation[k], direct_response_hoa_fx, shd_rot_max_order, p_Rmat, &Q_direct_response_hoa );

                    IF( hodirac_flag )
                    {
                        ivas_dirac_dec_get_response_split_order_fx( azimuth2[k], elevation2[k], direct_response_dir2_fx, shd_rot_max_order, p_Rmat, &Q_direct_response_dir2 );
                    }
                    pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse_SPLIT_FX" );*/
                }
                ELSE
                {
                    push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse__FX" );
#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx
                    ivas_dirac_dec_get_response_fx_29( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order);
#else
                    ivas_dirac_dec_get_response_fx( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_hoa );
#endif

                    IF( hodirac_flag )
                    {
#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx
                        ivas_dirac_dec_get_response_fx_29( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order);
#else
                        ivas_dirac_dec_get_response_fx( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_dir2 );
#endif
                    }
                    pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse__FX" );*/
                }
                pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse" );*/

                test();
                test();
@@ -3091,16 +3150,19 @@ void ivas_dirac_dec_compute_directional_responses_fx(
                test();
                IF( masa_band_mapping == NULL && EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) )
                {
                    push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH1" );
                    mvr2r_inc_fixed( direct_response_hoa_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*Q_direct_response_hoa*/

                    IF( hodirac_flag )
                    {
                        mvr2r_inc_fixed( direct_response_dir2_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k + hSpatParamRendCom->num_freq_bands * num_channels_dir], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*Q_direct_response_dir2*/
                    }
                    pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH1" );*/
                }
                ELSE IF( ( ( EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) && ( masa_band_mapping != NULL ) ) ||
                         EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_PSD_SHD ) || EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_MONO ) )
                {
                    push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH2" );
                    /* Synthesize the first direction */
                    IF( GT_16( Q_direct_response_hoa, Q29 ) )
                    {
@@ -3347,6 +3409,7 @@ void ivas_dirac_dec_compute_directional_responses_fx(
                    }

                    mvr2r_inc_fixed( direct_response_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*q29*/
                    pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH2" );*/
                }
                ELSE
                {