Commit 93b07a6f authored by Adityaraj Jain
Browse files

optimizations

parent 1ccc3c58
Loading
Loading
Loading
Loading
Loading
+32 −8
Original line number Diff line number Diff line
@@ -901,7 +901,7 @@ void cldfbAnalysis_ts_fx_fixed_q(
    const Word16 *ptr_pf_fx;
    Word16 ptr_pf_sf;
    Word32 *timeBuffer_fx, buffer_fx[( CLDFB_NO_CHANNELS_MAX * CLDFB_NO_COL_MAX ) + ( 9 * CLDFB_NO_CHANNELS_MAX )];
    Word16 offset, frameSize;
    Word16 offset, frameSize, gb, hr, shift;

    offset = sub( h_cldfb->p_filter_length, h_cldfb->no_channels );
    frameSize = i_mult( h_cldfb->no_channels, h_cldfb->no_col );
@@ -1071,10 +1071,33 @@ void cldfbAnalysis_ts_fx_fixed_q(
        *q_cldfb = sub( *q_cldfb, 2 );
        move16();

        gb = find_guarded_bits_fx( M1 );
        hr = L_norm_arr( iBuffer_fx, M1 );

        IF( LT_16( hr, gb ) )
        {
            scale_sig32( iBuffer_fx, M1, sub( hr, gb ) );
        }

        /* FFT of DCT IV */
        fft_cldfb_fx( iBuffer_fx, M2 );

        /* post modulation of DCT IV */
        IF( LT_16( hr, gb ) )
        {
            shift = sub( gb, hr );
            FOR( k = 0; k < M2; k++ )
            {
                /* do it inplace */
                /*cplxMult(&imagBuffer[2*k],&imagBuffer[M1-1-(2*k)],iBuffer[2*k],iBuffer[2*k+1],rot_vctr_re[k],rot_vctr_im[k]);*/
                imagBuffer_fx[2 * k] = L_shl( Msub_32_32( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_re_fx[k] ), iBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ), shift );                  // q - 5
                imagBuffer_fx[( M1 - 1 ) - ( k * 2 )] = L_shl( Madd_32_32( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_im_fx[k] ), iBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ), shift ); // q - 5
                move32();
                move32();
            }
        }
        ELSE
        {
            FOR( k = 0; k < M2; k++ )
            {
                /* do it inplace */
@@ -1084,6 +1107,7 @@ void cldfbAnalysis_ts_fx_fixed_q(
                move32();
                move32();
            }
        }

        IF( EQ_32( h_cldfb->prototype, CLDFB_PROTOTYPE_5_00MS ) )
        {
+10 −3
Original line number Diff line number Diff line
@@ -202,7 +202,14 @@ void sns_compute_scf_fx(
      -Q6 is for division with FDNS_NPTS and -Q1 is to reduce Q by one */
    mean = W_shl_sat_l( sum, -Q7 );                                   // q_out
    nf = Mpy_32_32( mean, 214748 /* powf( 10.0f, -4.0f ) in Q31 */ ); // q_out
    nf = L_max( nf, L_shl( 256, sub( q_out, 40 ) ) /* powf( 2.0f, -32.0f ) in Q40 */ ); // q_out

    IF( LE_32( nf, L_shl_sat( 256, sub( q_out, 40 ) ) ) ) /* powf( 2.0f, -32.0f ) in Q40 */
    {
        nf = 256;
        move32();
        q_out = 40;
        move16();
    }

    FOR( i = 0; i < FDNS_NPTS; i++ )
    {
+1 −0
Original line number Diff line number Diff line
@@ -84,4 +84,5 @@
//#define HARM_SCE_INIT
#define DIV32_OPT_NEWTON                               /* FhG: faster 32 by 32 bit division */ 
#define	MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */
#define OUT_SYNTH_BUF_OPT_v1
#endif
+10 −1
Original line number Diff line number Diff line
@@ -3171,10 +3171,11 @@ void ivas_dirac_dec_render_sf_fx(
        test();
        IF( ( h_dirac_output_synthesis_params->use_onset_filters && ( NE_16( hDirAC->hConfig->dec_param_estim, TRUE ) && NE_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) ) )
        {
#ifndef OUT_SYNTH_BUF_OPT_v1
            Scale_sig32( h_dirac_output_synthesis_state->diffuse_power_factor_fx, h_dirac_output_synthesis_state->diff_dir_power_factor_len, sub( Q31, h_dirac_output_synthesis_state->diffuse_power_factor_q ) ); // Q31
            h_dirac_output_synthesis_state->diffuse_power_factor_q = Q31;
            move16();

#endif /* OUT_SYNTH_BUF_OPT_v1 */
            exp = getScaleFactor32( h_dirac_output_synthesis_state->cy_auto_diff_smooth_fx, i_mult( num_channels_dir, hSpatParamRendCom->num_freq_bands ) );
            scale_sig32( h_dirac_output_synthesis_state->cy_auto_diff_smooth_fx, i_mult( num_channels_dir, hSpatParamRendCom->num_freq_bands ), exp ); // h_dirac_output_synthesis_state->q_cy_auto_diff_smooth + exp
            h_dirac_output_synthesis_state->q_cy_auto_diff_smooth = add( h_dirac_output_synthesis_state->q_cy_auto_diff_smooth, exp );
@@ -3184,6 +3185,7 @@ void ivas_dirac_dec_render_sf_fx(
        test();
        IF( ( EQ_16( hDirAC->hConfig->dec_param_estim, TRUE ) && NE_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) )
        {
#ifndef OUT_SYNTH_BUF_OPT_v1
            scale_sig32( h_dirac_output_synthesis_state->direct_power_factor_fx, h_dirac_output_synthesis_state->diff_dir_power_factor_len, sub( Q31, h_dirac_output_synthesis_state->direct_power_factor_q ) ); // Q31
            h_dirac_output_synthesis_state->direct_power_factor_q = Q31;
            move16();
@@ -3199,6 +3201,7 @@ void ivas_dirac_dec_render_sf_fx(
            scale_sig32( h_dirac_output_synthesis_state->direct_responses_square_fx, i_mult( num_channels_dir, hSpatParamRendCom->num_freq_bands ), sub( Q31, h_dirac_output_synthesis_state->direct_responses_square_q ) ); // Q31
            h_dirac_output_synthesis_state->direct_responses_square_q = Q31;
            move16();
#endif /* OUT_SYNTH_BUF_OPT_v1 */

            exp = getScaleFactor32( h_dirac_output_synthesis_state->cy_auto_dir_smooth_fx, i_mult( num_channels_dir, hSpatParamRendCom->num_freq_bands ) );
            scale_sig32( h_dirac_output_synthesis_state->cy_auto_dir_smooth_fx, i_mult( num_channels_dir, hSpatParamRendCom->num_freq_bands ), exp ); // h_dirac_output_synthesis_state->q_cy_auto_dir_smooth, exp
@@ -3431,6 +3434,7 @@ void ivas_dirac_dec_render_sf_fx(
        {
            scale_sig32( hDirACRend->h_output_synthesis_psd_state.cy_cross_dir_smooth_prev_fx, size_ho, sub( Q26, hDirACRend->h_output_synthesis_psd_state.q_cy_cross_dir_smooth_prev ) ); // Q26
        }
#ifndef OUT_SYNTH_BUF_OPT_v1
        IF( NE_16( hDirACRend->h_output_synthesis_psd_state.direct_power_factor_q, Q31 ) )
        {
            Scale_sig32( hDirACRend->h_output_synthesis_psd_state.direct_power_factor_fx, h_dirac_output_synthesis_state->diff_dir_power_factor_len, sub( Q31, hDirACRend->h_output_synthesis_psd_state.direct_power_factor_q ) ); // Q31
@@ -3443,6 +3447,7 @@ void ivas_dirac_dec_render_sf_fx(
        {
            Scale_sig32( hDirACRend->h_output_synthesis_psd_state.diffuse_power_factor_fx, h_dirac_output_synthesis_state->diff_dir_power_factor_len, sub( Q31, hDirACRend->h_output_synthesis_psd_state.diffuse_power_factor_q ) ); // Q31
        }
#endif /* OUT_SYNTH_BUF_OPT_v1 */
        IF( NE_16( hDirACRend->h_output_synthesis_psd_state.q_cy_auto_diff_smooth, Q26 ) )
        {
            scale_sig32( hDirACRend->h_output_synthesis_psd_state.cy_auto_diff_smooth_fx, i_mult( hDirACRend->num_outputs_diff, hDirACRend->h_output_synthesis_psd_params.max_band_decorr ), sub( Q26, hDirACRend->h_output_synthesis_psd_state.q_cy_auto_diff_smooth ) ); // Q26
@@ -3514,6 +3519,7 @@ void ivas_dirac_dec_render_sf_fx(
            qualityBasedSmFactor_fx = L_mult( st_ivas->hMasa->data.dir_decode_quality_fx, st_ivas->hMasa->data.dir_decode_quality_fx ); /* (Q15, Q15) -> Q31 */
        }

#ifndef OUT_SYNTH_BUF_OPT_v1
        IF( NE_16( hDirACRend->h_output_synthesis_psd_state.direct_power_factor_q, Q31 ) )
        {
            Scale_sig32( hDirACRend->h_output_synthesis_psd_state.direct_power_factor_fx, h_dirac_output_synthesis_state->diff_dir_power_factor_len, sub( Q31, hDirACRend->h_output_synthesis_psd_state.direct_power_factor_q ) ); // Q31
@@ -3526,14 +3532,17 @@ void ivas_dirac_dec_render_sf_fx(
        {
            Scale_sig32( hDirACRend->h_output_synthesis_psd_state.diffuse_power_factor_fx, h_dirac_output_synthesis_state->diff_dir_power_factor_len, sub( Q31, hDirACRend->h_output_synthesis_psd_state.diffuse_power_factor_q ) ); // Q31
        }
#endif /* OUT_SYNTH_BUF_OPT_v1 */
        IF( NE_16( q_diffuseness_vector, Q31 ) )
        {
            Scale_sig32( diffuseness_vector_fx, hSpatParamRendCom->num_freq_bands, sub( Q31, q_diffuseness_vector ) ); // Q31
        }
#ifndef OUT_SYNTH_BUF_OPT_v1
        IF( NE_16( hDirACRend->h_output_synthesis_psd_state.direct_responses_square_q, Q31 ) )
        {
            Scale_sig32( hDirACRend->h_output_synthesis_psd_state.direct_responses_square_fx, i_mult( hDirACRend->num_outputs_dir, hSpatParamRendCom->num_freq_bands ), sub( Q31, hDirACRend->h_output_synthesis_psd_state.direct_responses_square_q ) ); // Q31
        }
#endif /* OUT_SYNTH_BUF_OPT_v1 */

#ifdef FIX_867_CLDFB_NRG_SCALE
        exp = L_norm_arr( reference_power_smooth_fx, s_min( hSpatParamRendCom->num_freq_bands, CLDFB_NO_CHANNELS_HALF ) );
+29 −2
Original line number Diff line number Diff line
@@ -2550,7 +2550,7 @@ ivas_error ivas_jbm_dec_flush_renderer_fx(
    DECODER_TC_BUFFER_HANDLE hTcBuffer;
    Word32 output_fx[MAX_OUTPUT_CHANNELS + MAX_NUM_OBJECTS][L_FRAME48k / MAX_PARAM_SPATIAL_SUBFRAMES];
    Word32 *p_output_fx[MAX_OUTPUT_CHANNELS + MAX_NUM_OBJECTS];
    Word16 nchan_in, nchan_out;
    Word16 nchan_in, nchan_out, gd_bits, n_chan_inp, i, shift;
    IF( !st_ivas->hDecoderConfig->Opt_tsm )
    {
        return IVAS_ERR_OK;
@@ -2637,17 +2637,44 @@ ivas_error ivas_jbm_dec_flush_renderer_fx(
                    /* Convert to CICPxx; used also for ISM->CICP19->binaural_room rendering */
                    set16_fx( st_ivas->hIsmRendererData->interpolator_fx, 32767, hTcBuffer->n_samples_granularity ); // 32767=1.0f in Q15

                    ivas_ism_render_sf_fx( st_ivas, p_output_fx, *nSamplesRendered );
                    ivas_ism_render_sf_fx( st_ivas, p_output_fx, hTcBuffer->n_samples_granularity );

                    st_ivas->hCrendWrapper->p_io_qfactor = &st_ivas->hCrendWrapper->io_qfactor;
                    *st_ivas->hCrendWrapper->p_io_qfactor = 11;
                    move16();

                    shift = MAX_16;
                    move16();
                    n_chan_inp = add( st_ivas->hIntSetup.nchan_out_woLFE, st_ivas->hIntSetup.num_lfe );

                    FOR( i = 0; i < n_chan_inp; i++ )
                    {
                        shift = s_min( shift, L_norm_arr( p_output_fx[i], hTcBuffer->n_samples_granularity ) );
                    }

                    gd_bits = sub( find_guarded_bits_fx( imult1616( hTcBuffer->subframe_nbslots[0], hTcBuffer->n_samples_granularity ) ), shift );

                    *st_ivas->hCrendWrapper->p_io_qfactor = sub( *st_ivas->hCrendWrapper->p_io_qfactor, gd_bits );
                    move16();

                    FOR( i = 0; i < n_chan_inp; i++ )
                    {
                        scale_sig32( p_output_fx[i], hTcBuffer->n_samples_granularity, sub( *st_ivas->hCrendWrapper->p_io_qfactor, Q11 ) ); // Q(*st_ivas->hCrendWrapper->p_io_qfactor)
                    }

                    IF( NE_32( ( error = ivas_rend_crendProcessSubframe( st_ivas->hCrendWrapper, IVAS_AUDIO_CONFIG_7_1_4, IVAS_AUDIO_CONFIG_BINAURAL_ROOM_IR, st_ivas->hDecoderConfig, NULL,
                                                                         NULL, NULL, st_ivas->hTcBuffer, p_output_fx, p_output_fx, hTcBuffer->n_samples_granularity, st_ivas->hDecoderConfig->output_Fs ) ),
                               IVAS_ERR_OK ) )
                    {
                        return error;
                    }

                    FOR( i = 0; i < n_chan_inp; i++ )
                    {
                        scale_sig32( p_output_fx[i], hTcBuffer->n_samples_granularity, sub( Q11, *st_ivas->hCrendWrapper->p_io_qfactor ) ); // Q(11)
                    }
                    *st_ivas->hCrendWrapper->p_io_qfactor = Q11;
                    move16();
                }
            }
            ELSE
Loading