Commit abc297aa authored by Fabian Bauer
Browse files

some more mods to FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot

parent 1bb7dd5c
Loading
Loading
Loading
Loading
+41 −15
Original line number Diff line number Diff line
@@ -821,8 +821,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                Word16 k;
                IF( ch_idx != 0 )
                {
                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop A <<<<<-|" );
                    ;
                    Word32 a, c;
                    Word16 b, b_exp, sqr_exp, q_diff_aab, q_diff_c;
                    Word32 mpy_a_a_b, mpy_diff_c, mpy_diff_aab;
@@ -912,7 +910,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                        }
                    }
                    c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0  = 5461 in Q15*/ /*Q27*/
                    pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop A <<<<<-|" );*/

                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop B <<<<<-|" );
#ifdef FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot
                    FOR( ; k < num_freq_bands; k++ )
@@ -926,16 +924,20 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                        }
                        ELSE
                        {
                            Word16 diff_c_exp;
                            Word16 diff_aab_exp;
                            IF( reference_power[k + ( ch_idx + 1 ) * num_freq_bands] == 0 )
                            {
                                mpy_a_a_b = Mpy_32_32( a, a );                                                             // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31
                                mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31
                                mpy_diff_c = Mpy_32_32( diffuseness[k], c );                                               // Q = q_diffuseness - 4
                                //q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q, add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) );
                                q_diff_aab = sub( add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ), 62 );
                                q_diff_c = sub( q_diffuseness, 4 );
                                //q_diff_aab = sub( add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ), 62 );
                                diff_aab_exp = sub( 31 + 62, add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ) );
                                //q_diff_c = sub( q_diffuseness, 4 );
                                diff_c_exp = sub( 31 + 4, q_diffuseness );

                                sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/
                                sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/
                            }
                            ELSE
                            {
@@ -945,16 +947,44 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                                mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31
                                mpy_diff_c = Mpy_32_32( diffuseness[k], c );                                               // Q = q_diffuseness - 4
                                //q_diff_aab = add( add(h_dirac_output_synthesis_state->direct_responses_q , sub( sub( 15, b_exp ), 15 )), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) );
                                q_diff_aab = add( sub( h_dirac_output_synthesis_state->direct_responses_q, b_exp ), ( sub( add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ), 62 ) ) );
                                q_diff_c = sub( q_diffuseness, 4 );
                                //q_diff_aab = add( sub( h_dirac_output_synthesis_state->direct_responses_q, b_exp ), ( sub( add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ), 62 ) ) );
                                diff_aab_exp = sub( sub( add( sub( 31 + 62, h_dirac_output_synthesis_state->direct_responses_q ), b_exp ), h_dirac_output_synthesis_state->direct_responses_q ), q_diffuseness );
                                //q_diff_c = sub( q_diffuseness, 4 );
                                diff_c_exp = sub( 31 + 4, q_diffuseness );

                                sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/
                                sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/

                            }
                        }
                        sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/
                        sqr = L_shr( sqr, 2 );             /*Q(31-sqr_exp)*/
                    }

                    IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 )
                    {
                        IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) )
                        {
                            h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_shr( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, sub( 31, sqr_exp ) ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth->Q( 31- sqr_exp )*/
                            move32();
                            Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp );
                            move16();
                        }
                        ELSE
                        {
                            sqr = L_shr( sqr, sub( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*Q(31- sqr_exp)->h_dirac_output_synthesis_state->q_cy_cross_dir_smooth*/
                            Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth;
                            move16();
                        }
                        h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k]*/
                        move32();
                    }
                    ELSE
                    {
                        h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q(31- sqr_exp)*/
                        move32();
                        Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp );
                        move16();
                    }
#else
                    FOR( ; k < num_freq_bands; k++ )
                    {
@@ -1011,7 +1041,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                        sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/
                        sqr = L_shr( sqr, 2 );             /*Q(31-sqr_exp)*/
                    }
#endif

                    IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 )
                    {
@@ -1038,12 +1067,13 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                        Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp );
                        move16();
                    }
#endif


                    pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop B <<<<<-|" );*/
                }
                ELSE
                {
                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop C <<<<<-|" );
                    Word32 sqr_inp, mpy_diff, sqr;
                    Word16 sqr_exp;
                    /*Diffuseness modelling energy compensation*/
@@ -1081,8 +1111,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                            move16();
                        }
                    }
                    pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop C <<<<<<-|" );*/
                    push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop D <<<<<<-|" );
                    FOR( ; k < num_freq_bands; k++ )
                    {
                        mpy_diff = Mpy_32_32( diffuseness[k], L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_decorr_fx, ONE_IN_Q29 /*1 Q29*/ ) ); // Q = q_diffuseness - 1
@@ -1116,8 +1144,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                            move16();
                        }
                    }

                    pop_wmops();/*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop D <<<<<-|" );/*/
                }
            }
            pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" );/*/