Commit 6e315e53 authored by Sandesh Venkatesh
Browse files

Merge branch '1072-complexity-high-complexity-overhead-for-osba-decoding' into 'main'

Resolve "Complexity: High complexity overhead for OSBA Decoding"

Closes #1072

See merge request !1091
parents 9ae2ae46 c9e2364c
Loading
Loading
Loading
Loading
+48 −0
Original line number Diff line number Diff line
@@ -967,6 +967,28 @@ void computeDiffuseness_fixed(

        q_tmp = add( q_factor_energy[i], min_q_shift1 );


#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS
        /* Single-pass update of energy_slow[]: one loop handles both signs of
         * shift_q by folding the re-scaling of the stored values into the
         * multiplier of the multiply-add (shiftEquiv) and the re-scaling of the
         * new input into one shift count (shift_qtotal). */
        Word16 shift_q = sub( q_tmp, q_ene ); /* difference between the Q of the new contribution (q_tmp) and the running Q (q_ene) */
        Word32 shiftEquiv;                    /* multiplier applied to the stored energy_slow[k] */
        Word16 shift_qtotal;                  /* shift applied to the new input p_tmp_c[k] */
        /* Exactly one of the two complementary tests below is true, so
         * shiftEquiv is always assigned before the loop.
         * NOTE(review): lower-case `if` is used here while the neighbouring code
         * uses the IF/FOR macros - presumably deliberate for complexity
         * counting; confirm before merging them into an if/else. */
        if ( shift_q < 0 )
        {
            /* presumably a logical right shift by -shift_q, i.e. 2^shift_q expressed in Q31 - confirm L_lshl semantics */
            shiftEquiv = L_lshl( 0x80000000, shift_q );
        }
        if ( shift_q >= 0 )
        {
            /* largest positive Word32 (just below 1.0 if read as Q31): stored values are kept (almost) unchanged */
            shiftEquiv = L_add( 0x7FFFFFFF, 0 );
        }
        /* min_q_shift1 reduced by shift_q only when shift_q is positive */
        shift_qtotal = sub( min_q_shift1, s_max( shift_q, 0 ) );

        FOR( k = 0; k < num_freq_bands; k++ )
        {
            tmp = L_shl( p_tmp_c[k], shift_qtotal );
            /* presumably tmp + energy_slow[k] * shiftEquiv with rounding - confirm Madd_32_32_r semantics */
            energy_slow[k] = Madd_32_32_r( tmp, energy_slow[k], shiftEquiv );
            move32();
        }
#else
        Word16 shift_q = sub( q_tmp, q_ene );
        IF( shift_q < 0 )
        {
@@ -986,6 +1008,8 @@ void computeDiffuseness_fixed(
                move32();
            }
        }
#endif


        q_ene = s_min( q_ene, q_tmp );

@@ -993,6 +1017,28 @@ void computeDiffuseness_fixed(
        q_tmp = add( q_factor_intensity[i], min_q_shift2 );

        shift_q = sub( q_tmp, q_intensity );
#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS
        /* Single-pass update of intensity_slow[] for all DIRAC_NUM_DIMS
         * dimensions: the re-scaling of the stored values is folded into the
         * multiplier of the multiply-add (shiftEquiv) and the re-scaling of the
         * new input into one shift count (shift_qtotal).
         * Exactly one of the two complementary tests below is true, so
         * shiftEquiv is always (re)assigned before the loops.
         * NOTE(review): lower-case `if` is used here while the neighbouring code
         * uses the IF/FOR macros - presumably deliberate for complexity
         * counting; confirm. */
        if ( shift_q < 0 )
        {
            /* presumably a logical right shift by -shift_q, i.e. 2^shift_q expressed in Q31 - confirm L_lshl semantics */
            shiftEquiv = L_lshl( 0x80000000, shift_q );
        }
        if ( shift_q >= 0 )
        {
            /* shift by 0 leaves the constant 0x7FFFFFFF unchanged.
             * NOTE(review): the energy path above obtains the same constant with L_add( 0x7FFFFFFF, 0 ) - consider using one form in both places. */
            shiftEquiv = L_lshl( 0x7FFFFFFF, 0 );
        }
        /* min_q_shift2 reduced by shift_q only when shift_q is positive */
        shift_qtotal = sub( min_q_shift2, s_max( shift_q, 0 ) );

        FOR( j = 0; j < DIRAC_NUM_DIMS; ++j )
        {
            p_tmp = buffer_intensity[j][i]; /* input row for dimension j, index i */
            FOR( k = 0; k < num_freq_bands; k++ )
            {
                tmp = L_shl( p_tmp[k], shift_qtotal );
                /* intensity_slow is stored dimension-major: row j starts at j * num_freq_bands.
                 * presumably tmp + intensity_slow[...] * shiftEquiv with rounding - confirm Madd_32_32_r semantics */
                intensity_slow[j * num_freq_bands + k] = Madd_32_32_r( tmp, intensity_slow[j * num_freq_bands + k], shiftEquiv );
                move32();
            }
        }
#else
        IF( shift_q > 0 )
        {
            FOR( j = 0; j < DIRAC_NUM_DIMS; ++j )
@@ -1019,6 +1065,8 @@ void computeDiffuseness_fixed(
                }
            }
        }
#endif

        q_intensity = s_min( q_intensity, q_tmp );
    }

lib_com/options.h

100755 → 100644
+2 −1
Original line number Diff line number Diff line
@@ -165,6 +165,7 @@
#define FIX_ISSUE_1247
#define NONBE_FIX_1087_OOB_SBA_DTX_RS                   /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */
#define FIX_1285_DECODER_CRASH

#define FIX_1072_SPEEDUP_gainpanning            /* FhG: issue 1072: minor WMOPS tuning in ivas_dirac_dec_output_synthesis_process_slot_fx(), nonbe */
#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESS      /* FhG: issue 1072: minor WMOPS tuning in computeDiffuseness_fixed(), nonbe */
#define FIX_1320_LOWRATE_ACELP
#endif
+53 −0
Original line number Diff line number Diff line
@@ -1081,6 +1081,16 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
            Word16 temp_q = sub( add( h_dirac_output_synthesis_state->direct_power_factor_q, h_dirac_output_synthesis_state->direct_responses_q ), 31 );
            IF( LT_16( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) )
            {
#ifdef FIX_1072_SPEEDUP_gainpanning /*is there any difference in any bitstream?*/
                /* Bring the first tmp16 entries of cy_cross_dir_smooth_fx to the
                 * lower Q (temp_q). The shift count is computed once, outside the
                 * loop; it is negative here because this branch is entered only
                 * when temp_q < q_cy_cross_dir_smooth. */
                Word16 temp_q1 = sub( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth );
                FOR( Word16 kk = 0; kk < tmp16; kk++ )
                {
                    h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk] = L_shl( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk], temp_q1 ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ->temp_q*/
                    move32();
                }
                /* record the new Q of the buffer */
                h_dirac_output_synthesis_state->q_cy_cross_dir_smooth = temp_q;
                move16();
#else
                FOR( Word16 kk = 0; kk < tmp16; kk++ )
                {
                    h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk] = L_shl( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk], sub( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ->temp_q*/
@@ -1088,7 +1098,49 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                }
                h_dirac_output_synthesis_state->q_cy_cross_dir_smooth = temp_q;
                move16();
#endif
            }
#ifdef FIX_1072_SPEEDUP_gainpanning
            /* Accumulate direct_power_factor_fx[i] * direct_responses_fx[ch][i] into
             * cy_cross_dir_smooth_fx[ch][i] for every channel from
             * s_min( 4, nchan_transport ) up to num_channels_dir, aligning the
             * product (Q = temp_q) to the Q of the accumulator.
             * temp_q1 is the shift that takes the product from temp_q to
             * q_cy_cross_dir_smooth.
             * NOTE(review): the IF block just above sets q_cy_cross_dir_smooth to
             * temp_q whenever temp_q was smaller, so temp_q1 appears to be <= 0
             * here; if so, the ELSE branch with L_shl/L_add below is never taken
             * - confirm. */
            Word16 temp_q1 = sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, temp_q );
            FOR( ch_idx = s_min( 4, nchan_transport ); ch_idx < num_channels_dir; ch_idx++ )
            {
                /* NOTE(review): this test and the temp_q1 sign test do not depend on ch_idx (loop-invariant). */
                IF( NE_16( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) )
                {
                    Word16 i;
                    Word32 aux; /* product direct_power_factor * direct_response for one band */
                    IF( temp_q1 < 0 )
                    {
                        /* presumably 2^temp_q1 expressed in Q31 (logical right shift of 0x80000000) - confirm L_lshl semantics */
                        Word32 temp_q1_equiv = L_lshl( 0x80000000, temp_q1 );
                        FOR( i = 0; i < num_freq_bands; i++ )
                        {
                            aux = Mpy_32_32( h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] );
                            /* presumably accumulator + aux * temp_q1_equiv, i.e. the down-scaling is folded into the multiply-add - confirm Madd_32_32 semantics */
                            h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i], aux, temp_q1_equiv );
                            move32();
                        }
                    }
                    ELSE
                    {
                        /* non-negative shift: scale the product explicitly, then add */
                        FOR( i = 0; i < num_freq_bands; i++ )
                        {
                            aux = Mpy_32_32( h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] );
                            aux = L_shl( aux, temp_q1 );
                            h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i], aux );
                            move32();
                        }
                    }
                }
                ELSE
                {
                    /* Qs already match: plain multiply-accumulate without any re-scaling */
                    Word16 i;
                    FOR( i = 0; i < num_freq_bands; i++ )
                    {
                        h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i], h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] );
                        move32();
                    }
                }
            }

#else
            FOR( ch_idx = s_min( 4, nchan_transport ); ch_idx < num_channels_dir; ch_idx++ )
            {
                v_mult_fixed( h_dirac_output_synthesis_state->direct_power_factor_fx,
@@ -1106,6 +1158,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                             &h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands],
                             num_freq_bands, 0 ); /*Q(h_dirac_output_synthesis_state->q_cy_cross_dir_smooth)*/
            }
#endif

            /*Diffuse gain*/
            FOR( ch_idx = s_min( 4, nchan_transport ); ch_idx < num_channels_diff; ch_idx++ )