Commit 69a34489 authored by Arthur Tritthart
Browse files

Changes made to improve the function:

- Replaced some L_add_sat + Mpy_32_16_1 combinations using Word16 with Madd_32_32 using Word32 (better accuracy)
- Computed the frequently used shift operand only once, outside the loop
- Divided the cldfb loop with inner IF-statements into 2 loops without inner IF-statements
- Replaced the 16-bit smoothing constants for 1/6 and 1/20 with 32-bit values (better accuracy)
The output file is slightly different (9e-5); the WMOPS are reduced by 34 (max. 150 -> 116).
parent 552563cd
Loading
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -55,7 +55,7 @@

#define SUPPORT_JBM_TRACEFILE                   /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */

/*#define WMOPS*/                                   /* Activate complexity and memory counters */
/*#define WMOPSi*/                                   /* Activate complexity and memory counters */
#ifdef WMOPS
/*#define WMOPS_PER_FRAME*/                     /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */
/*#define WMOPS_DETAIL*/                        /* Output detailed complexity printout for every function. Increases runtime overhead */
@@ -91,4 +91,5 @@
#define FIX_1054_IF_ELSE_CMPLX                          /* VA: Fix 1054 incorrect counting of complexity when ELSE-IF sequence is encountered in two functions */
#define FIX_1052_COPY_CMPLX_DISCREPANCY       /* VA: modify IF-ELSE statements used in Copy*() functions to avoid dependency on x[] and y[] in RAM */
#define FIX_1049_SHR_RO_COMPLEXITY              /* VA: fix for issue 1049: incorrect counting of complexity in the shr_ro() function */
#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF       /* FhG: fix for issue 1101: complexity of spar dec upmixer */
#endif
+94 −10
Original line number Diff line number Diff line
@@ -1160,6 +1160,8 @@ void ivas_spar_get_parameters_fx(

    split_band = SPAR_DIRAC_SPLIT_START_BAND;
    move16();
    Word16 add_weight_fx = sub( MAX_WORD16, weight_fx );
    Word16 add_weight_20ms_fx = sub( MAX_WORD16, weight_20ms_fx );
    FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ )
    {
        FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ )
@@ -1175,8 +1177,8 @@ void ivas_spar_get_parameters_fx(
                    IF( GT_16( hSpar->i_subframe, 3 ) )
                    {

                        par_mat_fx[out_ch][in_ch][spar_band] = L_add_sat( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts0][out_ch][in_ch][spar_band], sub( MAX_WORD16, weight_fx ) ),
                                                                          Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts1][out_ch][in_ch][spar_band], weight_fx ) ); /*hSpar->hMdDec->Q_mixer_mat*/
                        par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts1][out_ch][in_ch][spar_band], weight_fx ),
                                                                                        hSpar->hMdDec->mixer_mat_prev_fx[ts0][out_ch][in_ch][spar_band], add_weight_fx );
                        move32();
                    }
                    ELSE
@@ -1193,7 +1195,8 @@ void ivas_spar_get_parameters_fx(
                    /* 20ms Transport channel reconstruction with matching encoder/decoder processing */
                    Word16 prev_idx = SPAR_DIRAC_SPLIT_START_BAND < IVAS_MAX_NUM_BANDS ? 1 : 0; /* if SPAR_DIRAC_SPLIT_START_BAND == IVAS_MAX_NUM_BANDS, then the sub-frame mixer_mat delay line is not active */
                    move16();
                    par_mat_fx[out_ch][in_ch][spar_band] = L_add_sat( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[prev_idx][out_ch][in_ch][spar_band], sub( MAX_WORD16, weight_20ms_fx ) ), Mpy_32_16_1( hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band], weight_20ms_fx ) ); /*hSpar->hMdDec->Q_mixer_mat*/
                    par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[prev_idx][out_ch][in_ch][spar_band], add_weight_20ms_fx ),
                                                                                    hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band], weight_20ms_fx); /*hSpar->hMdDec->Q_mixer_mat*/
                    move32();
                }
            }
@@ -1353,9 +1356,9 @@ static void ivas_spar_calc_smooth_facs_fx(
            smooth_long_avg_fx[b] = L_add( smooth_long_avg_fx[b], smooth_buf_fx[b][i] ); // Q0
            move32();
        }
        smooth_short_avg_fx[b] = Mpy_32_16_1( smooth_short_avg_fx[b], 5461 /*(1/6 in Q15)*/ ); // Q0
        smooth_short_avg_fx[b] = Mpy_32_32( smooth_short_avg_fx[b], 357913941 /*(1/6 in Q31)*/ ); // Q0
        move32();
        smooth_long_avg_fx[b] = Mpy_32_16_1( smooth_long_avg_fx[b], 1639 /*(1/20 in Q15)*/ ); // Q0
        smooth_long_avg_fx[b] = Mpy_32_32( smooth_long_avg_fx[b], 107374182 /*(1/20 in Q31)*/ ); // Q0
        move32();

        /* calculate smoothing factor based on energy averages */
@@ -1843,6 +1846,8 @@ void ivas_spar_dec_upmixer_sf_fx(
        ivas_spar_calc_smooth_facs_fx( cldfb_in_ts_re_fx[0], cldfb_in_ts_im_fx[0], q_cldfb, num_spar_bands, hSpar->subframe_nbslots[hSpar->subframes_rendered],
                                       hSpar->subframes_rendered == 0, &hSpar->hFbMixer->pFb->fb_bin_to_band, hSpar->hMdDec->smooth_fac_fx, hSpar->hMdDec->smooth_buf_fx );
    }

    Word16 sh_l = sub( 31, q1 );
    FOR( ts = 0; ts < hSpar->subframe_nbslots[hSpar->subframes_rendered]; ts++ )
    {
        md_idx = hSpar->render_to_md_map[( ts + slot_idx_start )]; /*Q0*/
@@ -1870,7 +1875,7 @@ void ivas_spar_dec_upmixer_sf_fx(
                }
            }
        }

#ifndef FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF
        FOR( cldfb_band = 0; cldfb_band < num_cldfb_bands; cldfb_band++ )
        {
            Word32 out_re_fx[IVAS_SPAR_MAX_CH];
@@ -1902,8 +1907,8 @@ void ivas_spar_dec_upmixer_sf_fx(
                            FOR( spar_band = bin2band->p_spar_start_bands[cldfb_band]; spar_band < num_spar_bands; spar_band++ )
                            {
                                /* accumulate contributions from all SPAR bands */
                                Word16 tmp = extract_l( L_shr( bin2band->pp_cldfb_weights_per_spar_band_fx[cldfb_band][spar_band], 7 ) ); /*Q15*/
                                cldfb_par_fx = L_add_sat( cldfb_par_fx, Mpy_32_16_1( mixer_mat_fx[out_ch][in_ch][spar_band], tmp ) );     /*q1*/
                                Word32 tmp = L_shl( bin2band->pp_cldfb_weights_per_spar_band_fx[cldfb_band][spar_band], 9 ); /*Q22 -> Q31*/
                                cldfb_par_fx = Madd_32_32( cldfb_par_fx, mixer_mat_fx[out_ch][in_ch][spar_band], tmp );                                                                                                                                                /*q1*/
                            }
                        }

@@ -1918,13 +1923,92 @@ void ivas_spar_dec_upmixer_sf_fx(
            /*update CLDFB data with the parameter-modified data*/
            FOR( out_ch = 0; out_ch < numch_out; out_ch++ )
            {
                cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], sub( 31, q1 ) ); /*Q=6*/
                cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], sh_l ); /*Q=6*/
                move32();
                cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], sh_l ); /*Q=6*/
                move32();
            }
        }
#else
        /* Note: This version splits the cldfb band loop into 2 loops, removing some inner-loop IF_statements */
        Word16 min_cldf_band = s_min( CLDFB_PAR_WEIGHT_START_BAND, num_cldfb_bands );
        Word32 out_re_fx[IVAS_SPAR_MAX_CH];
        Word32 out_im_fx[IVAS_SPAR_MAX_CH];
        Word32 cldfb_par_fx; /*q1*/
        ivas_fb_bin_to_band_data_t *bin2band = &hSpar->hFbMixer->pFb->fb_bin_to_band;

        /* First loop from cldfb_band=0 till min_cldf_band (CLDFB_PAR_WEIGHT_START_BAND) */
        FOR( cldfb_band = 0; cldfb_band < min_cldf_band; cldfb_band++ )
        {
            spar_band = bin2band->p_cldfb_map_to_spar_band[cldfb_band]; /*Q0*/
            move16();
            FOR( out_ch = 0; out_ch < numch_out; out_ch++ )
            {
                out_re_fx[out_ch] = 0;
                move32();
                out_im_fx[out_ch] = 0;
                move32();
                FOR( in_ch = 0; in_ch < numch_in; in_ch++ )
                {
                    IF( b_skip_mat[out_ch][in_ch] == 0 )
                    {
                        cldfb_par_fx = mixer_mat_fx[out_ch][in_ch][spar_band]; /*q1*/
                        move32();
                        out_re_fx[out_ch] = Madd_32_32( out_re_fx[out_ch], cldfb_in_ts_re_fx[in_ch][ts][cldfb_band], cldfb_par_fx ); /*q1-25*/
                        move32();
                        out_im_fx[out_ch] = Madd_32_32( out_im_fx[out_ch], cldfb_in_ts_im_fx[in_ch][ts][cldfb_band], cldfb_par_fx ); /*q1-25*/
                        move32();
                    }
                }
            }
            /*update CLDFB data with the parameter-modified data*/
            FOR( out_ch = 0; out_ch < numch_out; out_ch++ )
            {
                cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], sh_l ); /*Q=6*/
                move32();
                cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], sub( 31, q1 ) ); /*Q=6*/
                cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], sh_l ); /*Q=6*/
                move32();
            }
        }


        /* Second loop from min_cldf_band (CLDFB_PAR_WEIGHT_START_BAND) till num_cldfb_bands */
        FOR( ; cldfb_band < num_cldfb_bands; cldfb_band++ )
        {
            FOR( out_ch = 0; out_ch < numch_out; out_ch++ )
            {
                Word32 Out_re_fx = L_add(0,0);
                Word32 Out_im_fx = L_add(0,0);
                FOR( in_ch = 0; in_ch < numch_in; in_ch++ )
                {
                    IF( b_skip_mat[out_ch][in_ch] == 0 )
                    {
                        cldfb_par_fx = 0;
                        move32();
                        FOR( spar_band = bin2band->p_spar_start_bands[cldfb_band]; spar_band < num_spar_bands; spar_band++ )
                        {
                            /* accumulate contributions from all SPAR bands */
                            Word32 tmp = L_shl( bin2band->pp_cldfb_weights_per_spar_band_fx[cldfb_band][spar_band], 9 ); /*Q22 -> Q31*/
                            cldfb_par_fx = Madd_32_32( cldfb_par_fx, mixer_mat_fx[out_ch][in_ch][spar_band], tmp );      /*q1*/
                        }
                        Out_re_fx = Madd_32_32( Out_re_fx, cldfb_in_ts_re_fx[in_ch][ts][cldfb_band], cldfb_par_fx ); /*q1-25*/
                        Out_im_fx = Madd_32_32( Out_im_fx, cldfb_in_ts_im_fx[in_ch][ts][cldfb_band], cldfb_par_fx ); /*q1-25*/
                    }
                }
                out_re_fx[out_ch] = Out_re_fx;
                out_im_fx[out_ch] = Out_im_fx;
            }

            /*update CLDFB data with the parameter-modified data*/
            FOR( out_ch = 0; out_ch < numch_out; out_ch++ )
            {
                cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], sh_l ); /*Q=6*/
                move32();
                cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], sh_l ); /*Q=6*/
                move32();
            }
        }
#endif
        test();
        IF( ( EQ_16( ( add( add( slot_idx_start, ts ), 1 ) ), hSpar->num_slots ) ) || ( NE_16( ( shr( md_idx, 2 ) /* md_idx / JBM_CLDFB_SLOTS_IN_SUBFRAME */ ), ( hSpar->render_to_md_map[( ( slot_idx_start + ts ) + 1 )] / JBM_CLDFB_SLOTS_IN_SUBFRAME /*It's value is 4*/ ) ) ) )
        {