Commit 4491d00b authored by Sandesh Venkatesh
Browse files

Merge branch '1101-IVAS-SPAR-DEC-UPMIXER-SF' into 'main'

Optimizations to SPAR decoder functions

Closes #1101

See merge request !871
parents d57385c1 210d6205
Loading
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -3836,7 +3836,11 @@ void ivas_compute_spar_params_fx(
            }
            ELSE
            {
#ifdef FIX_11_1_IVAS_SPAR_DEC_UPMIXER_SF_RND_COEFFS
                hSparMd->band_coeffs[b + ( i_ts * IVAS_MAX_NUM_BANDS )].pred_re_fx[i] = L_shr_r( hSparMd->band_coeffs[b + ( i_ts * IVAS_MAX_NUM_BANDS )].pred_re_fx[i], sub( tmp, 22 ) ); // q22
#else
                hSparMd->band_coeffs[b + ( i_ts * IVAS_MAX_NUM_BANDS )].pred_re_fx[i] = L_shr( hSparMd->band_coeffs[b + ( i_ts * IVAS_MAX_NUM_BANDS )].pred_re_fx[i], sub( tmp, 22 ) ); // q22
#endif
                move32();
            }
        }

lib_com/options.h

100644 → 100755
+7 −0
Original line number Diff line number Diff line
@@ -139,6 +139,13 @@
#define FIX_ISSUE_1230                          /* Ittiam: Fix for issue 1230: Basop Enc audible differences and distortion @16kbps */
#define NONBE_1211_DTX_BR_SWITCHING             /* VA: port float issue 1211: fix crash in MASA DTX bitrate switching */
#define FIX_1189_GSC_IVAS_OMASA                 /* VA: Fix for issue 1189: Bitstream desynchronization due to reading/writing of the GSC_IVAS_mode parameter */
#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF       /* FhG: fix for issue 1101: complexity of spar dec upmixer */
/* Note: each of the following issue-1101 compile switches is independent of the others and can be enabled or disabled on its own */
#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_MADD_ADD_WEIGHTS  /* FhG: Defines 1.0f-weight variables, uses Madd operation instead of L_add_sat */
#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_SPLIT_LOOPS           /* FhG: Splits single loop with IF-statements into two low-complex loops */
#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_HQ_CONSTANTS          /* FhG: IMPROVE PRECISION: Uses 1/6 and 1/20 in full-precise Q31 constants instead of Q15 */
#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_UNIQUE_SHL        /* FhG: Uses unique shift amount in each loop iteration */
/* NOTE(review): the switch below is spelled FIX_11_1_..., unlike its FIX_1101_... siblings - presumably a typo; confirm before renaming, since the #ifdef that tests it must be renamed in the same change */
#define FIX_11_1_IVAS_SPAR_DEC_UPMIXER_SF_RND_COEFFS            /* FhG: ivas_spar_com.c: Zeroes very small negative coeffs via L_shr_r (was L_shr) */
#define FIX_ISSUE_1237                          /* VA: replacement of Copy_Scale_sig_16_32_DEPREC() that are doing 16 bits left shift by Copy_Scale_sig_16_32_no_sat() */
#define FIX_ISSUE_1237_KEEP_EVS_BE              /* VA: Fix to keep EVS bitexactness to 26.444 */
#endif
+128 −2
Original line number Diff line number Diff line
@@ -1160,6 +1160,10 @@ void ivas_spar_get_parameters_fx(

    split_band = SPAR_DIRAC_SPLIT_START_BAND;
    move16();
#ifdef FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_MADD_ADD_WEIGHTS
    Word16 add_weight_fx = sub( MAX_WORD16, weight_fx );
    Word16 add_weight_20ms_fx = sub( MAX_WORD16, weight_20ms_fx );
#endif
    FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ )
    {
        FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ )
@@ -1174,9 +1178,13 @@ void ivas_spar_get_parameters_fx(
                {
                    IF( GT_16( hSpar->i_subframe, 3 ) )
                    {

#ifndef FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_MADD_ADD_WEIGHTS
                        par_mat_fx[out_ch][in_ch][spar_band] = L_add_sat( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts0][out_ch][in_ch][spar_band], sub( MAX_WORD16, weight_fx ) ),
                                                                          Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts1][out_ch][in_ch][spar_band], weight_fx ) ); /*hSpar->hMdDec->Q_mixer_mat*/
#else
                        par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts1][out_ch][in_ch][spar_band], weight_fx ),
                                                                           hSpar->hMdDec->mixer_mat_prev_fx[ts0][out_ch][in_ch][spar_band], add_weight_fx );
#endif
                        move32();
                    }
                    ELSE
@@ -1193,7 +1201,12 @@ void ivas_spar_get_parameters_fx(
                    /* 20ms Transport channel reconstruction with matching encoder/decoder processing */
                    Word16 prev_idx = SPAR_DIRAC_SPLIT_START_BAND < IVAS_MAX_NUM_BANDS ? 1 : 0; /* if SPAR_DIRAC_SPLIT_START_BAND == IVAS_MAX_NUM_BANDS, then the sub-frame mixer_mat delay line is not active */
                    move16();
#ifndef FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_MADD_ADD_WEIGHTS
                    par_mat_fx[out_ch][in_ch][spar_band] = L_add_sat( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[prev_idx][out_ch][in_ch][spar_band], sub( MAX_WORD16, weight_20ms_fx ) ), Mpy_32_16_1( hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band], weight_20ms_fx ) ); /*hSpar->hMdDec->Q_mixer_mat*/
#else
                    par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[prev_idx][out_ch][in_ch][spar_band], add_weight_20ms_fx ),
                                                                       hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band], weight_20ms_fx ); /*hSpar->hMdDec->Q_mixer_mat*/
#endif
                    move32();
                }
            }
@@ -1353,10 +1366,17 @@ static void ivas_spar_calc_smooth_facs_fx(
            smooth_long_avg_fx[b] = L_add( smooth_long_avg_fx[b], smooth_buf_fx[b][i] ); // Q0
            move32();
        }
#ifndef FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_HQ_CONSTANTS
        smooth_short_avg_fx[b] = Mpy_32_16_1( smooth_short_avg_fx[b], 5461 /*(1/6 in Q15)*/ ); // Q0
        move32();
        smooth_long_avg_fx[b] = Mpy_32_16_1( smooth_long_avg_fx[b], 1639 /*(1/20 in Q15)*/ ); // Q0
        move32();
#else
        smooth_short_avg_fx[b] = Mpy_32_32( smooth_short_avg_fx[b], 357913941 /*(1/6 in Q31)*/ );                                               // Q0
        move32();
        smooth_long_avg_fx[b] = Mpy_32_32( smooth_long_avg_fx[b], 107374182 /*(1/20 in Q31)*/ ); // Q0
        move32();
#endif

        /* calculate smoothing factor based on energy averages */
        /* reduce factor for higher short-term energy */
@@ -1846,6 +1866,9 @@ void ivas_spar_dec_upmixer_sf_fx(
        ivas_spar_calc_smooth_facs_fx( cldfb_in_ts_re_fx[0], cldfb_in_ts_im_fx[0], q_cldfb, num_spar_bands, hSpar->subframe_nbslots[hSpar->subframes_rendered],
                                       hSpar->subframes_rendered == 0, &hSpar->hFbMixer->pFb->fb_bin_to_band, hSpar->hMdDec->smooth_fac_fx, hSpar->hMdDec->smooth_buf_fx );
    }
#ifdef FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_UNIQUE_SHL
    Word16 sh_l = sub( 31, q1 );
#endif
    FOR( ts = 0; ts < hSpar->subframe_nbslots[hSpar->subframes_rendered]; ts++ )
    {
        md_idx = hSpar->render_to_md_map[( ts + slot_idx_start )]; /*Q0*/
@@ -1873,7 +1896,7 @@ void ivas_spar_dec_upmixer_sf_fx(
                }
            }
        }

#ifndef FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_SPLIT_LOOPS
        FOR( cldfb_band = 0; cldfb_band < num_cldfb_bands; cldfb_band++ )
        {
            Word32 out_re_fx[IVAS_SPAR_MAX_CH];
@@ -1921,13 +1944,116 @@ void ivas_spar_dec_upmixer_sf_fx(
            /*update CLDFB data with the parameter-modified data*/
            FOR( out_ch = 0; out_ch < numch_out; out_ch++ )
            {
#ifndef FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_UNIQUE_SHL
                cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], sub( 31, q1 ) ); /*Q=6*/
                move32();
                cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], sub( 31, q1 ) ); /*Q=6*/
                move32();
#else
                cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], sh_l ); /*Q=6*/
                move32();
                cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], sh_l ); /*Q=6*/
                move32();
#endif
            }
        }
#else /* FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_SPLIT_LOOPS */
        /* Note: This version splits the cldfb band loop into 2 loops, removing some inner-loop IF_statements */
        Word16 min_cldf_band = s_min( CLDFB_PAR_WEIGHT_START_BAND, num_cldfb_bands );
        Word32 out_re_fx[IVAS_SPAR_MAX_CH];
        Word32 out_im_fx[IVAS_SPAR_MAX_CH];
        Word32 cldfb_par_fx; /*q1*/
        ivas_fb_bin_to_band_data_t *bin2band = &hSpar->hFbMixer->pFb->fb_bin_to_band;

        /* First loop from cldfb_band=0 till min_cldf_band (CLDFB_PAR_WEIGHT_START_BAND) */
        FOR( cldfb_band = 0; cldfb_band < min_cldf_band; cldfb_band++ )
        {
            spar_band = bin2band->p_cldfb_map_to_spar_band[cldfb_band]; /*Q0*/
            move16();
            FOR( out_ch = 0; out_ch < numch_out; out_ch++ )
            {
                out_re_fx[out_ch] = 0;
                move32();
                out_im_fx[out_ch] = 0;
                move32();
                FOR( in_ch = 0; in_ch < numch_in; in_ch++ )
                {
                    IF( b_skip_mat[out_ch][in_ch] == 0 )
                    {
                        cldfb_par_fx = mixer_mat_fx[out_ch][in_ch][spar_band]; /*q1*/
                        move32();
                        out_re_fx[out_ch] = Madd_32_32( out_re_fx[out_ch], cldfb_in_ts_re_fx[in_ch][ts][cldfb_band], cldfb_par_fx ); /*q1-25*/
                        move32();
                        out_im_fx[out_ch] = Madd_32_32( out_im_fx[out_ch], cldfb_in_ts_im_fx[in_ch][ts][cldfb_band], cldfb_par_fx ); /*q1-25*/
                        move32();
                    }
                }
            }
            /*update CLDFB data with the parameter-modified data*/
            FOR( out_ch = 0; out_ch < numch_out; out_ch++ )
            {
#ifndef FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_UNIQUE_SHL
                cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], sub( 31, q1 ) ); /*Q=6*/
                move32();
                cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], sub( 31, q1 ) ); /*Q=6*/
                move32();
#else
                cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], sh_l ); /*Q=6*/
                move32();
                cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], sh_l ); /*Q=6*/
                move32();
#endif
            }
        }


        /* Second loop from min_cldf_band (CLDFB_PAR_WEIGHT_START_BAND) till num_cldfb_bands */
        FOR( ; cldfb_band < num_cldfb_bands; cldfb_band++ )
        {
            FOR( out_ch = 0; out_ch < numch_out; out_ch++ )
            {
                Word32 Out_re_fx = L_add( 0, 0 );
                Word32 Out_im_fx = L_add( 0, 0 );
                FOR( in_ch = 0; in_ch < numch_in; in_ch++ )
                {
                    IF( b_skip_mat[out_ch][in_ch] == 0 )
                    {
                        Word64 acc = 0;
                        move64();

                        cldfb_par_fx = 0;
                        move32();
                        FOR( spar_band = bin2band->p_spar_start_bands[cldfb_band]; spar_band < num_spar_bands; spar_band++ )
                        {
                            /* accumulate contributions from all SPAR bands */
                            acc = W_mac_32_32( acc, mixer_mat_fx[out_ch][in_ch][spar_band], bin2band->pp_cldfb_weights_per_spar_band_fx[cldfb_band][spar_band] ); // q1+ Q23
                        }
                        cldfb_par_fx = W_shl_sat_l( acc, -23 );                                                      // q1
                        Out_re_fx = Madd_32_32( Out_re_fx, cldfb_in_ts_re_fx[in_ch][ts][cldfb_band], cldfb_par_fx ); /*q1-25*/
                        Out_im_fx = Madd_32_32( Out_im_fx, cldfb_in_ts_im_fx[in_ch][ts][cldfb_band], cldfb_par_fx ); /*q1-25*/
                    }
                }
                out_re_fx[out_ch] = Out_re_fx;
                out_im_fx[out_ch] = Out_im_fx;
            }

            /*update CLDFB data with the parameter-modified data*/
            FOR( out_ch = 0; out_ch < numch_out; out_ch++ )
            {
#ifndef FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_UNIQUE_SHL
                cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], sub( 31, q1 ) ); /*Q=6*/
                move32();
                cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], sub( 31, q1 ) ); /*Q=6*/
                move32();
#else
                cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], sh_l ); /*Q=6*/
                move32();
                cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], sh_l ); /*Q=6*/
                move32();
#endif
            }
        }
#endif /* FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_SPLIT_LOOPS */
        test();
        IF( ( EQ_16( ( add( add( slot_idx_start, ts ), 1 ) ), hSpar->num_slots ) ) || ( NE_16( ( shr( md_idx, 2 ) /* md_idx / JBM_CLDFB_SLOTS_IN_SUBFRAME */ ), ( hSpar->render_to_md_map[( ( slot_idx_start + ts ) + 1 )] / JBM_CLDFB_SLOTS_IN_SUBFRAME /*It's value is 4*/ ) ) ) )
        {