Commit 2512f444, authored by Sandesh Venkatesh, committed by Manuel Jander
Browse files

Precision improvements in mdct core enc

parent 62f09ebe
Loading
Loading
Loading
Loading
+3 −5
Original line number Diff line number Diff line
@@ -2658,16 +2658,14 @@ static void ivas_calc_p_coeffs_per_band_enc_fx(
            factor = L_max( factor, tmp ); // q_factor
        }

        tmp = L_shl_sat( 189 /* 1e-20 in Q74 */, sub( q_factor, 74 ) );
        tmp = L_shl_sat( IVAS_FIX_EPS_Q40, sub( q_factor, 40 ) );

        Word16 factor_exp = 0;
        move16();
        IF( LE_32( factor, tmp ) )
        {
            factor = 22204; // (1 / 1e-20) in Q(-52)
            factor_exp = Q15 - ( -52 );
            move32();
            move16();
            factor = 1250000000;
            factor_exp = Q31 - ( -4 );
        }
        ELSE
        {
+28 −23
Original line number Diff line number Diff line
@@ -1002,14 +1002,16 @@ void core_signal_analysis_high_bitrate_ivas_fx(
                {
                    L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (q_tcx20Win, Q15) -> Q16 + q_tcx20Win
                    L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[minWindowLen - i].v.im );                           // (Q16 + q_tcx20Win, Q15) -> Q16 + q_tcx20Win
                    tcx20Win[left_overlap + i] = sub_sat( tcx20Win[left_overlap + i], extract_h( L_tmp ) );                                // q_tcx20Win
                    L_tmp = L_shl( L_tmp, sub( 0, Q16 ) );                                                                                 // q_tcx20Win
                    tcx20Win[left_overlap + i] = sub_sat( tcx20Win[left_overlap + i], extract_l( L_tmp ) );                                // q_tcx20Win
                    move32();
                }
                FOR( i = tmp - 1; i >= 0; i-- ) /* outer left folding of shortened long ALDO slope */
                {
                    L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (q_tcx20Win, Q15) -> Q16 + q_tcx20Win
                    L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[i].v.re );                                          // (Q16 + q_tcx20Win, Q15) -> Q16 + q_tcx20Win
                    tcx20Win[left_overlap + i] = sub_sat( tcx20Win[left_overlap + i], extract_h( L_tmp ) );                                // q_tcx20Win
                    L_tmp = L_shl( L_tmp, sub( 0, Q16 ) );                                                                                 // q_tcx20Win
                    tcx20Win[left_overlap + i] = sub_sat( tcx20Win[left_overlap + i], extract_l( L_tmp ) );                                // q_tcx20Win
                    move32();
                }
            }
@@ -1030,7 +1032,7 @@ void core_signal_analysis_high_bitrate_ivas_fx(
            tmpP32 = hTcxEnc->spectrum_fx[frameno];
            assert( st->mct_chan_mode != MCT_CHAN_MODE_LFE );
            Word16 len[2], exp[2];
            hTcxEnc->spectrum_e[frameno] = sub( 16, q_tcx20Win );
            hTcxEnc->spectrum_e[frameno] = 16;
            exp[0] = exp[1] = hTcxEnc->spectrum_e[frameno];
            move16();
            move16();
@@ -1077,8 +1079,11 @@ void core_signal_analysis_high_bitrate_ivas_fx(

            FOR( i = 0; i < 2; i++ )
            {
                scale_sig32( hTcxEnc->spectrum_fx[frameno] + i * L_subframe, len[i], sub( exp[i], hTcxEnc->spectrum_e[frameno] ) );
                Scale_sig32( hTcxEnc->spectrum_fx[frameno] + i * L_subframe, len[i], sub( exp[i], hTcxEnc->spectrum_e[frameno] ) );
            }

            hTcxEnc->spectrum_e[frameno] = sub( hTcxEnc->spectrum_e[frameno], q_tcx20Win );
            move16();
        }
        ELSE /* transform_type[frameno] != TCX_5 */
        {
@@ -1122,14 +1127,16 @@ void core_signal_analysis_high_bitrate_ivas_fx(
                    {
                        L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (q_tcx20Win, Q15) -> Q16 + q_tcx20Win
                        L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[minWindowLen - i].v.im );                           // (Q16, Q15) -> Q16 + q_tcx20Win
                        tcx20Win[left_overlap + i] = sub_sat( tcx20Win[left_overlap + i], extract_h( L_tmp ) );                                // q_tcx20Win
                        L_tmp = L_shl( L_tmp, sub( 0, Q16 ) );                                                                                 // q_tcx20Win
                        tcx20Win[left_overlap + i] = sub_sat( tcx20Win[left_overlap + i], extract_l( L_tmp ) );                                // q_tcx20Win
                        move32();
                    }
                    FOR( i = tmp - 1; i >= 0; i-- ) /* outer left folding of shortened long ALDO slope */
                    {
                        L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (q_tcx20Win, Q15) -> Q16 + q_tcx20Win
                        L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[i].v.re );                                          // (Q16 + q_tcx20Win, Q15) -> Q16 + q_tcx20Win
                        tcx20Win[left_overlap + i] = sub_sat( tcx20Win[left_overlap + i], extract_h( L_tmp ) );                                // q_tcx20Win
                        L_tmp = L_shl( L_tmp, sub( 0, Q16 ) );                                                                                 // q_tcx20Win
                        tcx20Win[left_overlap + i] = sub_sat( tcx20Win[left_overlap + i], extract_l( L_tmp ) );                                // q_tcx20Win
                        move32();
                    }
                }
@@ -1182,7 +1189,7 @@ void core_signal_analysis_high_bitrate_ivas_fx(

        IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) )
        {
            Word16 q_mdstWin, scale;
            Word16 scale;
            L_subframe = idiv1616( L_frameTCX, nSubframes ); /* Q0 */

            test();
@@ -1192,8 +1199,7 @@ void core_signal_analysis_high_bitrate_ivas_fx(
                scale = sub( norm_arr( mdstWin, L_frameTCX ), 1 );
                scale = s_min( 1, scale ); // restricting the Q to zero or less
                scale_sig( mdstWin, L_frameTCX, scale );
                q_mdstWin = add( -1, scale );
                move16();
                q_mdstWin = add( add( st->q_inp, -1 ), scale );
            }
            ELSE
            {
@@ -1204,8 +1210,7 @@ void core_signal_analysis_high_bitrate_ivas_fx(
                scale = sub( norm_arr( mdstWin, sig_len ), 1 );
                scale = s_min( 0, scale ); // restricting the Q to zero or less
                scale_sig( mdstWin, sig_len, scale );
                q_mdstWin = scale;
                move16();
                q_mdstWin = add( scale, st->q_inp );
            }

            IF( EQ_16( transform_type[frameno], TCX_5 ) )
@@ -1225,17 +1230,17 @@ void core_signal_analysis_high_bitrate_ivas_fx(
                    Word32 L_tmp;
                    FOR( i = minWindowLen; i >= tmp; i-- ) /* outer left folding of shortened long ALDO slope */
                    {
                        L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (Q0, Q15) -> Q16
                        L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[minWindowLen - i].v.im );                           // (Q16, Q15) -> Q16
                        L_tmp = L_shl( L_tmp, sub( q_mdstWin, Q16 ) );                                                                         // q_mdstWin
                        L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (q_inp, Q15) -> Q16 + q_inp
                        L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[minWindowLen - i].v.im );                           // (Q16 + q_inp, Q15) -> Q16 + q_inp
                        L_tmp = L_shl( L_tmp, sub( q_mdstWin, add( Q16, st->q_inp ) ) );                                                       // q_mdstWin
                        mdstWin[left_overlap + i] = add( mdstWin[left_overlap + i], extract_l( L_tmp ) );                                      // q_mdstWin
                        move32();
                    }
                    FOR( i = tmp - 1; i >= 0; i-- ) /* outer left folding of shortened long ALDO slope */
                    {
                        L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (Q0, Q15) -> Q16
                        L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[i].v.re );                                          // (Q16, Q15) -> Q16
                        L_tmp = L_shl( L_tmp, sub( q_mdstWin, Q16 ) );                                                                         // q_mdstWin
                        L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (q_inp, Q15) -> Q16 + q_inp
                        L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[i].v.re );                                          // (Q16 + q_inp, Q15) -> Q16 + q_inp
                        L_tmp = L_shl( L_tmp, sub( q_mdstWin, add( Q16, st->q_inp ) ) );                                                       // q_mdstWin
                        mdstWin[left_overlap + i] = add( mdstWin[left_overlap + i], extract_l( L_tmp ) );                                      // q_mdstWin
                        move32();
                    }
@@ -1313,17 +1318,17 @@ void core_signal_analysis_high_bitrate_ivas_fx(
                        shift = sub( q_mdstWin, add( Q16, st->q_inp ) );
                        FOR( i = minWindowLen; i >= tmp; i-- ) /* outer left folding of shortened long ALDO slope */
                        {
                            L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (Q0, Q15) -> Q16
                            L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[minWindowLen - i].v.im );                           // (Q16, Q15) -> Q16
                            L_tmp = L_shl( L_tmp, sub( q_mdstWin, Q16 ) );                                                                         // q_mdstWin
                            L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (q_inp, Q15) -> Q16 + q_inp
                            L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[minWindowLen - i].v.im );                           // (Q16 + q_inp, Q15) -> Q16 + q_inp
                            L_tmp = L_shl( L_tmp, sub( q_mdstWin, add( Q16, st->q_inp ) ) );                                                       // q_mdstWin
                            mdstWin[left_overlap + i] = add( mdstWin[left_overlap + i], extract_l( L_tmp ) );                                      // q_mdstWin
                            move32();
                        }
                        FOR( i = tmp - 1; i >= 0; i-- ) /* outer left folding of shortened long ALDO slope */
                        {
                            L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (Q0, Q15) -> Q16
                            L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[i].v.re );                                          // (Q16, Q15) -> Q16
                            L_tmp = L_shl( L_tmp, sub( q_mdstWin, Q16 ) );                                                                         // q_mdstWin
                            L_tmp = L_mult( hTcxEnc->speech_TCX[-1 - i], st->hTcxCfg->tcx_aldo_window_1_FB[left_overlap / 2 + minWindowLen - i] ); // (q_inp, Q15) -> Q16 + q_inp
                            L_tmp = Mpy_32_16_1( L_tmp, st->hTcxCfg->tcx_mdct_window_minimumFB[i].v.re );                                          // (Q16 + q_inp, Q15) -> Q16 + q_inp
                            L_tmp = L_shl( L_tmp, sub( q_mdstWin, add( Q16, st->q_inp ) ) );                                                       // q_mdstWin
                            mdstWin[left_overlap + i] = add( mdstWin[left_overlap + i], extract_l( L_tmp ) );                                      // q_mdstWin
                            move32();
                        }
+6 −6
Original line number Diff line number Diff line
@@ -430,9 +430,9 @@ static void kernel_switch_update_transforms_fx(
        Word32 factor;

        n = extract_l( Mpy_32_32( s, 603979776 /* N_ZERO_MDCT_NS / FRAME_SIZE_NS in Q31 */ ) );
        Scale_sig( &tcxTimeSignal[n - s], add( sub( shl( s, 1 ), n ), 1 ), -Q1 );                                                                   // Q0 -> Q-1
        Scale_sig( &tcxTimeSignal[n - s], add( sub( shl( s, 1 ), n ), 1 ), sub( -Q1, q_speech ) );                                                  // q_speech -> Q-1
        wtda_ext_fx( tcxTimeSignal, windowedTimeSignal_16, extract_l( windowedTimeSignal[0] ), extract_l( windowedTimeSignal[1] ), s, kernelType ); // Q-2
        Scale_sig( &tcxTimeSignal[n - s], add( sub( shl( s, 1 ), n ), 1 ), Q1 );                                                                    // Q-1 -> Q0
        Scale_sig( &tcxTimeSignal[n - s], add( sub( shl( s, 1 ), n ), 1 ), sub( q_speech, -Q1 ) );                                                  // Q-1 -> q_speech
        Copy_Scale_sig_16_32_no_sat( windowedTimeSignal_16 /* Q(-2) */, windowedTimeSignal, s, Q16 );                                               // Q14
        scale_sig32( windowedTimeSignal, s, -Q8 /* guard bits */ );                                                                                 // Q6
        edxt_fx( windowedTimeSignal, sigR, s, kernelType, FALSE );
@@ -473,7 +473,7 @@ static void kernel_switch_update_transforms_fx(
                {
                    L_tmp = L_mult( speech_TCX[-1 - i], hTcxCfg->tcx_aldo_window_1_FB[leftOverlap / 2 + minWindowLen - i] ); // (q_speech, Q15) -> Q16 + q_speech
                    L_tmp = Mpy_32_16_1( L_tmp, hTcxCfg->tcx_mdct_window_minimumFB[minWindowLen - i].v.im );                 // (Q16 + q_speech, Q15) -> Q16 + q_speech
                    L_tmp = L_shl( L_tmp, shift );                                                                           // *q_windowedTimeSignal
                    L_tmp = L_shl( L_tmp, sub( *q_windowedTimeSignal, add( Q16, q_speech ) ) );                              // *q_windowedTimeSignal
                    windowedTimeSignal[2 + leftOverlap + i] = L_add( windowedTimeSignal[2 + leftOverlap + i], L_tmp );       // *q_windowedTimeSignal
                    move32();
                }
@@ -481,7 +481,7 @@ static void kernel_switch_update_transforms_fx(
                {
                    L_tmp = L_mult( speech_TCX[-1 - i], hTcxCfg->tcx_aldo_window_1_FB[leftOverlap / 2 + minWindowLen - i] ); // (q_speech, Q15) -> Q16 + q_speech
                    L_tmp = Mpy_32_16_1( L_tmp, hTcxCfg->tcx_mdct_window_minimumFB[i].v.re );                                // (Q16 + q_speech, Q15) -> Q16 + q_speech
                    L_tmp = L_shl( L_tmp, shift );                                                                           // *q_windowedTimeSignal
                    L_tmp = L_shl( L_tmp, sub( *q_windowedTimeSignal, add( Q16, q_speech ) ) );                              // *q_windowedTimeSignal
                    windowedTimeSignal[2 + leftOverlap + i] = L_add( windowedTimeSignal[2 + leftOverlap + i], L_tmp );       // *q_windowedTimeSignal
                    move32();
                }
@@ -492,7 +492,7 @@ static void kernel_switch_update_transforms_fx(
                {
                    L_tmp = L_mult( negate( speech_TCX[-1 - i] ), hTcxCfg->tcx_aldo_window_1_FB[leftOverlap / 2 + minWindowLen - i] ); // (q_speech, Q15) -> Q16 + q_speech
                    L_tmp = Mpy_32_16_1( L_tmp, hTcxCfg->tcx_mdct_window_minimumFB[minWindowLen - i].v.im );                           // (Q16 + q_speech, Q15) -> Q16 + q_speech
                    L_tmp = L_shl( L_tmp, shift );                                                                                     // *q_windowedTimeSignal
                    L_tmp = L_shl( L_tmp, sub( *q_windowedTimeSignal, add( Q16, q_speech ) ) );                                        // *q_windowedTimeSignal
                    windowedTimeSignal[2 + leftOverlap + i] = L_add( windowedTimeSignal[2 + leftOverlap + i], L_tmp );                 // *q_windowedTimeSignal
                    move32();
                }
@@ -500,7 +500,7 @@ static void kernel_switch_update_transforms_fx(
                {
                    L_tmp = L_mult( negate( speech_TCX[-1 - i] ), hTcxCfg->tcx_aldo_window_1_FB[leftOverlap / 2 + minWindowLen - i] ); // (q_speech, Q15) -> Q16 + q_speech
                    L_tmp = Mpy_32_16_1( L_tmp, hTcxCfg->tcx_mdct_window_minimumFB[i].v.re );                                          // (Q16 + q_speech, Q15) -> Q16 + q_speech
                    L_tmp = L_shl( L_tmp, shift );                                                                                     // *q_windowedTimeSignal
                    L_tmp = L_shl( L_tmp, sub( *q_windowedTimeSignal, add( Q16, q_speech ) ) );                                        // *q_windowedTimeSignal
                    windowedTimeSignal[2 + leftOverlap + i] = L_add( windowedTimeSignal[2 + leftOverlap + i], L_tmp );                 // *q_windowedTimeSignal
                    move32();
                }
+3 −3
Original line number Diff line number Diff line
@@ -253,7 +253,7 @@ void stereo_mdct_core_enc_fx(
    Word16 len = extract_l( Mpy_32_32( sts[0]->input_Fs, ONE_BY_FRAMES_PER_SEC_Q31 ) );
    Word16 q_com = s_min( s_min( add( sts[0]->q_inp, getScaleFactor16( sts[0]->input_fx, add( len, NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ) ) ), add( sts[0]->q_old_inp, getScaleFactor16( sts[0]->old_input_signal_fx, len ) ) ),
                          s_min( add( sts[1]->q_inp, getScaleFactor16( sts[1]->input_fx, add( len, NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ) ) ), add( sts[1]->q_old_inp, getScaleFactor16( sts[1]->old_input_signal_fx, len ) ) ) );
    q_com = sub( q_com, Q1 );
    q_com = s_min( 0, q_com );
    FOR( ch = 0; ch < CPE_CHANNELS; ch++ )
    {
        inv_mdst_spectrum_fx[ch][0] = powerSpecMsInv_fx[ch][0] = powerSpecMsInv_long_fx[ch];
@@ -275,8 +275,8 @@ void stereo_mdct_core_enc_fx(
        sts[ch]->hTcxEnc->tns_ms_flag[1] = 0;
        move16();

        scale_sig( sts[ch]->input_fx, add( extract_l( Mpy_32_32( sts[ch]->input_Fs, ONE_BY_FRAMES_PER_SEC_Q31 ) ), NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ), sub( q_com, sts[ch]->q_inp ) ); /* q_com */
        scale_sig( sts[ch]->old_input_signal_fx, extract_l( Mpy_32_32( sts[ch]->input_Fs, ONE_BY_FRAMES_PER_SEC_Q31 ) ), sub( q_com, sts[ch]->q_old_inp ) );                               /* q_com */
        Scale_sig( sts[ch]->input_fx, add( extract_l( Mpy_32_32( sts[ch]->input_Fs, ONE_BY_FRAMES_PER_SEC_Q31 ) ), NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ), sub( q_com, sts[ch]->q_inp ) ); /* Q0 */
        Scale_sig( sts[ch]->old_input_signal_fx, extract_l( Mpy_32_32( sts[ch]->input_Fs, ONE_BY_FRAMES_PER_SEC_Q31 ) ), sub( q_com, sts[ch]->q_old_inp ) );                               /* Q0 */
        sts[ch]->q_old_inp = q_com;
        move16();
        sts[ch]->q_inp = q_com;