Commit 2c5740bf authored by Jain, Adityaraj
Browse files

experiment to pass h1 without scaling

parent 072987c1
Loading
Loading
Loading
Loading
Loading
+23 −12
Original line number Diff line number Diff line
@@ -237,6 +237,7 @@ void decoder_tcx_imdct_fx(
    Word16 q_a_itf = 15;
    Word16 x_e = sub( 31, q_x );
    move16();
    Word16 shift_q = sub( q_x, q_win );

    /*-----------------------------------------------------------------*
     * Initializations
@@ -364,9 +365,10 @@ void decoder_tcx_imdct_fx(

    IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) )
    {
        Word16 copy_len = s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) );
        set32_fx( x_tmp_fx, 0, L_FRAME_PLUS );
        Copy32( x_fx, x_tmp_fx, s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ) );    // q_x
        Copy32( x_fx, xn_bufFB_fx, s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ) ); // q_x
        Copy32( x_fx, x_tmp_fx, copy_len );    // q_x
        Copy32( x_fx, xn_bufFB_fx, copy_len ); // q_x
    }
    ELSE IF( ( st->element_mode == EVS_MONO ) )
    {
@@ -374,8 +376,9 @@ void decoder_tcx_imdct_fx(
    }
    ELSE
    {
        Copy32( x_fx, x_tmp_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) );    // q_x
        Copy32( x_fx, xn_bufFB_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); // q_x
        Word16 copy_len = s_max( L_spec, s_max( L_frame, L_frameTCX ) );
        Copy32( x_fx, x_tmp_fx, copy_len );    // q_x
        Copy32( x_fx, xn_bufFB_fx, copy_len ); // q_x
    }

    IF( ( st->igf != 0 ) )
@@ -416,24 +419,29 @@ void decoder_tcx_imdct_fx(

    FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ )
    {
        xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], sub( q_x, q_win ) ) ); // q_x
        xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], shift_q ) ); // q_x
        move16();
    }

    Word16 ratio_e;
    Word16 ratio = BASOP_Util_Divide1616_Scale( L_frameTCX_glob, L_frame_glob, &ratio_e ); // Q = 15-ratio_e. * FSCALE_DENOM is (1 << 9)
    ratio = shr( ratio, sub( 6, ratio_e ) );

    IF( st->element_mode != EVS_MONO )
    {
        IMDCT_ivas_fx( x_tmp_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB,
                       hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index,
                       kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, max( L_frameTCX, L_spec ) >> 1, L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, FSCALE_DENOM * L_frameTCX_glob / L_frame_glob, acelp_zir_fx, q_win );
                       kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win );
    }
    ELSE
    {

        IMDCT_ivas_fx( x_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index,
                       kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, FSCALE_DENOM * L_frameTCX_glob / L_frame_glob, acelp_zir_fx, q_win );
                       kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win );
    }
    FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ )
    {
        xn_bufFB_fx[ind] = L_shl( xn_bufFB_fx_16[ind], sub( q_x, q_win ) ); // Q_x
        xn_bufFB_fx[ind] = L_shl( L_deposit_l( xn_bufFB_fx_16[ind] ), shift_q ); // Q_x
    }

    IF( ( bfi == 0 ) )
@@ -453,19 +461,22 @@ void decoder_tcx_imdct_fx(

        IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) )
        {
            res_m = BASOP_Util_Divide1616_Scale( L_frame_glob, L_FRAME, &res_e );
            st->old_fpitch = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e );
            // Using sat as a single instruction shifts and extracts
            st->old_fpitch = W_shl_sat_l( W_mult0_32_32( st->old_fpitch, L_frame_glob ), -8 ); // Divide by 256 ==> SHR by 8
            move32();
        }

        IF( GT_16( st->element_mode, EVS_MONO ) )
        {
            res_m = BASOP_Util_Divide1616_Scale( L_frameTCX_glob, L_frame_glob, &res_e );
            st->old_fpitchFB = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e );
            move32();
        }
        ELSE
        {
            res_m = BASOP_Util_Divide1616_Scale( L_frameTCX, L_frame, &res_e );
            st->old_fpitchFB = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e );
            move32();
        }
    }

@@ -475,7 +486,7 @@ void decoder_tcx_imdct_fx(
        Copy( xn_buf_fx + L_frame, hTcxDec->syn_Overl, overlap ); // Q(-2)
        FOR( Word16 ind = 0; ind < overlapFB; ind++ )
        {
            hTcxDec->syn_OverlFB[ind] = (Word16) L_shr( xn_bufFB_fx[( ind + L_frameTCX )], sub( q_x, q_win ) ); // q_x
            hTcxDec->syn_OverlFB[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + L_frameTCX )], shift_q ) ); // q_x
        }
    }

@@ -483,7 +494,7 @@ void decoder_tcx_imdct_fx(
    Copy( xn_buf_fx + sub( shr( overlap, 1 ), tcx_offset ), synth_fx, L_frame_glob ); // Q(-2)
    FOR( Word16 ind = 0; ind < L_frameTCX_glob; ind++ )
    {
        synthFB_fx[ind] = (Word16) L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], sub( q_x, q_win ) ); // q_x
        synthFB_fx[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], shift_q ) ); // q_x
    }


+40 −41
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ static void ivas_binRenderer_filterModule_fx(
    Word32 *filterStatesLeftRealPtr_fx, *filterStatesLeftImagPtr_fx;
    Word16 *Q_filterStates;
    const Word32 *filterTapsLeftRealPtr_fx, *filterTapsLeftImagPtr_fx, *filterTapsRightRealPtr_fx, *filterTapsRightImagPtr_fx;
    Word16 shift_q;

    FOR( bandIdx = 0; bandIdx < hBinRenderer->conv_band; bandIdx++ )
    {
@@ -87,11 +88,6 @@ static void ivas_binRenderer_filterModule_fx(
            FOR( k = 0; k < numTimeSlots; k++ )
            {
                Word64 outRealLeft_fx = 0, outRealRight_fx = 0, outImagLeft_fx = 0, outImagRight_fx = 0;
                Word64 W_sub1 = 0, W_add1 = 0, W_sub2 = 0, W_add2 = 0;
                move64();
                move64();
                move64();
                move64();
                move64();
                move64();
                move64();
@@ -104,31 +100,32 @@ static void ivas_binRenderer_filterModule_fx(
                    filterStatesLeftImagPtr_fx[tapIdx] = filterStatesLeftImagPtr_fx[tapIdx - 1];
                    move32();

                    W_sub1 = W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ),
                                    W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
                    W_add1 = W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ),
                                    W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
                    W_sub2 = W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ),
                                    W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
                    W_add2 = W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ),
                                    W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
                    shift_q = sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] );
                    outRealLeft_fx = W_shr( outRealLeft_fx, shift_q );
                    outImagLeft_fx = W_shr( outImagLeft_fx, shift_q );
                    outRealRight_fx = W_shr( outRealRight_fx, shift_q );
                    outImagRight_fx = W_shr( outImagRight_fx, shift_q );

                    outRealLeft_fx = W_shr( outRealLeft_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
                    outImagLeft_fx = W_shr( outImagLeft_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
                    outRealRight_fx = W_shr( outRealRight_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
                    outImagRight_fx = W_shr( outImagRight_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
                    outRealLeft_fx = W_mac_32_32( outRealLeft_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] );
                    outRealLeft_fx = W_mac_32_32( outRealLeft_fx, L_negate( filterStatesLeftImagPtr_fx[tapIdx] ), filterTapsLeftImagPtr_fx[tapIdx] ); // Q30 + Q_filterStates[tapIdx - 1]

                    Q_filterStates[tapIdx] = Q_filterStates[tapIdx - 1];
                    move16();
                    outImagLeft_fx = W_mac_32_32( outImagLeft_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] );
                    outImagLeft_fx = W_mac_32_32( outImagLeft_fx, filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] );

                    /* Left Real and Imag */
                    outRealLeft_fx = W_add( outRealLeft_fx, W_sub1 ); // Q29 + Q_filterStates[1]
                    outImagLeft_fx = W_add( outImagLeft_fx, W_add1 ); // Q29 + Q_filterStates[1]
                    outRealRight_fx = W_mac_32_32( outRealRight_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] );
                    outRealRight_fx = W_mac_32_32( outRealRight_fx, L_negate( filterStatesLeftImagPtr_fx[tapIdx] ), filterTapsRightImagPtr_fx[tapIdx] );

                    /* Right Real and Imag*/
                    outRealRight_fx = W_add( outRealRight_fx, W_sub2 ); // Q29 + Q_filterStates[1]
                    outImagRight_fx = W_add( outImagRight_fx, W_add2 ); // Q29 + Q_filterStates[1]
                    outImagRight_fx = W_mac_32_32( outImagRight_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] );
                    outImagRight_fx = W_mac_32_32( outImagRight_fx, filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] );

                    Q_filterStates[tapIdx] = Q_filterStates[tapIdx - 1];
                    move16();
                }
                shift_q = add( sub( Q_filterStates[1], Q_curr ), 1 );
                outRealLeft_fx = W_shr( outRealLeft_fx, shift_q );
                outImagLeft_fx = W_shr( outImagLeft_fx, shift_q );
                outRealRight_fx = W_shr( outRealRight_fx, shift_q );
                outImagRight_fx = W_shr( outImagRight_fx, shift_q );

                filterStatesLeftRealPtr_fx[0] = CLDFB_real[chIdx][k][bandIdx];
                move32();
@@ -141,27 +138,29 @@ static void ivas_binRenderer_filterModule_fx(
                /* Left Real and Imag */
                // Q29 + Q_curr

                out_Conv_CLDFB_real[0][k][bandIdx] = W_add( out_Conv_CLDFB_real[0][k][bandIdx],
                                                            W_add( W_shr( outRealLeft_fx, sub( Q_filterStates[1], Q_curr ) ),
                                                                   W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsLeftRealPtr_fx[0] ),
                                                                          W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsLeftImagPtr_fx[0] ) ) ) ); // Q29
                Word32 temp1 = L_shr( filterStatesLeftRealPtr_fx[0], 1 );
                Word32 temp2 = L_shr( filterStatesLeftImagPtr_fx[0], 1 );


                outRealLeft_fx = W_mac_32_32( outRealLeft_fx, temp1, filterTapsLeftRealPtr_fx[0] );
                outRealLeft_fx = W_mac_32_32( outRealLeft_fx, L_negate( temp2 ), filterTapsLeftImagPtr_fx[0] );
                out_Conv_CLDFB_real[0][k][bandIdx] = W_add( out_Conv_CLDFB_real[0][k][bandIdx], outRealLeft_fx ); // Q29
                move64();
                out_Conv_CLDFB_imag[0][k][bandIdx] = W_add( out_Conv_CLDFB_imag[0][k][bandIdx],
                                                            W_add( W_shr( outImagLeft_fx, sub( Q_filterStates[1], Q_curr ) ),
                                                                   W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsLeftImagPtr_fx[0] ),
                                                                          W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsLeftRealPtr_fx[0] ) ) ) ); // Q29

                outImagLeft_fx = W_mac_32_32( outImagLeft_fx, temp1, filterTapsLeftImagPtr_fx[0] );
                outImagLeft_fx = W_mac_32_32( outImagLeft_fx, temp2, filterTapsLeftRealPtr_fx[0] );
                out_Conv_CLDFB_imag[0][k][bandIdx] = W_add( out_Conv_CLDFB_imag[0][k][bandIdx], outImagLeft_fx ); // Q29
                move64();

                /* Right Real and Imag */
                out_Conv_CLDFB_real[1][k][bandIdx] = W_add( out_Conv_CLDFB_real[1][k][bandIdx],
                                                            W_add( W_shr( outRealRight_fx, sub( Q_filterStates[1], Q_curr ) ),
                                                                   W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsRightRealPtr_fx[0] ),
                                                                          W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsRightImagPtr_fx[0] ) ) ) ); // Q29
                outRealRight_fx = W_mac_32_32( outRealRight_fx, temp1, filterTapsRightRealPtr_fx[0] );
                outRealRight_fx = W_mac_32_32( outRealRight_fx, L_negate( temp2 ), filterTapsRightImagPtr_fx[0] );
                out_Conv_CLDFB_real[1][k][bandIdx] = W_add( out_Conv_CLDFB_real[1][k][bandIdx], outRealRight_fx ); // Q29
                move64();
                out_Conv_CLDFB_imag[1][k][bandIdx] = W_add( out_Conv_CLDFB_imag[1][k][bandIdx],
                                                            W_add( W_shr( outImagRight_fx, sub( Q_filterStates[1], Q_curr ) ),
                                                                   W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsRightImagPtr_fx[0] ),
                                                                          W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsRightRealPtr_fx[0] ) ) ) ); // Q29

                outImagRight_fx = W_mac_32_32( outImagRight_fx, temp1, filterTapsRightImagPtr_fx[0] );
                outImagRight_fx = W_mac_32_32( outImagRight_fx, temp2, filterTapsRightRealPtr_fx[0] );
                out_Conv_CLDFB_imag[1][k][bandIdx] = W_add( out_Conv_CLDFB_imag[1][k][bandIdx], outImagRight_fx ); // Q29
                move64();
            }
        }
+15 −42
Original line number Diff line number Diff line
@@ -410,8 +410,6 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
    const Word16 nchan_in                                                           /* i  : number of input channels                              */
)
{
    Word16 cx_init_e;
    Word16 cx_init_imag_e;
    Word16 band_idx, ch_idx;
    Word16 brange[2];
    Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
@@ -421,10 +419,9 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
    Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
    Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
    Word16 output_e;
    Word16 i, j, tmp1, tmp2, tmp1_e, tmp2_e, shift_imag, shift_real;
    Word32 L_tmp;
    Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
    Word16 band, num_bands;

    Word16 cx_fx_norm, cx_imag_fx_norm;
    /* estimate input covariance */
    /* Already stack here instead of in the process_subframe */

@@ -451,8 +448,11 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
    move16();
    imag_in_e = ImagBuffer_e;
    move16();
    shift_real = sub( L_norm_arr( real_in_buffer_fx, imult1616( num_bands, nchan_in ) ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
    shift_imag = sub( L_norm_arr( imag_in_buffer_fx, imult1616( num_bands, nchan_in ) ), find_guarded_bits_fx( add( num_bands, 1 ) ) );

    Word16 buf_len = imult1616( num_bands, nchan_in );

    shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
    shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );

    real_in_e = sub( real_in_e, shift_real );
    imag_in_e = sub( imag_in_e, shift_imag );
@@ -460,50 +460,23 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(

    output_e = s_max( real_in_e, imag_in_e );

    FOR( i = 0; i < num_bands * nchan_in; ++i )
    {
        real_in_buffer_fx[i] = L_shr( real_in_buffer_fx[i], sub( output_e, RealBuffer_e ) ); // Q(31-output_e)
        move32();
        imag_in_buffer_fx[i] = L_shr( imag_in_buffer_fx[i], sub( output_e, ImagBuffer_e ) ); // Q(31-output_e)
        move32();
    }
    scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
    scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );

    cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
    v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );

    v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
    cx_init_e = tmp1_e;
    move16();
    cx_init_imag_e = tmp2_e;
    move16();

    // normalizing both the matrices to a common exponent for a better precision
    tmp1 = 0;
    move16();
    tmp2 = 0;
    move16();

    FOR( j = 0; j < PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS; j++ )
    {
        L_tmp = BASOP_Util_Add_Mant32Exp( cx_fx[j], cx_init_e, 0, 0, &tmp1_e );
        L_tmp = BASOP_Util_Add_Mant32Exp( cx_imag_fx[j], cx_init_imag_e, 0, 0, &tmp2_e );
        tmp1 = s_max( tmp1, tmp1_e );
        tmp2 = s_max( tmp2, tmp2_e );
    }
    cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
    cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );

    FOR( j = 0; j < PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS; j++ )
    {
        L_tmp = BASOP_Util_Add_Mant32Exp( cx_fx[j], cx_init_e, 0, 0, &tmp1_e );
        cx_fx[j] = L_shr( L_tmp, sub( tmp1, tmp1_e ) ); // Q(31-tmp1)
        move32();
        L_tmp = BASOP_Util_Add_Mant32Exp( cx_imag_fx[j], cx_init_imag_e, 0, 0, &tmp2_e );
        cx_imag_fx[j] = L_shr( L_tmp, sub( tmp2, tmp2_e ) ); // Q(31-tmp2)
        move32();
    }
    scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
    scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );

    *cx_e = tmp1;
    *cx_e = sub( tmp1_e, cx_fx_norm );
    move16();
    *cx_imag_e = tmp2;
    *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
    move16();

    return;
+67 −7
Original line number Diff line number Diff line
@@ -912,13 +912,55 @@ static void ApplyRotation_fx(
)
{
    Word16 ch;
    Word16 temp_exp;

    *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */
    move32();
    *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */
    move32();

#ifdef SVD_WMOPS_OPT
    Word16 c_q = sub( 31, c_e );
    Word16 s_q = sub( 31, s_e );
    Word32 op1, op2;
    Word16 op_e;

    // Bring c and s to same Q
    IF( GT_16( c_q, s_q ) )
    {
        op1 = L_shr( c, sub( c_q, s_q ) );
        op2 = s;
        move32();
        op_e = s_q;
        move16();
    }
    ELSE
    {
        op1 = c;
        move32();
        op2 = L_shr( s, sub( s_q, c_q ) );
        op_e = c_q;
        move16();
    }
    op_e = add( op_e, 1 ); // 64 bit mac -> +1

    FOR( ch = 0; ch < nChannels; ch++ )
    {
        x11 = singularVector[ch][currentIndex2];
        move32();
        x12 = singularVector[ch][currentIndex1];
        move32();

        Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e
        temp = W_shr( temp, op_e );                                      // Q(singularVector)
        singularVector[ch][currentIndex2] = W_sat_l( temp );             // Q(singularVector)
        move32();

        temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e
        temp = W_shr( temp, op_e );                                           // Q(singularVector)
        singularVector[ch][currentIndex1] = W_sat_l( temp );                  // Q(singularVector)
        move32();
    }
#else
#ifndef FIX_MINOR_SVD_WMOPS_MR1010X
    FOR( ch = 0; ch < nChannels; ch++ )
    {
@@ -952,6 +994,7 @@ static void ApplyRotation_fx(
        move32();
    }

#endif
#endif

    return;
@@ -1605,26 +1648,43 @@ static void singularVectorsAccumulationLeft_fx(
            t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */
            t_ii_e = add( 1, sub( temp_exp, t_ii_e ) );
#endif
            Word16 tempe;
            Word32 temp = BASOP_Util_Divide3232_Scale_cadence( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe );
            tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) );
            // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
            FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
            {
                norm_y = 0;
                move32();
                norm_y_e = 0;
                Word64 acc = 0;
                move64();
                Word64 prod[16];
                Word16 prod_e[16];
                Word16 max_e = -31;
                move16();
                FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
                {
#ifndef FIX_1010_OPT_SINGLE_RESCALE
                    norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
#else
                    norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
                    prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] );
                    prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] );
                    max_e = s_max( max_e, prod_e[k] );
#endif
                }
                t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,

                FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
                {
                    acc = W_add( acc, W_shr( prod[k], sub( max_e, prod_e[k] ) ) );
                }
                Word16 acc_e = W_norm( acc );
                acc = W_shl( acc, acc_e );

                norm_y = W_extract_h( acc );
                norm_y_e = add( sub( max_e, acc_e ), 1 );
                t_jj = Mpy_32_32( temp, norm_y );
#ifndef FIX_1010_OPT_SINGLE_RESCALE
                t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
#else
                t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
                t_jj_e = add( tempe, norm_y_e );
#endif
                FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
                {
+46 −47

File changed.

Preview size limit exceeded, changes collapsed.