Commit d9628e4d authored by Sandesh Venkatesh
Browse files

Merge branch 'ism_path_opt_nbe_2' into 'main'

ISM decoder path optimizations - 2

See merge request !1900
parents 5eb18d8b 0b34b364
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -82,6 +82,7 @@
#define OPT_MCT_ENC_V2_BE
#define OPT_MCH_DEC_V1_NBE
#define OPT_MASA_DEC_V1_NBE
#define OPT_MASA_DEC_V2_NBE /* selects the Word64-accumulator code paths guarded by this switch in GenerateITD_fx() and sincResample_fx() */
#define OPT_MCT_ENC_48KB_NBE
#define OPT_MCH_DEC_V1_BE
#define OPT_MCT_ENC_V2_NBE
+25 −2
Original line number Diff line number Diff line
@@ -604,11 +604,18 @@ static void GenerateITD_fx(
        }
        ELSE
        {
#ifdef OPT_MASA_DEC_V2_NBE
            Word16 temp_e = add( imult1616( EvIdx[p], model->azimDim3 ), elev_offset );
#endif /* OPT_MASA_DEC_V2_NBE */
            FOR( i = 0; i < num_az_idx; i++ )
            {
                modelEval->BM_ITD_fx[qp + i] = L_shl( Mpy_32_32( modelEval->elevBfVecITD_fx[p], modelEval->azimBfVecITD_fx[i] ), 1 ); // Q30
                move32();
#ifdef OPT_MASA_DEC_V2_NBE
                BM_idx[qp + i] = add( temp_e, AzIdx[i] );
#else  /* OPT_MASA_DEC_V2_NBE */
                BM_idx[qp + i] = add( add( imult1616( EvIdx[p], model->azimDim3 ), elev_offset ), AzIdx[i] );
#endif /* OPT_MASA_DEC_V2_NBE */
                move16();
            }
            qp = add( qp, num_az_idx );
@@ -623,19 +630,35 @@ static void GenerateITD_fx(
    }

    /* Matrix multiplication (row x column) */
#ifndef OPT_MASA_DEC_V2_NBE
    modelEval->itdMod_fx = 0;
    move16();
    itdMod_e = 0;
    move16();
#else  /* OPT_MASA_DEC_V2_NBE */
    Word64 temp = 0;
    move64();
    Word16 res_e = add( model->W_e, 1 );
#endif /* OPT_MASA_DEC_V2_NBE */
    FOR( i = 0; i < qp; i++ )
    {
        Word16 tmp_e;
        index = BM_idx[i];
        move32();
#ifndef OPT_MASA_DEC_V2_NBE
        Word16 tmp_e;
        modelEval->itdMod_fx = BASOP_Util_Add_Mant32Exp( modelEval->itdMod_fx, itdMod_e, Mpy_32_32( modelEval->BM_ITD_fx[i], model->W_fx[index] ), add( model->W_e, 1 ), &tmp_e );
        itdMod_e = tmp_e;
        move16();
#else  /* OPT_MASA_DEC_V2_NBE */
        temp = W_add( temp, Mpy_32_32( modelEval->BM_ITD_fx[i], model->W_fx[index] ) );
#endif /* OPT_MASA_DEC_V2_NBE */
    }
#ifdef OPT_MASA_DEC_V2_NBE
    itdMod_e = W_norm( temp );
    itdMod_e = sub( itdMod_e, 32 );
    modelEval->itdMod_fx = W_shl_sat_l( temp, itdMod_e );
    itdMod_e = sub( res_e, itdMod_e );
#endif /* OPT_MASA_DEC_V2_NBE */

    Word32 tmp32 = Mpy_32_16_1( modelEval->itdMod_fx, model->resamp_factor_fx ); // Q = 31 - ( itdMod_e + 1 )
    Word16 tmp_q = sub( 30, itdMod_e );
+26 −2
Original line number Diff line number Diff line
@@ -191,6 +191,9 @@ static void sincResample_fx(
    Word16 t_step_e;
    Word32 t_frac_fx;
    Word16 t_frac_e;
#ifdef OPT_MASA_DEC_V2_NBE
    Word64 t_frac_fx_acc;
#endif               /* OPT_MASA_DEC_V2_NBE */
    Word64 tmp64_fx; // Qx + 32
    const Word32 *p_mid_fx;
    const Word32 *p_forward_fx;
@@ -212,6 +215,10 @@ static void sincResample_fx(

    /* Compute fractional time step */
    t_step_fx = L_deposit_h( BASOP_Util_Divide1616_Scale( length_in, length_out, &t_step_e ) ); // exp(t_step_e)
#ifdef OPT_MASA_DEC_V2_NBE
    t_frac_fx_acc = 0;
    move64();
#endif /* OPT_MASA_DEC_V2_NBE */
    t_frac_fx = 0;
    move32();
    t_frac_e = 0;
@@ -224,13 +231,21 @@ static void sincResample_fx(
        t = extract_l( L_shr( t_frac_plus_eps, sub( 31, t_frac_plus_eps_e ) ) );                                  // Q0

        /* Calculate the sinc-index for the center value of the sinc */
        Word32 center_val;
        Word16 center_val_e;
#ifndef OPT_MASA_DEC_V2_NBE
        Word32 center_val;
        center_val = BASOP_Util_Add_Mant32Exp( t_frac_plus_eps, t_frac_plus_eps_e, L_negate( L_deposit_h( t ) ), 15, &center_val_e ); // exp(center_val_e)
        center_val_e = add( center_val_e, 6 );                                                                                        // center_val * SFX_SPAT_BIN_NUM_SUBSAMPLES (i.e. 64)
        center_val = BASOP_Util_Add_Mant32Exp( center_val, center_val_e, ONE_IN_Q29, 1, &center_val_e );                              // exp(center_val_e)
        snc0 = extract_l( L_shr( center_val, sub( 31, center_val_e ) ) );                                                             // Q0

#else                                                                                                                                 /* OPT_MASA_DEC_V2_NBE */
        Word64 center_val;
        center_val = W_sub( t_frac_plus_eps, W_shl( t, sub( 31, t_frac_plus_eps_e ) ) ); // exp(center_val_e)
        center_val_e = add( t_frac_plus_eps_e, 6 );
        Word16 com_e = s_max( 0, center_val_e );
        center_val = W_add( W_shr( center_val, sub( com_e, center_val_e ) ), W_shl( 1, sub( 30, com_e ) ) ); // exp(center_val_e)
        snc0 = extract_l( W_shl_sat_l( center_val, sub( com_e, 31 ) ) );
#endif                                                                                                                                /* OPT_MASA_DEC_V2_NBE */
        /* Run convolution forward and backward from mid point */
        p_mid_fx = input_fx + t;                                                      // Qx
        p_forward_fx = p_mid_fx + 1;                                                  // Qx
@@ -255,7 +270,16 @@ static void sincResample_fx(
        move32();

        /* Advance fractional time */
#ifndef OPT_MASA_DEC_V2_NBE
        t_frac_fx = BASOP_Util_Add_Mant32Exp( t_frac_fx, t_frac_e, t_step_fx, t_step_e, &t_frac_e ); // exp( t_frac_fx )
#else                                                                                                /* OPT_MASA_DEC_V2_NBE */
        t_frac_fx_acc = W_add( t_frac_fx_acc, t_step_fx ); // t_step_e
        Word16 hdrm = W_norm( t_frac_fx_acc );
        hdrm = sub( hdrm, 32 );
        t_frac_fx = W_shl_sat_l( t_frac_fx_acc, hdrm );
        t_frac_e = sub( t_step_e, hdrm );
        move16();
#endif                                                                                               /* OPT_MASA_DEC_V2_NBE */
    }

    return;