Commit f4725f6e authored by Jonas Svedberg's avatar Jonas Svedberg
Browse files

modularized msvq_enc stage1 dct code should be BE

parent 03d98545
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -158,7 +158,8 @@

#define ERI_FDCNGVQ_LOW_ROM                             /* Eri: Contribution #31 Table ROM saving for IVAS FDCNG-VQ modes       */

#define ERI_MSVQ_CLEANUP                                /* Eri:   BE modularization of msvq encoder side DCT c-code     */
#define ERI_MSVQ_CLEANUP                             /* Eri:   BE modularization, mainly for future BASOP of msvq encoder side DCT21&DCT24  c-code   */
                


/* ################## End DEVELOPMENT switches ######################### */
+138 −94
Original line number Diff line number Diff line
@@ -56,29 +56,46 @@ void dctT2_N_apply_matrix( const float *input, float *output, const int16_t dct_

#ifdef ERI_MSVQ_CLEANUP

  

int16_t msvq_stage1_dct_search(                              /* o  : (p_max , best candidate sofar )                                  */
                                const float *u,              /* i  : target                                   */
                                const int16_t N,             /* i  : target length and  IDCT synthesis length */
                                const int16_t maxC_st1, /* i  : number of candidates to provide */

                                float *invTrfMatrix, /* i/o: IDCT synthesis matrix for dim N          */
                                /* parameterization of segmented DCT domain storage  */
                                const int16_t maxC_st1,      /* i  : number of final stage 1 candidates to provide */

                                const DCTTYPE dcttype,       /* e.g. DCT_T2_16_XX, DCT_T2_24_XX; */
                                const int16_t max_dct_trunc, /* i:  maximum of truncation lenghts */
                                float *invTrfMatrix,         /* i : IDCT synthesis matrix for dim N          */

                                const float *midQ_truncQ,                 /* i: midQ  vector */
                                const float *dct_invScaleF,               /* i: global inv scale factors*/
                                const float *dct_scaleF,                  /* i: global scale factors*/
                                const Word16 n_segm,                      /* i: number of segments  */
                                const Word16 *cols_per_segment,           /* i: remaining length per segment  */
                                const Word16 *trunc_dct_cols_per_segment, /* i: trunc length per segment   */
                                const Word16 *entries_per_segment,        /* i: number of rows per segment */
                                const Word16 *cum_entries_per_segment,    /* i: number of cumulative entries  */

                                const Word8 *const W8Qx_dct_sections[],  /*i: Word8(byte) segment  table ptrs  */
                                const Word16 *col_syn_shift[],           /*i: columnwise  syn shift tables  */
                                const Word8 *segm_neighbour_fwd,         /*i: circular neighbour list fwd */
                                const Word8 *segm_neighbour_rev,         /*i: circular neighbour list reverse */
                                const Word16 npost_check,                /*i: number of neigbours to check , should be even */

                                float *st1_mse_ptr,                /*i: dynRAM buffer for MSEs  */
                                int16_t *indices_st1_local, /*   o:  selecetd cand indices */
                                int16_t *indices_st1_local,        /*o:  selected cand indices */
                                float *st1_syn_vec_ptr,            /*i/o:  buffer for IDCT24 synthesis  */
                                float *dist1_ptr            /*   o: resulting stage 1 MSEs in DCT24 domain */ 
                                float *dist1_ptr                   /*o:  resulting stage 1 MSEs in DCT-N domain */
)

{  /* stage 1 search in truncated dct domain without any weights  */
{ /* stage1 search in a segmentwise  truncated dctN  domain without  weights  */

    float dct_target[FDCNG_VQ_DCT_MAXTRUNC];
    float u_mr[FDCNG_VQ_MAX_LEN];
    float u_mr_scaled[FDCNG_VQ_MAX_LEN];
    float mse_trunc_segm[FDCNG_VQ_DCT_NSEGM];
    float tmp, check_mse;
    float mse; /* Word32 */
    float mse; /* Word32 in BASOP */

    int16_t p_max, c, c2, segm, j_full, j, i;
    int16_t n_ana, p_mins[2], idx_min[2];
@@ -89,82 +106,82 @@ int16_t msvq_stage1_dct_search( /* o : (p_max , best candi
    float *st1_mse_pair;
    int16_t *st1_idx_pair;

    DCTTYPE dcttype = DCT_T2_24_XX; 
    float tmp2;
    int16_t check_ind[FDCNG_VQ_DCT_NPOST];
    assert( ( npost_check % 2 == 0 ) && ( npost_check <= FDCNG_VQ_DCT_NPOST ) );

    n_ana = N; /*  VQ stage#1 core is currentlu always using stored DCT24 coeffs */
    assert( n_ana >= FDCNG_VQ_DCT_MAXTRUNC ); /* check for  FDCNGVQ  WB , SWB, FB operation  */
    assert( n_segm <= FDCNG_VQ_DCT_NSEGM );

    /*remove mean/mid fdcng stage#1 vector,  in original subband domain */
    v_sub( u, cdk1r_tr_midQ_truncQ, u_mr, n_ana );
    n_ana = N;                        /*  VQ stage#1 core is currently always using stored DCT N coeffs */
    assert( n_ana >= max_dct_trunc ); /* check for  FDCNGVQ  WB , SWB, FB operation  */

    v_multc( u_mr, fdcng_dct_invScaleF[1], u_mr_scaled, n_ana ); /*scale up target to upscaled  W8x storage  domain  */
    /* remove  mid  stage#1 vector,  in original  input  domain */
    v_sub( u, midQ_truncQ, u_mr, n_ana );

    v_multc( u_mr, dct_invScaleF[1], u_mr_scaled, n_ana ); /* scale up target to upscaled  W8x storage  domain  */
    /* 16.0-->scale up from Q0 to  search  domain  in Q4,  not really  needed in BASOP , impl. by shifts */

    dctT2_N_apply_matrix( (const float *) u_mr_scaled, dct_target, min( FDCNG_VQ_DCT_MAXTRUNC, n_ana ), n_ana, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, dcttype );
    dctT2_N_apply_matrix( (const float *) u_mr_scaled, dct_target, min( max_dct_trunc, n_ana ), n_ana, invTrfMatrix, max_dct_trunc, dcttype );

    mse = 0;
    /* init search state  ptr's  at the top */
    set_f( dist1_ptr, FLT_MAX, maxC_st1 );
    st1_mse_pair = &( dist1_ptr[0] );         /* req. ptr init +=2 */
    st1_idx_pair = &( indices_st1_local[0] ); /* req. ptr init +=2 */
    for ( segm = 0; segm < FDCNG_VQ_DCT_NSEGM; segm++ )

    for ( segm = 0; segm < n_segm; segm++ )
    {
        /*  point to a  new paired location for each segment  */
        st1_mse_pair += 2; /* req. ptr init +=2  */
        st1_idx_pair += 2; /* req.  ptr init     +=2 */
        p_max = 0;                                       /* req. to point to  1 or 0  */
        p_max = 0;         /* req. to point to one of 1 or 0 , this init  can potentially be omitted here as p_max is always 1 or 0 */

        /* compute segment common truncation error in dctN domain */
        mse_trunc_segm[segm] = 0;
        mse_trunc_segm[segm] += sum2_f( (const float *) ( &( dct_target[cdk1_ivas_cols_per_segment[segm]] ) ), cdk1_ivas_trunc_dct_cols_per_segment[segm] );
        mse_trunc_segm[segm] += sum2_f( (const float *) ( &( dct_target[cols_per_segment[segm]] ) ), trunc_dct_cols_per_segment[segm] );

        cbpW8 = cdk_37bits_ivas_stage1_W8Qx_dct_sections[segm]; /* Word8 column variable Qx storage*/
        cbpW8 = W8Qx_dct_sections[segm]; /* Word8 column variable Qx storage , table ptr init */

        for ( j = 0; j < cdk1_ivas_entries_per_segment[segm]; j++ )
        for ( j = 0; j < entries_per_segment[segm]; j++ )
        {
            /* unweighted segmented search DCT domain loop */
            j_full = j + cdk1_ivas_cum_entries_per_segment[segm]; /* or simply use j_full++ */
            j_full = j + cum_entries_per_segment[segm]; /* or simply use j_full++ */

            mse = mse_trunc_segm[segm]; /* init mse with with common mse truncation part, in  BASOP a move32() */

            dct_col_shift_tab = stage1_dct_col_syn_shift[segm]; /* ptr init */
            dct_col_shift_tab = col_syn_shift[segm]; /* ptr init */

            for ( c2 = 0; c2 < cdk1_ivas_cols_per_segment[segm]; c2++ )
            for ( c2 = 0; c2 < cols_per_segment[segm]; c2++ )
            {

#define WMC_TOOL_SKIP
                tmp = dct_target[c2] - (float) ( ( (Word16) cbpW8[c2] ) << dct_col_shift_tab[c2] ); /* Word8 storage MSE inner loop */
                LOGIC( 1 );
                SHIFT( 1 );
                ADD( 1 ); /* in BASOP:    s_and(for W8->W16), shl(), sub()*/
#undef WMC_TOOL_SKIP

                mse += tmp * tmp; /*  L_mac or L_mac0()   square Word16 -> Word32*/
            }
            st1_mse_ptr[j_full] = mse; /* save MSE in shared dynamic RAM,  move32() in BASOP */

#define WMC_TOOL_SKIP
            cbpW8 += cdk1_ivas_cols_per_segment[segm]; /*   fixed  pointer increment for each segment  */
            cbpW8 += cols_per_segment[segm]; /*   fixed  pointer increment for each segment  */
#undef WMC_TOOL_SKIP

            /* overwrite with a new worst index at p_max  */
#ifdef ERI_FDCNGVQ_LOW_ROM
            /* The three inner loop if's below are not really properly instrumented by WMC tool */
            /* a ptr to worst index will be in use */
#endif

            /* Note: The three inner loop if's below are not 100% properly instrumented by WMC tool */
            if ( mse < st1_mse_pair[p_max] ) /* L_sub  */
            {
                st1_idx_pair[p_max] = j_full; /* simplified */
                st1_idx_pair[p_max] = j_full; /* move16, single BASOP */
            }                                 /* BASOP 2 ops */

            if ( st1_idx_pair[p_max] == j_full ) /* simplified */
            {                                    /* idx updated to j_full -->  also update mse */
            if ( st1_idx_pair[p_max] == j_full )
            {                              /* idx updated  -->  also update mse */
                st1_mse_pair[p_max] = mse; /* move32(), single BASOP  */
            }                              /* BASOP 3 ops */

            /* avoid WC costly list management by always updating p_max,  as  we have only a pair to maintain */
            /* avoid WC costly candidate list management by always updating p_max, 
               as we have only a pair in each segment to maintain */
            p_max = 0;                                       /* move16() */
            if ( ( st1_mse_pair[0] - st1_mse_pair[1] ) < 0 ) /* L_sub()*/
            {
@@ -173,7 +190,7 @@ int16_t msvq_stage1_dct_search( /* o : (p_max , best candi

            /* Note: logical shift right not available in ANSI-C */
            /* p_max = (st1_mse_pair[0] - st1_mse_pair[1]) ">>>" 31; */
            /* in java logical shift right is available as  >>> ,  in BASOP  it is L_lshr */
            /* in java logical shift right is available as  >>> ,  in BASOP  it is available as L_lshr */

            /* Cost: weighted sum with cond moves ('if') => 8 in float ,   7 in BASOP with L_lshr  */
        } /* j in section */
@@ -184,10 +201,13 @@ int16_t msvq_stage1_dct_search( /* o : (p_max , best candi
    {
        /* compute_full mse using stored DCT24 domain  MSE's   */
        /* calculate MSE  from stage1 inner using existing  inner  DCT domain variables */
        dist1_ptr[j] *= fdcng_dct_scaleF[2]; /*  single multiplication to get the MSE scale to the correct input domain   */
        dist1_ptr[j] *= dct_scaleF[2]; /*  multiplication to get the DCT inner MSE scale  to the correct input domain   */
    }

    p_max = maximum( dist1_ptr, maxC_st1, NULL );  /* establish  current worst candidate for stage#2  among all  maxC_st1 candidates */
    assert( (maxC_st1 >= 3)  );
    assert( (maxC_st1 <= 8) );

    p_max = maximum( dist1_ptr, maxC_st1, NULL ); /* establish  current worst candidate for MSVQ stage#2  among all  maxC_st1 candidates so far */

    p_mins[0] = minimum( dist1_ptr, maxC_st1, NULL ); /* find best  entry among all maxC_pre   */
    tmp = dist1_ptr[p_mins[0]];
@@ -204,32 +224,32 @@ int16_t msvq_stage1_dct_search( /* o : (p_max , best candi
    idx_min[1] = indices_st1_local[p_mins[1]];


    /* use global exclusion list to never reselect  the two  (best)  MSE values sofar  */
    /* use global exclusion list to never reselect the two (best) global MSE values so far */
    st1_mse_ptr[idx_min[0]] = FLT_MAX; /* move32() */
    st1_mse_ptr[idx_min[1]] = FLT_MAX; /* move32() */

    /* circular MSE-neighbour list in use to potentially replace some segment search candidates */
    /* using both 1st and 2nd best neighbours   in fwd and rev directions */
    check_ind[0] = cdk1_ivas_segm_neighbour_fwd[idx_min[0]];
    check_ind[1] = cdk1_ivas_segm_neighbour_rev[idx_min[0]];
    check_ind[0] = segm_neighbour_fwd[idx_min[0]];
    check_ind[1] = segm_neighbour_rev[idx_min[0]];

    check_ind[2] = cdk1_ivas_segm_neighbour_fwd[idx_min[1]];
    check_ind[3] = cdk1_ivas_segm_neighbour_rev[idx_min[1]];
    check_ind[2] = segm_neighbour_fwd[idx_min[1]];
    check_ind[3] = segm_neighbour_rev[idx_min[1]];

    check_ind[4] = cdk1_ivas_segm_neighbour_fwd[check_ind[0]];
    check_ind[5] = cdk1_ivas_segm_neighbour_rev[check_ind[1]];
    check_ind[4] = segm_neighbour_fwd[check_ind[0]];
    check_ind[5] = segm_neighbour_rev[check_ind[1]];

    check_ind[6] =                    cdk1_ivas_segm_neighbour_fwd[check_ind[2]];
    check_ind[FDCNG_VQ_DCT_NPOST-1] = cdk1_ivas_segm_neighbour_rev[check_ind[3]];
    check_ind[6] = segm_neighbour_fwd[check_ind[2]];
    check_ind[FDCNG_VQ_DCT_NPOST - 1] = segm_neighbour_rev[check_ind[3]];

    for ( i = 0; i < FDCNG_VQ_DCT_NPOST; i++ )
    for ( i = 0; i < npost_check; i++ )
    {
       /*   move MSE from search to synthesis domain */ 
        /*   move MSE from DCT-inner loop search  to  input synthesis domain */
        /*   multiplication by dct_scaleF[2] to get the float outer loop scale correct in IDCT synthesis domain  */
       check_mse = st1_mse_ptr[check_ind[i]] * fdcng_dct_scaleF[2];
        check_mse = st1_mse_ptr[check_ind[i]] * dct_scaleF[2];

        if ( check_mse < dist1_ptr[p_max] )
        {   /* new winner , replace */
        { /* new winner , replace worst */
            dist1_ptr[p_max]          = check_mse;
            indices_st1_local[p_max]  = check_ind[i];
            st1_mse_ptr[check_ind[i]] = FLT_MAX;           /* exclude,   BASOP: move32() */
@@ -238,22 +258,27 @@ int16_t msvq_stage1_dct_search( /* o : (p_max , best candi
    }

    /* extract the selected stage one vectors in DCT_N domain , apply IDCT_N and scale up */
    /* always extract full length signal(24) to be able to update WB( N==21) candidate MSE values */
    /* in the case that only a part of the IDCT vector is in final use                            */
    /* always extract full length signal(e.g. 24) to be able to update WB(e.g.  N_in==21) candidate MSE values */
    /* in the case that only a part of the IDCTN  vector is in final use    */

    /* synthesis not yet fully parameterized/generalized for other IDCT lengths */
    assert( N == 24 );
    {
        for ( c = 0; c < maxC_st1; c++ )
        {
            dec_FDCNG_MSVQ_stage1( indices_st1_local[c], N, invTrfMatrix, dcttype + 1, &( st1_syn_vec_ptr[c * N] ), NULL );
        }
    }

    return p_max;
    return p_max; /*ptr to worst performing candidate */
};


/* recalc MSE for WB(0..20) coeffs ,
/* recalc MSE for fdcng WB(0..20) coeffs ,
           essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1  MSE in the DCT24 domain truncated search,
           excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep the WB MSEs updated for the subsequent stages
*/
int16_t msvq_stage1_dct_recalc_candidates_wb(                               /* o  : (updated p_max)                */
int16_t msvq_stage1_dct_recalc_candidates_fdcng_wb(                               /* o  : (updated p_max)                */
                                              const float *st1_syn_vec_ptr, /* i  : IDCT24 synthesis vectors       */
                                              const float *u,               /* i  : target   signal                */
                                              const int16_t maxC_st1,       /* i  : number of candidates in stage1 */
@@ -452,7 +477,26 @@ void msvq_enc(
            /* stage 1 candidates search in truncated dct24  domain without any weights  */
            assert( N == FDCNG_VQ_MAX_LEN );
            assert( maxC == 2 * FDCNG_VQ_DCT_NSEGM );
            p_max = msvq_stage1_dct_search( u, FDCNG_VQ_MAX_LEN, maxC, invTrfMatrix, st1_mse_ptr, indices_st1_local, st1_syn_vec_ptr,  dist[1] );
 
            p_max = msvq_stage1_dct_search( u, FDCNG_VQ_MAX_LEN, maxC, 
                                            DCT_T2_24_XX, 
                                            FDCNG_VQ_DCT_MAXTRUNC,
                                            invTrfMatrix,           /* i : IDCT synthesis matrix for dim N          */
                                            cdk1r_tr_midQ_truncQ,   /* i: midQ  vector */
                                            fdcng_dct_invScaleF,    /* i: global inv scale factors*/  
                                            fdcng_dct_scaleF,                     /* i: global scale factors*/
                                            FDCNG_VQ_DCT_NSEGM,                   /* i: number of segments  */
                                            cdk1_ivas_cols_per_segment,           /* i: remaining length per segment  */
                                            cdk1_ivas_trunc_dct_cols_per_segment, /* i: trunc length per segment   */
                                            cdk1_ivas_entries_per_segment,        /* i: number of rows per segment */
                                            cdk1_ivas_cum_entries_per_segment,    /* i: number of cumulative entries  */
                                            cdk_37bits_ivas_stage1_W8Qx_dct_sections, /*i: Word8(byte) segment  table ptrs  */
                                            stage1_dct_col_syn_shift,          /*i: columnwise  syn shift tables  */
                                            cdk1_ivas_segm_neighbour_fwd,      /*i: circular neighbour list fwd */
                                            cdk1_ivas_segm_neighbour_rev,      /*i: circular neighbour list reverse */
                                            FDCNG_VQ_DCT_NPOST,                /*i: number of circ. neigbours to post check  */
                                            st1_mse_ptr, indices_st1_local, st1_syn_vec_ptr, dist[1] );
 

            /*    move established stage#1  indices  to the global MSVQ list structure */
            for ( c = 0; c < maxC; c++ )
@@ -768,7 +812,7 @@ void msvq_enc(
#ifdef ERI_MSVQ_CLEANUP
        if ( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB )
        {
            p_max = msvq_stage1_dct_recalc_candidates_wb( st1_syn_vec_ptr, u, maxC, dist[1] );
            p_max = msvq_stage1_dct_recalc_candidates_fdcng_wb( st1_syn_vec_ptr, u, maxC, dist[1] );
        }
#else
        if ( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB )
+3 −3

File changed.

Contains only whitespace changes.