Commit 03d98545 authored by Jonas Svedberg's avatar Jonas Svedberg
Browse files

initial functional split within MSVQ encoder function

parent 25523cf8
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -158,6 +158,9 @@

#define ERI_FDCNGVQ_LOW_ROM                             /* Eri: Contribution #31 Table ROM saving for IVAS FDCNG-VQ modes       */

#define ERI_MSVQ_CLEANUP                                /* Eri:   BE modularization of msvq encoder side DCT c-code     */


/* ################## End DEVELOPMENT switches ######################### */
/* clang-format on */
#endif
+267 −7
Original line number Diff line number Diff line
@@ -53,6 +53,235 @@

#include "ivas_prot.h"
void dctT2_N_apply_matrix( const float *input, float *output, const int16_t dct_dim, int16_t fdcngvq_dim, const float *idctT2_24_X_matrixQ16, const int16_t matrix_1st_dim, DCTTYPE dcttype );

#ifdef ERI_MSVQ_CLEANUP

  

/*--------------------------------------------------------------------------*
 * msvq_stage1_dct_search()
 *
 * Stage#1 candidate search in the truncated DCT domain, without weights.
 *
 * Steps:
 *  1) subtract cdk1r_tr_midQ_truncQ from the target, scale the result and
 *     transform it with dctT2_N_apply_matrix();
 *  2) for every codebook segment, search all entries of the segment and
 *     keep the two entries with the lowest MSE (one pair of slots in
 *     dist1_ptr[] / indices_st1_local[] per segment);
 *  3) test the fwd/rev neighbours of the two overall best entries and let
 *     them replace the current worst candidate when they are better;
 *  4) synthesize all selected candidates with dec_FDCNG_MSVQ_stage1().
 *
 * The segment search fills exactly 2 * FDCNG_VQ_DCT_NSEGM candidate slots,
 * so maxC_st1 has to equal that number.
 *--------------------------------------------------------------------------*/
int16_t msvq_stage1_dct_search(                             /* o  : p_max, index in dist1_ptr[] found by maximum() (current worst candidate) */
                                const float *u,             /* i  : target                                                  */
                                const int16_t N,            /* i  : target length and IDCT synthesis length                 */
                                const int16_t maxC_st1,     /* i  : number of candidates to provide                         */
                                float *invTrfMatrix,        /* i/o: IDCT synthesis matrix for dim N                         */
                                float *st1_mse_ptr,         /* i/o: scratch buffer, receives one MSE per stage#1 entry      */
                                int16_t *indices_st1_local, /* o  : selected candidate indices (maxC_st1 entries)           */
                                float *st1_syn_vec_ptr,     /* o  : synthesis buffer, candidate c is stored at offset c * N */
                                float *dist1_ptr            /* o  : resulting stage#1 MSEs (maxC_st1 entries)               */
)
{
    float dct_target[FDCNG_VQ_DCT_MAXTRUNC];
    float u_mr[FDCNG_VQ_MAX_LEN];
    float u_mr_scaled[FDCNG_VQ_MAX_LEN];
    float mse_trunc_segm[FDCNG_VQ_DCT_NSEGM];
    float tmp, check_mse;
    float mse; /* Word32 */

    int16_t p_max, c, c2, segm, j_full, j, i;
    int16_t n_ana, p_mins[2], idx_min[2];

    const Word8 *cbpW8;
    const Word16 *dct_col_shift_tab;

    float *st1_mse_pair;
    int16_t *st1_idx_pair;

    DCTTYPE dcttype = DCT_T2_24_XX;
    float tmp2;
    int16_t check_ind[FDCNG_VQ_DCT_NPOST];

    n_ana = N;                                    /* VQ stage#1 core is currently always using stored DCT24 coeffs */
    assert( n_ana >= FDCNG_VQ_DCT_MAXTRUNC );     /* check for FDCNGVQ WB, SWB, FB operation */
    assert( maxC_st1 == 2 * FDCNG_VQ_DCT_NSEGM ); /* one pair of candidate slots per segment is filled below */

    /* remove mean/mid fdcng stage#1 vector, in original subband domain */
    v_sub( u, cdk1r_tr_midQ_truncQ, u_mr, n_ana );

    v_multc( u_mr, fdcng_dct_invScaleF[1], u_mr_scaled, n_ana ); /* scale up target to upscaled W8x storage domain */
    /* 16.0-->scale up from Q0 to search domain in Q4, not really needed in BASOP, impl. by shifts */

    dctT2_N_apply_matrix( (const float *) u_mr_scaled, dct_target, min( FDCNG_VQ_DCT_MAXTRUNC, n_ana ), n_ana, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, dcttype );

    mse = 0;

    /* init search state: all candidate MSEs start as "worst possible" */
    set_f( dist1_ptr, FLT_MAX, maxC_st1 );

    /* the pair pointers start at slot 0 and are advanced by 2 at the END of each segment,   */
    /* so that segment 'segm' owns slots [2*segm, 2*segm+1] and no slot outside              */
    /* [0, 2*FDCNG_VQ_DCT_NSEGM) is ever touched                                             */
    st1_mse_pair = &( dist1_ptr[0] );
    st1_idx_pair = &( indices_st1_local[0] );

    for ( segm = 0; segm < FDCNG_VQ_DCT_NSEGM; segm++ )
    {
        p_max = 0; /* req. to point to 1 or 0 within the current pair */

        /* compute segment common truncation error in dctN domain */
        mse_trunc_segm[segm] = 0;
        mse_trunc_segm[segm] += sum2_f( (const float *) ( &( dct_target[cdk1_ivas_cols_per_segment[segm]] ) ), cdk1_ivas_trunc_dct_cols_per_segment[segm] );

        cbpW8 = cdk_37bits_ivas_stage1_W8Qx_dct_sections[segm]; /* Word8 column variable Qx storage */

        for ( j = 0; j < cdk1_ivas_entries_per_segment[segm]; j++ )
        {
            /* unweighted segmented search DCT domain loop */
            j_full = j + cdk1_ivas_cum_entries_per_segment[segm]; /* or simply use j_full++ */

            mse = mse_trunc_segm[segm]; /* init mse with common mse truncation part, in BASOP a move32() */

            dct_col_shift_tab = stage1_dct_col_syn_shift[segm]; /* ptr init */

            for ( c2 = 0; c2 < cdk1_ivas_cols_per_segment[segm]; c2++ )
            {
                /* NOTE(review): if Word8 is a signed type, a negative coefficient is left-shifted here, */
                /* which ISO C leaves undefined -- confirm that all target compilers handle it as expected */
#define WMC_TOOL_SKIP
                tmp = dct_target[c2] - (float) ( ( (Word16) cbpW8[c2] ) << dct_col_shift_tab[c2] ); /* Word8 storage MSE inner loop */
                LOGIC( 1 );
                SHIFT( 1 );
                ADD( 1 ); /* in BASOP:    s_and(for W8->W16), shl(), sub()*/
#undef WMC_TOOL_SKIP

                mse += tmp * tmp; /* L_mac or L_mac0()   square Word16 -> Word32 */
            }
            st1_mse_ptr[j_full] = mse; /* save MSE in shared dynamic RAM, move32() in BASOP */

#define WMC_TOOL_SKIP
            cbpW8 += cdk1_ivas_cols_per_segment[segm]; /* fixed pointer increment for each segment */
#undef WMC_TOOL_SKIP

            /* overwrite with a new worst index at p_max */
#ifdef ERI_FDCNGVQ_LOW_ROM
            /* The three inner loop if's below are not really properly instrumented by WMC tool */
            /* a ptr to worst index will be in use */
#endif
            if ( mse < st1_mse_pair[p_max] ) /* L_sub */
            {
                st1_idx_pair[p_max] = j_full; /* simplified */
            }                                 /* BASOP 2 ops */

            if ( st1_idx_pair[p_max] == j_full ) /* simplified */
            {                                    /* idx updated to j_full --> also update mse */
                st1_mse_pair[p_max] = mse;       /* move32(), single BASOP */
            }                                    /* BASOP 3 ops */

            /* avoid WC costly list management by always updating p_max, as we have only a pair to maintain */
            p_max = 0;                                       /* move16() */
            if ( ( st1_mse_pair[0] - st1_mse_pair[1] ) < 0 ) /* L_sub() */
            {
                p_max = 1; /* move16() */
            }              /* BASOP 3 ops, Note 2 ops possible in BASOP with L_sub and L_lshr */

            /* Note: logical shift right not available in ANSI-C */
            /* p_max = (st1_mse_pair[0] - st1_mse_pair[1]) ">>>" 31; */
            /* in java logical shift right is available as >>>, in BASOP it is L_lshr */

            /* Cost: weighted sum with cond moves ('if') => 8 in float, 7 in BASOP with L_lshr */
        } /* j in section */

        /* point to a new paired location for the next segment */
        st1_mse_pair += 2;
        st1_idx_pair += 2;
    } /* next segment */

    for ( j = 0; j < maxC_st1; j++ )
    {
        /* compute full mse using stored DCT24 domain MSE's */
        /* calculate MSE from stage1 inner using existing inner DCT domain variables */
        dist1_ptr[j] *= fdcng_dct_scaleF[2]; /* single multiplication to get the MSE scale to the correct input domain */
    }

    p_max = maximum( dist1_ptr, maxC_st1, NULL ); /* establish current worst candidate for stage#2 among all maxC_st1 candidates */

    p_mins[0] = minimum( dist1_ptr, maxC_st1, NULL ); /* find best entry among all candidates */
    tmp = dist1_ptr[p_mins[0]];
    dist1_ptr[p_mins[0]] = FLT_MAX; /* exclude 1st */

    p_mins[1] = minimum( dist1_ptr, maxC_st1, NULL ); /* find 2nd best entry */
    tmp2 = dist1_ptr[p_mins[1]];
    dist1_ptr[p_mins[1]] = FLT_MAX; /* exclude 2nd */

    dist1_ptr[p_mins[0]] = tmp;  /* restore 1st */
    dist1_ptr[p_mins[1]] = tmp2; /* restore 2nd */

    idx_min[0] = indices_st1_local[p_mins[0]];
    idx_min[1] = indices_st1_local[p_mins[1]];

    /* use global exclusion list to never reselect the two (best) MSE values so far */
    st1_mse_ptr[idx_min[0]] = FLT_MAX; /* move32() */
    st1_mse_ptr[idx_min[1]] = FLT_MAX; /* move32() */

    /* circular MSE-neighbour list in use to potentially replace some segment search candidates */
    /* using both 1st and 2nd best neighbours in fwd and rev directions */
    check_ind[0] = cdk1_ivas_segm_neighbour_fwd[idx_min[0]];
    check_ind[1] = cdk1_ivas_segm_neighbour_rev[idx_min[0]];

    check_ind[2] = cdk1_ivas_segm_neighbour_fwd[idx_min[1]];
    check_ind[3] = cdk1_ivas_segm_neighbour_rev[idx_min[1]];

    check_ind[4] = cdk1_ivas_segm_neighbour_fwd[check_ind[0]];
    check_ind[5] = cdk1_ivas_segm_neighbour_rev[check_ind[1]];

    check_ind[6] = cdk1_ivas_segm_neighbour_fwd[check_ind[2]];
    check_ind[FDCNG_VQ_DCT_NPOST - 1] = cdk1_ivas_segm_neighbour_rev[check_ind[3]];

    for ( i = 0; i < FDCNG_VQ_DCT_NPOST; i++ )
    {
        /* move MSE from search to synthesis domain */
        /* multiplication by fdcng_dct_scaleF[2] to get the float outer loop scale correct in IDCT synthesis domain */
        check_mse = st1_mse_ptr[check_ind[i]] * fdcng_dct_scaleF[2];

        if ( check_mse < dist1_ptr[p_max] )
        { /* new winner, replace */
            dist1_ptr[p_max] = check_mse;
            indices_st1_local[p_max] = check_ind[i];
            st1_mse_ptr[check_ind[i]] = FLT_MAX;          /* exclude, BASOP: move32() */
            p_max = maximum( dist1_ptr, maxC_st1, NULL ); /* establish a new current worst candidate among all maxC */
        }
    }

    /* extract the selected stage one vectors in DCT_N domain, apply IDCT_N and scale up */
    /* always extract full length signal(24) to be able to update WB( N==21) candidate MSE values */
    /* in the case that only a part of the IDCT vector is in final use */
    for ( c = 0; c < maxC_st1; c++ )
    {
        dec_FDCNG_MSVQ_stage1( indices_st1_local[c], N, invTrfMatrix, dcttype + 1, &( st1_syn_vec_ptr[c * N] ), NULL );
    }

    return p_max;
}


/*--------------------------------------------------------------------------*
 * msvq_stage1_dct_recalc_candidates_wb()
 *
 * Recalculate the stage#1 candidate MSEs for WB operation.
 *
 * For every candidate, the squared difference between the synthesis vector
 * and the target over the coefficients FDCNG_VQ_MAX_LEN_WB ...
 * FDCNG_VQ_MAX_LEN-1 is subtracted from the candidate MSE, i.e. the part of
 * the error above the WB coefficients that the stage#1 search had included
 * is removed again. This keeps the WB MSEs up to date for the subsequent
 * stages. The worst candidate is re-evaluated afterwards, as it may have
 * changed.
 *--------------------------------------------------------------------------*/
int16_t msvq_stage1_dct_recalc_candidates_wb(                               /* o  : updated p_max, as found by maximum()                    */
                                              const float *st1_syn_vec_ptr, /* i  : synthesis vectors, FDCNG_VQ_MAX_LEN values per candidate */
                                              const float *u,               /* i  : target signal (extended part above WB is read)           */
                                              const int16_t maxC_st1,       /* i  : number of candidates in stage1                           */
                                              float *dist_ptr               /* i/o: updated MSE vector for stage1                            */
)
{
    float hb_err[FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB];
    float hb_err_energy;
    const float *syn_hb;
    int16_t cand;

    for ( cand = 0; cand < maxC_st1; cand++ )
    {
        /* start of the part above the WB coefficients within synthesis candidate 'cand' */
        syn_hb = st1_syn_vec_ptr + cand * FDCNG_VQ_MAX_LEN + FDCNG_VQ_MAX_LEN_WB;

        /* "u" is used (not the shortened residual) to reach the extended/extrapolated input target */
        v_sub( syn_hb, u + FDCNG_VQ_MAX_LEN_WB, hb_err, FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB );

        /* sum of squares over the values above the WB coefficients */
        hb_err_energy = dotp( hb_err, hb_err, FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB );

        /* remove the high band error contribution from this candidate */
        dist_ptr[cand] -= hb_err_energy;
    }

    /* the worst candidate may differ now that the high band part is excluded */
    return maximum( dist_ptr, maxC_st1, NULL );
}
#endif

#endif

/*--------------------------------------------------------------------------*
@@ -87,9 +316,8 @@ void msvq_enc(
    float resid_buf[2 * LSFMBEST_MAX * M_MAX], dist_buf[2 * LSFMBEST_MAX], Tmp[M_MAX];
    int16_t idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX];
    int16_t n, maxn, start;


#ifdef ERI_FDCNGVQ_LOW_ROM
#ifndef ERI_MSVQ_CLEANUP  
    /* buffers */
    float dct_target[FDCNG_VQ_DCT_MAXTRUNC];
    float u_mr[FDCNG_VQ_MAX_LEN];
@@ -110,20 +338,28 @@ void msvq_enc(

    int16_t check_ind[FDCNG_VQ_DCT_NPOST];
    int16_t segm, j_full, maxC_pre;
    float *st1_syn_vec_ptr; /* 8* 24 floats in dynRAM */
    float *st1_mse_ptr;     /* 2^¨7 == 128 floats in existing dRAM used for stage 1 candidate analysis,  128  Word32 in BASOP  */
 #endif 
    float *st1_syn_vec_ptr; /* ptr to buffer in dynRAM */
    float *st1_mse_ptr;     /* ptr to  buffer in existing dRAM used for stage 1 candidate analysis */
    #ifdef ERI_MSVQ_CLEANUP
    int16_t indices_st1_local[FDCNG_VQ_DCT_NSEGM * 2]; /* after stage#1 DCT search  this is copied to the global  indices[1][s*stages] structure */
#else 
    float res24, high_diff[FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB];

    maxC_pre = ( FDCNG_VQ_DCT_NSEGM * 2 );
#endif
    assert( maxC <= LSFMBEST_MAX );
    assert( ( LSFMBEST_MAX * M_MAX ) > ( N * maxC ) );
    /*   top of resid_buf  is   resid[1]  and used for stage#1 residuals (input target u),
         we here reuse  resid[0] part of the buffer for stage#1 DCT dynamic RAM needs
    */
    st1_mse_ptr = &( resid_buf[1 * LSFMBEST_MAX * M_MAX] ) - ( levels[0] );               /* reuse top of residual resid[0] scratch RAM for stage1 MSEs */

    st1_syn_vec_ptr = &( resid_buf[1 * LSFMBEST_MAX * M_MAX] ) - FDCNG_VQ_MAX_LEN * maxC; /*   reuse top of resid[0] scratch RAM for residual */

#ifndef ERI_MSVQ_CLEANUP
    dcttype = DCT_T2_24_XX;
#endif 

#endif

    /*----------------------------------------------------------------*
@@ -209,6 +445,22 @@ void msvq_enc(
        }

#ifdef ERI_FDCNGVQ_LOW_ROM

#ifdef ERI_MSVQ_CLEANUP
        if ( !s && applyDCT_flag != 0 ) /* means: m==1 */
        {                               
            /* stage 1 candidates search in truncated dct24  domain without any weights  */
            assert( N == FDCNG_VQ_MAX_LEN );  
            assert( maxC == 2 * FDCNG_VQ_DCT_NSEGM );
            p_max = msvq_stage1_dct_search( u, FDCNG_VQ_MAX_LEN, maxC, invTrfMatrix, st1_mse_ptr, indices_st1_local, st1_syn_vec_ptr,  dist[1] );

            /*    move established stage#1  indices  to the global MSVQ list structure */
            for ( c = 0; c < maxC; c++ )
            {
                indices[1][c * stages] = indices_st1_local[c]; 
            }
        }
#else
        if ( !s && applyDCT_flag != 0 ) /* means: m==1 */
        {                               /* stage 1 search in truncated dct domain without any weights  */

@@ -370,6 +622,7 @@ void msvq_enc(

            assert( maxC == maxC_pre );
        }
#endif
        else
        /* non-DCT Stage #1 code below */
#endif
@@ -512,6 +765,12 @@ void msvq_enc(
           essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1  MSE in the DCT24 domain truncated search,
           excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep WB MSEs update for the subsequent stages
           */
#ifdef ERI_MSVQ_CLEANUP
        if ( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB )
        {
            p_max = msvq_stage1_dct_recalc_candidates_wb( st1_syn_vec_ptr, u, maxC, dist[1] );
        }
#else
        if ( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB )
        {
            assert( start == 0 );
@@ -527,6 +786,7 @@ void msvq_enc(
            /* update p_max,  as it may potentially change, due to the core DCT24 search originally optimizing over longer basis vectors than 21 */
            p_max = maximum( dist[1], maxC, NULL );
        }
#endif
#endif
        m = maxC;
    } /* for (m=1, s=0; s<stages; s++) */