Commit 25bac608 authored by kinuthia's avatar kinuthia
Browse files

ROM optimization for FD-CNG first stage MSVQ

- under define ERI_FDCNGVQ_LOW_ROM
- inactive define ERI_FDCNGVQ_LOW_ROM_TESTING included for comparing with baseline
parent 589bd13c
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -755,7 +755,11 @@ typedef enum
#define STEP_SID                            5.25f                   /* CNG & DTX - CNG energy quantization step */

#define MIN_ACT_CNG_UPD                     20                      /* DTX - Minimum number of consecutive active frames for CNG mode update */
#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
#define FIXED_SID_RATE                      2                       /* increase DTX SID rate for better FDCNG VQ testing  */
#else
#define FIXED_SID_RATE                      8                       /* DTX SID rate */
#endif

#define TOTALNOISE_HIST_SIZE                4

@@ -1391,6 +1395,25 @@ enum
#define NPARTCLDFB                          10
#define NPART_SHAPING                       62

#ifdef ERI_FDCNGVQ_LOW_ROM
/* Constants for the low-ROM (DCT-domain) first stage of the FD-CNG MSVQ */
#define FDCNG_VQ_MAX_LEN        FD_CNG_maxN_37bits   /* maximum stage-1 vector length; sizes the local work buffers */
#define FDCNG_VQ_DCT_NSEGM      4   /* number of segments the stage-1 DCT codebook is split into */
#define FDCNG_VQ_DCT_MINTRUNC   8   /* lower bound asserted on the synthesized dimension; presumably the shortest DCT truncation - TODO confirm */
#define FDCNG_VQ_DCT_MAXTRUNC   18  /* longest DCT truncation == number of columns per row of the stored IDCT matrix */
#define FDCNG_VQ_MAX_LEN_WB     21 /* vector length that selects the 21-point IDCT tables */

#define FDCNG_VQ_DCT_NPOST   8   /* NOTE(review): not referenced by the synthesis code shown here - confirm meaning at the point of use */

/* Transform selector: even values are forward DCTs, odd values are inverse DCTs
   (dctT2_N_apply_matrix() tests bit 0 to choose the direction) */
typedef enum _DCTTYPE
{
    DCT_T2_24_XX = 0, /* truncated DCT_T2_24 */
    IDCT_T2_XX_24 = 1, /* inverse of the truncated DCT_T2_24; the only IDCT accepted by dctT2_N_apply_matrix() */
    DCT_T2_21_XX = 2, /* truncated DCT_T2_21 */
    IDCT_T2_XX_21 = 3 /* inverse of the truncated DCT_T2_21 */
} DCTTYPE;

#endif 

#define MSSUBFRLEN                          12
#define MSNUMSUBFR                          6
#define MSBUFLEN                            5
+4 −0
Original line number Diff line number Diff line
@@ -83,7 +83,11 @@ static inline ivas_error ivas_error_wrapper( const ivas_error error_code, const
    va_end( args );

    fprintf( stderr, "\n\nIn function: %s(), %s:%d\n\n", function, file, line );
#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
    assert( 0 );
#endif
    // assert( 0 );

    return error_code;
}
#else
+365 −2
Original line number Diff line number Diff line
@@ -2027,6 +2027,69 @@ int16_t tcxlpc_get_cdk(
    return cdk;
}

#ifdef ERI_FDCNGVQ_LOW_ROM
/*--------------------------------------------------------------------------*
 * dec_FDCNG_MSVQ_stage1()
 *
 * Synthesize the first-stage FD-CNG MSVQ vector from the segmented,
 * DCT-domain Word8 codebook: find the segment holding the index, expand the
 * stored coefficients with their per-column shifts, apply the IDCT, scale
 * the result down and add the common mid vector.
 * Only the floating-point output is produced; uq_ind must be NULL.
 *--------------------------------------------------------------------------*/
void dec_FDCNG_MSVQ_stage1(
    int16_t j_full,            /* i:   index full range           */
    int16_t n,                 /* i:   dimension to generate      */
    const float *invTrfMatrix, /* i:   matrix for synthesis */
    const DCTTYPE idcttype,    /* i:  specify which IDCT */
    float *uq,                 /* o:   synthesized stage1 vector  */
    Word16 *uq_ind             /* o:  synthesized stage1 vector in BASOP */
)
{
    int16_t col, segm_ind, j;
    float dct_vec[FDCNG_VQ_MAX_LEN];
    float idct_vec[FDCNG_VQ_MAX_LEN];
    const Word8 *cbpW8;
    const Word16 *dct_col_shift_tab;

    assert( n <= FDCNG_VQ_MAX_LEN );
    assert( n >= FDCNG_VQ_DCT_MINTRUNC );

    /* locate the codebook segment that contains the full-range index */
    segm_ind = 0;
    for ( col = 1; col <= FDCNG_VQ_DCT_NSEGM; col++ )
    {
        if ( j_full >= cdk1_ivas_cum_entries_per_segment[col] )
        {
            segm_ind++;
        }
    }

    j = j_full - cdk1_ivas_cum_entries_per_segment[segm_ind]; /* j  is the local segment index */

    assert( j >= 0 ); /* a negative index would address memory in front of the segment table */
    assert( j < cdk1_ivas_entries_per_segment[segm_ind] );

    /* Word8 column variable Qx storage*/
    cbpW8 = cdk_37bits_ivas_stage1_W8Qx_dct_sections[segm_ind]; /*   Word8 storage fixed  ptr_init */
    cbpW8 += j * cdk1_ivas_cols_per_segment[segm_ind];          /*    adaptive ptr init */
    dct_col_shift_tab = stage1_dct_col_syn_shift[segm_ind];

    for ( col = 0; col < cdk1_ivas_cols_per_segment[segm_ind]; col++ )
    {
        /* Scale by 2^shift with a multiplication: the stored coefficients are signed and
           left-shifting a negative value is undefined behaviour in C.
           (in BASOP:   s_and(for W8->W16), shl(), sub()) */
        dct_vec[col] = (float) ( ( (Word16) cbpW8[col] ) * ( 1 << dct_col_shift_tab[col] ) );
    }
    dctT2_N_apply_matrix( (const float *) dct_vec, idct_vec, cdk1_ivas_cols_per_segment[segm_ind], n, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, idcttype );

    /*scale down to original fdcngvq domain and move to Q0  */
    v_multc( idct_vec, fdcng_dct_scaleF[1], idct_vec, n );
    /* fdcng_dct_scaleF[1] --> 0.0625-->scale down from  search Q4 domain  to Q0 ,  not really  relevant for  BASOP loop */

    /*add common  mid fdcng vector,  in fdcng bands domain */
    v_add( idct_vec, cdk1r_tr_midQ_truncQ, uq, n );

    /* the fixed-point (BASOP) output is not synthesized by this routine */
    assert( uq_ind == NULL );
}
#endif


/*--------------------------------------------------------------------------*
 * msvq_dec()
 *
@@ -2042,6 +2105,10 @@ void msvq_dec(
    const int16_t N,        /* i  : Vector dimension                                     */
    const int16_t maxN,     /* i  : Codebook dimension                                   */
    const int16_t Idx[],    /* i  : Indices                                              */
#ifdef ERI_FDCNGVQ_LOW_ROM
    const int16_t applyIDCT_flag, /* i  : applyIDCT flag                                 */
    const float *invTrfMatrix,    /* i:   matrix for synthesis                          */
#endif
    float *uq,     /* o  : quantized vector                                           */
    Word16 *uq_ind /* o  : quantized vector (fixed point)                       */
)
@@ -2079,9 +2146,32 @@ void msvq_dec(
            start = 0;
        }

#ifdef ERI_FDCNGVQ_LOW_ROM
        if ( i == 0 && applyIDCT_flag != 0 )
        {
            assert( start == 0 );
            dec_FDCNG_MSVQ_stage1( Idx[0], N, invTrfMatrix, IDCT_T2_XX_24, uq, uq_ind ); /* IDCT24 used for all synthesis  */
        }
        else
        {
            v_add( uq + start, cb[i] + Idx[i] * maxn, uq + start, n );
        }

#define WMC_TOOL_SKIP
        IF( uq_ind != NULL )
        {
            FOR( j = 0; j < n; ++j )
            {
                move16();
                uq_ind[start + j] = add( uq_ind[start + j], (Word16) ( cb[i][Idx[i] * maxn + j] * 2.0f * 1.28f ) );
            }
        }
#undef WMC_TOOL_SKIP
#else

        v_add( uq + start, cb[i] + Idx[i] * maxn, uq + start, n );

#define WMC_TOOL_SKIP
        IF( uq_ind != NULL )
        {
            FOR( j = 0; j < n; ++j )
@@ -2091,6 +2181,7 @@ void msvq_dec(
            }
        }
#undef WMC_TOOL_SKIP
#endif
    }

    return;
@@ -2358,3 +2449,275 @@ void a2isf(

    return;
}

#ifdef ERI_FDCNGVQ_LOW_ROM
/*-------------------------------------------------------------------*
 * dctT2_N_apply_matrix()
 *
 * dct/idct    matrix application loop for a fixed DCT basis vector length of N
 *-------------------------------------------------------------------*/
void dctT2_N_apply_matrix(
    const float *input,           /* i: vector to transform (fdcng domain for DCT, DCT domain for IDCT) */
    float *output,                /* o: transformed vector; may be the same buffer as input             */
    const int16_t dct_dim,        /* i: number of DCT coefficients                                       */
    int16_t fdcngvq_dim,          /* i: vector length in the fdcng domain                                */
    const float *matrix,          /* i: stored IDCT matrix, matrix_row_dim elements per row              */
    const int16_t matrix_row_dim, /* i: number of elements in one stored matrix row                      */
    DCTTYPE dcttype )             /* i: even value -> DCT, odd value -> IDCT                             */
{
    int16_t out_idx, in_idx, n_in, n_out;
    int16_t start_step, elem_step;
    const float *p_mat;
    float acc[FDCNG_VQ_MAX_LEN]; /* result is built here first so that output may alias input */

#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
    {
        char tmp_str[1024];
        sprintf( tmp_str, "dctT2_%d_apply_mat", fdcngvq_dim );
        push_wmops( tmp_str );
    }
#endif

    /* One stored IDCT_Nx(FDCNG_VQ_DCT_MAXTRUNC) matrix serves both directions:
       the IDCT walks along its rows, the DCT walks down its columns */

    assert( dct_dim <= FDCNG_VQ_DCT_MAXTRUNC );
    assert( fdcngvq_dim <= FDCNG_VQ_MAX_LEN );

    if ( ( dcttype & 1 ) == 0 )
    {
        /* forward DCT: fdcng domain (24 or 21) -> dct_dim coefficients */
        n_in = fdcngvq_dim;
        n_out = dct_dim;
        start_step = 1;             /* output i starts at column i of the first row */
        elem_step = matrix_row_dim; /* and steps down that column                   */
        assert( dcttype == DCT_T2_21_XX || dcttype == DCT_T2_24_XX );
    }
    else
    {
        /* inverse DCT: dct_dim coefficients -> fdcng domain */
        n_in = dct_dim;
        n_out = fdcngvq_dim;
        start_step = matrix_row_dim; /* output i starts at the beginning of row i */
        elem_step = 1;               /* and steps along that row                  */
        assert( dcttype == IDCT_T2_XX_24 );
    }

    for ( out_idx = 0; out_idx < n_out; out_idx++ )
    {
        acc[out_idx] = 0;

#define WMC_TOOL_SKIP
        p_mat = matrix + out_idx * start_step; /* ptr indexing */
        PTR_INIT( 1 );
#undef WMC_TOOL_SKIP
        for ( in_idx = 0; in_idx < n_in; in_idx++ )
        {
#define WMC_TOOL_SKIP
            acc[out_idx] += input[in_idx] * ( *p_mat );
            p_mat += elem_step; /* ptr indexing */
            MAC( 1 );
#undef WMC_TOOL_SKIP
        }
    }

    mvr2r( acc, output, n_out );
#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
    {
        pop_wmops();
    }
#endif
}


/*-------------------------------------------------------------------*
 * extend_dctN_input()
 *
 * (inputN, dctN) -> idct(N_ext)    idct_N matrix application loop for
 *          extending, extrapolating  a  DCT basis vector length of N to N_ext
 *-------------------------------------------------------------------*/

void extend_dctN_input(
    const float *input,     /* i:  input in fdcng domain         */
    const float *dct_input, /* i:  input in dctN(fdcng) domain   */
    const int16_t in_dim,   /* i:  in_dim==N */
    float *ext_sig,         /* o:  extended output in fdcng domain  */
    const int16_t out_dim,  /* i:  output total dim   */
    float *matrix,          /* i: idct synthesis matrix of  size  N rows ,  n_cols  columns*/
    const int16_t n_cols,   /* i: number of columns ==  truncation length */
    DCTTYPE dcttype )       /* i: matrix operation type    */
{
    int16_t i, j, i_rev;

    /* view the flat matrix as rows of FDCNG_VQ_DCT_MAXTRUNC coefficients */
    const float( *ptr )[FDCNG_VQ_DCT_MAXTRUNC] = (void *) matrix;

#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
    {
        char tmp_str[1024];
        sprintf( tmp_str, "extend_dct%d_input", in_dim );
        push_wmops( tmp_str );
    }
#endif


    /*   stored format of  an IDCT_Nx(FDCNG_VQ_DCT_MAXTRUNC)   matrix  */
    assert( in_dim < FDCNG_VQ_MAX_LEN );
    assert( out_dim <= FDCNG_VQ_MAX_LEN );
    assert( out_dim > in_dim );
    assert( out_dim - in_dim <= in_dim );      /* reflected row index i_rev must stay >= 0 */
    assert( n_cols == FDCNG_VQ_DCT_MAXTRUNC ); /* for *ptr[MAX_TRUNC] addressing */


    assert( ( dcttype & 1 ) != 0 ); /* idct tables in use for this basis vector extension */

    mvr2r( input, ext_sig, in_dim ); /* copy initial part,  i.e.  only   last/tail  parts are updated */

    set_f( &( ext_sig[in_dim] ), 0.0f, out_dim - in_dim );

    i_rev = in_dim; /*ptr init*/
    for ( i = in_dim; i < out_dim; i++ )
    { /* for each extension sample */
        /*   i  = 21  22  23;
          i_rev = 20  19  18;    for odd dctII simply reflect basis vector
         */
        i_rev--;

        for ( j = 0; j < n_cols; j++ ) /* for each available  DCT coeff */
        {
            /* DCTcoeff * reflected basis vector */
#define WMC_TOOL_SKIP
            /* these are pure ptr operations */
            ext_sig[i] += dct_input[j] * ptr[i_rev][j]; /* sum up scaled and extended basis vector */
            MAC( 1 );
#undef WMC_TOOL_SKIP
        }
    }
#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
    {
        pop_wmops();
    }
#endif
}


/* inititate idct24 FDCNG_VQ_DCT_MAXTRUNCx N matrix in   RAM  from a Word8+Word16 quantized compressed ROM format */
void create_IDCT_N_Matrix(
    float *inv_matrixFloatQ, /* i/o: RAM buffer receiving the expanded float IDCT matrix */
    const int16_t N,         /* i: DCT length; FDCNG_VQ_MAX_LEN_WB selects the 21-point tables */
    const int16_t n_cols,    /* i: number of DCT coefficients stored per row */
    const int16_t alloc_size /* i: RAM buffer size in elements */
)
{
    int16_t k, col, row, row_mirror, W16_val;
    int16_t len; /* <=FDCNG_VQ_MAX_LEN */
    int16_t half_rows, n_stored;
    const Word16 *coeff_tab;
    const Word8 *index_tab;
    int8_t idx;
#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
    {
        char tmp_str[1024];
        sprintf( tmp_str, "create_IDCT_%d_mat", N );
        push_wmops( tmp_str );
    }
#endif

    /* pick the compressed ROM tables: 21-point set for WB, 24-point set otherwise */
    if ( N == FDCNG_VQ_MAX_LEN_WB )
    {
        coeff_tab = unique_idctT2_21coeffsQ16;
        index_tab = idctT2_21_compressed_idx;
        len = N;
    }
    else
    {
        coeff_tab = unique_idctT2_24coeffsQ16;
        index_tab = idctT2_24_compressed_idx;
        len = FDCNG_VQ_MAX_LEN;
    }

    assert( alloc_size >= ( n_cols * len ) ); /* enough space for the full expanded IDCT matrix  */
    assert( N <= len );
    W16_val = 0; /* safety init */

    /* ROM holds only the upper half of the rows; an odd-length DCT also stores its centre row */
    half_rows = len / 2;
    n_stored = n_cols * half_rows;
    if ( ( len & 1 ) != 0 )
    {
        n_stored += n_cols;
    }

    /* expand the stored part: the sign of the index encodes the sign of the coefficient */
    for ( k = 0; k < n_stored; k++ )
    {
        idx = index_tab[k];
        if ( idx < 0 )
        {
            W16_val = -( coeff_tab[-idx] );
        }
        else
        {
            W16_val = ( coeff_tab[idx] );
        }
        inv_matrixFloatQ[k] = ( +1.52587890625e-05f ) * ( (float) W16_val ); /* 1.0/2.^16 scaling to float; not done in BASOP */
    }

    {
        /* Rebuild the lower half of the rows by mirroring the upper half:
           even columns are copied, odd columns are copied with inverted sign.
           The centre row of an odd-length DCT is left as stored. */

        /* fixed row widths keep the 2-D addressing below simple */
        float( *mat18 )[FDCNG_VQ_DCT_MAXTRUNC] = (void *) inv_matrixFloatQ;
        float( *mat21 )[FDCNG_VQ_MAX_LEN_WB] = (void *) inv_matrixFloatQ;

        if ( n_cols == FDCNG_VQ_DCT_MAXTRUNC )
        {
            assert( ( n_cols & 1 ) == 0 );
            for ( col = 0; col < n_cols; col += 2 )
            {
                for ( row = 0; row < half_rows; row++ )
                {
                    row_mirror = len - 1 - row;
#define WMC_TOOL_SKIP
                    /* pure ptr based calculations  */
                    mat18[row_mirror][col] = mat18[row][col];          /* flipped */
                    mat18[row_mirror][col + 1] = -mat18[row][col + 1]; /* flipped and sign swapped */

                    MOVE( 2 );
                    MULT( 1 ); /*for negate */
#undef WMC_TOOL_SKIP
                }
            }
        }
        else
        {
            float sign_swap = 1.0f;
            assert( n_cols == FDCNG_VQ_MAX_LEN_WB );
            for ( col = 0; col < n_cols; col++ )
            {
                sign_swap = 1.0f - 2.0f * ( col & 1 ); /* -1 for odd columns, +1 for even columns */
                for ( row = 0; row < half_rows; row++ )
                {
                    row_mirror = len - 1 - row;
#define WMC_TOOL_SKIP
                    /* pure ptr based calculations  */
                    mat21[row_mirror][col] = sign_swap * mat21[row][col]; /* flipped and potentially sign swapped */

                    MULT( 1 );
                    MOVE( 1 );
#undef WMC_TOOL_SKIP
                }
            }
        }
    }
#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
    {
        pop_wmops();
    }
#endif
}


#endif
+4 −0
Original line number Diff line number Diff line
@@ -156,6 +156,10 @@
#define OTR_REFERENCE_VECTOR_TRACKING                 /* FhG: enables the reference position orientation tracking mode */
#endif


#define ERI_FDCNGVQ_LOW_ROM                           /*  Eri:  ~1.6kW Table ROM saving for IVAS FDCNG-VQ */
/*#  define ERI_FDCNGVQ_LOW_ROM_TESTING */                /*  Enable testing vs. Baseline for XC SD-measurements ,  deactivate for WMOPS measurement */

/* ################## End DEVELOPMENT switches ######################### */
/* clang-format on */
#endif
+51 −3
Original line number Diff line number Diff line
@@ -8061,6 +8061,10 @@ void msvq_enc(
    const float w[],      /* i  : Weights                                              */
    const int16_t N,      /* i  : Vector dimension                                     */
    const int16_t maxN,   /* i  : Codebook dimension                                   */
#ifdef ERI_FDCNGVQ_LOW_ROM
    const int16_t applyDCT_flag, /* i  : applyDCT flag                                   */
    float *invTrfMatrix,         /* i:/o   expanded synthesis matrix                        */
#endif
    int16_t Idx[] /* o  : Indices                                              */
);

@@ -8072,10 +8076,54 @@ void msvq_dec(
    const int16_t N,        /* i  : Vector dimension                                     */
    const int16_t maxN,     /* i  : Codebook dimension                                   */
    const int16_t Idx[],    /* i  : Indices                                              */
#ifdef ERI_FDCNGVQ_LOW_ROM
    const int16_t applyIDCT_flag, /* i  : applyIDCT flag                                 */
    const float *invTrfMatrix,    /* i: synthesis matrix                                 */
#endif
    float *uq,     /* o  : quantized vector                                     */
    Word16 *uq_ind /* o  : quantized vector (fixed point)                       */
);

#ifdef ERI_FDCNGVQ_LOW_ROM
/* Synthesizes the first-stage FD-CNG MSVQ vector for index j_full into uq,
   using the IDCT matrix passed in invTrfMatrix; uq_ind is asserted to be NULL */
void dec_FDCNG_MSVQ_stage1(
    int16_t j_full,            /* i:   index full range           */
    int16_t n,                 /* i:   dimension to generate      */
    const float *invTrfMatrix, /* i: synthesis matrix */
    DCTTYPE idcttype,          /* i: idct type */
    float *uq,                 /* o:   synthesized stage1 vector  */
    Word16 *uq_ind             /* o:  synthesized stage1 vector in BASOP */
);


/* Expands the compressed ROM representation of an IDCT matrix into a float matrix in RAM */
void create_IDCT_N_Matrix(
    float *inv_matrixFloatQ, /* i/o: RAM buffer */
    const int16_t N,         /* i: DCT length, number of time samples */
    const int16_t n_cols,    /* i: number of DCT coeffs (as the DCT may be truncated) */

    const int16_t alloc_size /* i: RAM buffer size in elements */
);


/* Applies a (possibly truncated) DCT or IDCT by matrix multiplication with a stored IDCT matrix */
void dctT2_N_apply_matrix(
    const float *input,                /* i: vector to transform (fdcng domain for DCT, DCT domain for IDCT) */
    float *output,                     /* o: transformed vector                                              */
    const int16_t dct_dim,             /* i: number of DCT coefficients                                      */
    int16_t fdcngvq_dim,               /* i: vector length in the fdcng domain                               */
    const float *idctT2_N_16matrixQ16, /* i: stored IDCT matrix (float values; named `matrix` in the definition) */
    const int16_t matrix_1st_dim,      /* i: number of elements per stored matrix row (`matrix_row_dim` in the definition) */
    DCTTYPE dcttype );                 /* i: even value -> DCT, odd value -> IDCT                            */

/* Extends an in_dim long fdcng-domain vector to out_dim samples: the first in_dim samples are
   copied from input, the remaining ones are synthesized from dct_input with reflected IDCT rows */
void extend_dctN_input(
    const float *input,     /* i:  input in fdcng domain         */
    const float *dct_input, /* i:  input in dctN(fdcng) domain   */
    const int16_t in_dim,   /* i:  in_dim==N */
    float *ext_sig,         /* o:  extended output in fdcng domain  */
    const int16_t out_dim,  /* i:  output total dim   */
    float *matrix,          /* i: idct matrix of  size  N rows ,  n_cols  columns*/
    const int16_t n_cols,   /* i: number of columns ==  truncation length */
    DCTTYPE dcttype );      /* i: matrix operation type    */
#endif

void PulseResynchronization(
    const float *src_exc,       /* i  : Input excitation buffer                 */
    float *dst_exc,             /* o  : output excitation buffer                */
Loading