Commit ebc7ceef authored by Adityaraj Jain's avatar Adityaraj Jain
Browse files

msvq with 64bit basop for dist

parent f1458362
Loading
Loading
Loading
Loading
+20 −0
Original line number Diff line number Diff line
@@ -1918,6 +1918,26 @@ Word16 findIndexOfMinWord32( Word32 *x, const Word16 len )
    return indx;
}

/*--------------------------------------------------------------------------*
 * findIndexOfMinWord64()
 *
 * Returns the position of the smallest element of x[0..len-1].
 * The comparison is strict, so on ties the earliest position is kept.
 * When len is 1 or less the loop body never runs and 0 is returned.
 *--------------------------------------------------------------------------*/
Word16 findIndexOfMinWord64( Word64 *x, const Word16 len )
{
    Word16 pos;  /* running position in x[]            */
    Word16 best; /* position of the smallest value yet */

    best = 0;
    move16();

    FOR( pos = 1; pos < len; pos++ )
    {
        if ( LT_64( x[pos], x[best] ) )
        {
            /* strictly smaller value found: remember where it is */
            best = pos;
            move16();
        }
    }

    return best;
}


Word16 imult1616( Word16 x, Word16 y )
{
+1 −0
Original line number Diff line number Diff line
@@ -538,6 +538,7 @@ Word16 findIndexOfMinWord16( Word16 *x, const Word16 len );
  \return   index of min Word32
 */
Word16 findIndexOfMinWord32( Word32 *x, const Word16 len );
/*!
  \brief    Get the index of the minimum value in a Word64 array of length len
            (first occurrence on ties, 0 when len <= 1)

  \return   index of min Word64
 */
Word16 findIndexOfMinWord64( Word64 *x, const Word16 len );

/****************************************************************************/
/*!
+329 −3
Original line number Diff line number Diff line
@@ -468,6 +468,33 @@ static void depack_sub_values_fx( Word16 *pTmp, const Word16 *p1, const Word16 *
}


/*--------------------------------------------------------------------------*
 * depack_mul_values_fx64()
 *
 * Unpacks N codebook values, four at a time, from the packed codevector
 * cbp (the read pointer advances by three words per group of four values),
 * multiplies each value by its weight and stores the product in Tmp[].
 * The products are additionally multiplied by the unpacked value again and
 * accumulated into a 64-bit sum, which is returned.
 *
 * N is processed in steps of 4; Tmp[] and w[] are accessed up to the next
 * multiple of 4 at or above N.
 *--------------------------------------------------------------------------*/
static Word64 depack_mul_values_fx64(
    Word32 *Tmp,       /* o  : weighted codevector, Tmp[i] = w[i] * value[i]      */
    const Word16 *w,   /* i  : weights                                            */
    const Word16 *cbp, /* i  : packed codevector                                  */
    const Word16 N     /* i  : number of values to unpack                         */
)
{
    Word16 i, val0, val1, val2, val3;
    Word64 en;

    /* 64-bit accumulator, hence a 64-bit move for the complexity count */
    en = 0;
    move64();
    FOR( i = 0; i < N; i += 4 )
    {
        /* group index is i/4, each group occupies 3 packed words */
        depack_4_values( cbp + i_mult( shr( i, 2 ), 3 ), val0, val1, val2, val3 );

        Tmp[i + 0] = L_mult0( w[i + 0], val0 ); // Q8 * Q2.56
        move32();
        en = W_mac_32_16( en, Tmp[i + 0], val0 ); // Q8 * Q2.56 * 2.56 * Q1

        Tmp[i + 1] = L_mult0( w[i + 1], val1 );
        move32();
        en = W_mac_32_16( en, Tmp[i + 1], val1 );

        Tmp[i + 2] = L_mult0( w[i + 2], val2 );
        move32();
        en = W_mac_32_16( en, Tmp[i + 2], val2 );

        Tmp[i + 3] = L_mult0( w[i + 3], val3 );
        move32();
        en = W_mac_32_16( en, Tmp[i + 3], val3 );
    }

    return en; // Q8 * Q2.56 * 2.56 * Q1
}

/*--------------------------------------------------------------------------*
 * msvq_enc_find_p_max_8()
 *
@@ -522,6 +549,54 @@ static Word16 msvq_enc_find_p_max_8_fx( Word32 dist[] )
}


/*--------------------------------------------------------------------------*
 * msvq_enc_find_p_max_8_fx64()
 *
 * Returns the index of the largest of the eight 64-bit distortions
 * dist[0..7]. The comparison is strict, so the lowest index wins on ties.
 * The search is written out entry by entry (no loop).
 *--------------------------------------------------------------------------*/
static Word16 msvq_enc_find_p_max_8_fx64( Word64 dist[] )
{
    Word16 worst; /* index of the largest distortion seen so far */

    worst = 0;
    move16();

    BASOP_SATURATE_WARNING_OFF_EVS
    /* entry 1 */
    if ( GT_64( dist[1], dist[worst] ) )
    {
        worst = 1;
        move16();
    }
    /* entry 2 */
    if ( GT_64( dist[2], dist[worst] ) )
    {
        worst = 2;
        move16();
    }
    /* entry 3 */
    if ( GT_64( dist[3], dist[worst] ) )
    {
        worst = 3;
        move16();
    }
    /* entry 4 */
    if ( GT_64( dist[4], dist[worst] ) )
    {
        worst = 4;
        move16();
    }
    /* entry 5 */
    if ( GT_64( dist[5], dist[worst] ) )
    {
        worst = 5;
        move16();
    }
    /* entry 6 */
    if ( GT_64( dist[6], dist[worst] ) )
    {
        worst = 6;
        move16();
    }
    /* entry 7 */
    if ( GT_64( dist[7], dist[worst] ) )
    {
        worst = 7;
        move16();
    }
    BASOP_SATURATE_WARNING_ON_EVS

    return worst;
}


/*--------------------------------------------------------------------------*
 * msvq_enc_find_p_max_6()
 *
@@ -565,6 +640,43 @@ static Word16 msvq_enc_find_p_max_6_fx( Word32 dist[] )
    return p_max;
}

/*--------------------------------------------------------------------------*
 * msvq_enc_find_p_max_6_fx64()
 *
 * Returns the index of the largest of the six 64-bit distortions
 * dist[0..5]. The comparison is strict, so the lowest index wins on ties.
 * The search is written out entry by entry (no loop).
 *--------------------------------------------------------------------------*/
static Word16 msvq_enc_find_p_max_6_fx64( Word64 dist[] )
{
    Word16 worst; /* index of the largest distortion seen so far */

    worst = 0;
    move16();

    BASOP_SATURATE_WARNING_OFF_EVS
    /* entry 1 */
    if ( GT_64( dist[1], dist[worst] ) )
    {
        worst = 1;
        move16();
    }
    /* entry 2 */
    if ( GT_64( dist[2], dist[worst] ) )
    {
        worst = 2;
        move16();
    }
    /* entry 3 */
    if ( GT_64( dist[3], dist[worst] ) )
    {
        worst = 3;
        move16();
    }
    /* entry 4 */
    if ( GT_64( dist[4], dist[worst] ) )
    {
        worst = 4;
        move16();
    }
    /* entry 5 */
    if ( GT_64( dist[5], dist[worst] ) )
    {
        worst = 5;
        move16();
    }
    BASOP_SATURATE_WARNING_ON_EVS

    return worst;
}


/*--------------------------------------------------------------------------*
 * msvq_enc_fx()
@@ -787,7 +899,222 @@ void msvq_enc_fx(
    return;
}

/*--------------------------------------------------------------------------*
 * msvq_enc_lsf_fx64()
 *
 * Multi-stage vector quantizer encoder with an M-best tree search
 * (maxC candidates are kept from one stage to the next). The weighted
 * distortion of every candidate is accumulated and compared in 64 bits.
 * On return Idx[0..stages-1] holds the codebook indices of the candidate
 * path with the smallest final distortion.
 *--------------------------------------------------------------------------*/
void msvq_enc_lsf_fx64(
    const Word16 *const *cb, /* i  : Codebook (indexed cb[*stages][levels][p])         (10Q5 * 1.28) */
    const Word16 dims[],     /* i  : Dimension of each codebook stage (NULL: full dim.)       */
    const Word16 offs[],     /* i  : Starting dimension of each codebook stage (NULL: 0)      */
    const Word16 u[],        /* i  : Vector to be encoded (prediction and mean removed)(Q14Q1*1.28) */
    const Word16 *levels,    /* i  : Number of levels in each stage                           */
    const Word16 maxC,       /* i  : Tree search size (number of candidates kept from         */
    /*      one stage to the next == M-best)                         */
    const Word16 stages, /* i  : Number of stages                                         */
    const Word16 w[],    /* i  : Weights                                                  Q8*/
    const Word16 N,      /* i  : Vector dimension                                         */
    const Word16 maxN,   /* i  : Codebook dimension                                       */
    Word16 Idx[]         /* o  : Indices                                                  */
)
{
    Word16 j;
    const Word16 *cbp;
    Word16 p2i;
    Word16 resid_buf[2 * LSFMBEST_MAX * M_MAX], *resid[2];
    Word16 *pTmp, *p1;
    Word16 *indices[2], m, s, c, c2, p_max, i;
    Word32 Tmp32[M_MAX];
    Word16 idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX];
    Word64 *dist_64[2], en64, tmp64;
    Word64 dist_buf_64[2 * LSFMBEST_MAX];
    Word16 ( *func_ptr64 )( Word64 * );
    Word16 N34;
    Word16 n, maxn, start;

    /*----------------------------------------------------------------*
     * Allocate memory for previous (parent) and current nodes.
     *   Parent node is indexed [0], current node is indexed [1].
     *   Each of the three work buffers (indices, residuals, distortions)
     *   is split into two halves that are swapped at every stage.
     *----------------------------------------------------------------*/
    indices[0] = idx_buf;
    indices[1] = idx_buf + maxC * stages; /*move16();*/
    /*vr_iset(0, idx_buf, 2*stages*maxC);*/
    set16_fx( idx_buf, 0, (Word16) ( 2 * stages * maxC ) );

    resid[0] = resid_buf;
    resid[1] = resid_buf + maxC * N; /*move16();*/

    dist_64[0] = dist_buf_64;
    dist_64[1] = dist_buf_64 + maxC; /*move16();*/

    /*vr_iset(0, parents, maxC);*/
    set16_fx( parents, 0, maxC );


    /* Select the routine that locates the worst kept candidate: the 8-entry
     * search when maxC == 8, the 6-entry search for every other value.
     * NOTE(review): the 6-entry search always reads dist[0..5], so this
     * presumably relies on maxC being either 6 or 8 - confirm against callers. */
    func_ptr64 = msvq_enc_find_p_max_6_fx64;
    move16();
    if ( EQ_16( maxC, 8 ) )
    {
        func_ptr64 = msvq_enc_find_p_max_8_fx64;
        move16();
    }

    /*----------------------------------------------------------------*
     * LSF weights are normalized, so it is always better to multiply it first
     * Set up initial distance vector:
     *   ss2_64 accumulates (u[j] * w[j]) * u[j] over all N dimensions
     *----------------------------------------------------------------*/
    /* Q0/16 * Qw_norm/16 << 1 >> 16 => Qwnorm-15/16 * Q0/16 << 1 => Qwnorm-14/32 * 6.5536 */
    Word64 ss2_64;
    ss2_64 = W_mult_32_16( L_mult0( u[0], w[0] ), u[0] );
    // Q8 * Q2.56 * 2.56 * Q1
    FOR( j = 1; j < N; j++ )
    {
        ss2_64 = W_mac_32_16( ss2_64, L_mult0( u[j], w[j] ), u[j] );
    }

    /* Set up initial error (residual) vectors: every candidate slot starts
     * from the input vector and its weighted energy */
    pTmp = resid[1]; /*move16();*/
    FOR( c = 0; c < maxC; c++ )
    {
        Copy( u, pTmp + c * N, N );
        dist_64[1][c] = ss2_64;
        move64();
    }

    /* Loop over all stages; m is the number of parent nodes to expand
     * (1 for the first stage, maxC afterwards) */
    m = 1;
    move16();
    FOR( s = 0; s < stages; s++ )
    {
        /* codebook pointer is set to point to the first entry of stage s */
        cbp = cb[s]; /*3Q12*1.28*/
        move16();

        /* Set up pointers to parent and current nodes: what was written as
         * "current" in the previous stage becomes "parent" ([0]) now */
        swap( indices[0], indices[1], Word16 * );
        move16();
        move16();
        move16();
        move16();
        swap( resid[0], resid[1], Word16 * );
        move16();
        move16();
        move16();
        swap( dist_64[0], dist_64[1], Word64 * );
        move64();
        move64();
        move64();

        /* p_max points to maximum distortion node (worst of best) */
        p_max = 0;
        move16();

        /* n: number of dimensions coded by this stage; maxn: dimension used
         * for the packed codebook stride. Both come from dims[s] when a
         * per-stage dimension table is supplied. */
        n = N;
        move16();
        maxn = maxN;
        move16();
        if ( dims )
        {
            n = dims[s];
            move16();
        }
        if ( dims )
        {
            maxn = n;
            move16();
        }

        /* the depack helper unpacks four values per iteration */
        assert( ( maxn % 4 ) == 0 );
        /* distance between consecutive packed codevectors: 3/4 of maxn */
        N34 = mult( maxn, 24576 /*0.75f Q15*/ );

        /* first dimension covered by this stage */
        start = 0;
        move16();
        if ( offs )
        {
            start = offs[s];
            move16();
        }

        /* Zero the weighted codevector outside [start, start + n) so that the
         * full-length inner product below only sees the active sub-vector */
        set32_fx( Tmp32, 0, start );
        set32_fx( Tmp32 + start + n, 0, sub( N, add( start, n ) ) );

        /* Set distortions to a large value */
        FOR( j = 0; j < maxC; j++ )
        {
            dist_64[1][j] = LLONG_MAX;
            move64();
        }

        FOR( j = 0; j < levels[s]; j++ )
        {
            /* Compute weighted codebook element and its energy */
            en64 = depack_mul_values_fx64( Tmp32 + start, w + start, cbp, n ); // Q8
            // en64: Q8 * Q2.56 * Q2.56 * q1
            // Tmp: 2.56 * Q8

            cbp += N34; /* pointer is incremented */

            /* Iterate over all parent nodes */
            FOR( c = 0; c < m; c++ )
            {
                pTmp = &resid[0][c * N]; // residual of parent node c (a copy of the input vector u in the first stage)
                /*tmp = (*pTmp++) * Tmp[0];*/
                /* t164: inner product of the weighted codevector with the parent residual */
                Word64 t164 = 0;
                move64();
                t164 = W_mult_32_16( Tmp32[0], pTmp[0] ); // 2.56 * Q8 * Q2.56 * Q1
                // Tmp32: Q8 * Q2.56
                FOR( i = 1; i < N; i++ )
                {
                    t164 = W_mac_32_16( t164, Tmp32[i], pTmp[i] ); // 2.56 * Q8 * Q2.56 * Q1
                }

                /* candidate distortion = parent distortion + codevector energy - 2 * inner product */
                tmp64 = W_add( dist_64[0][c], W_sub( en64, W_shl( t164, 1 ) ) );
                /* t164 is reused as the difference to the worst distortion currently kept */
                t164 = W_sub( tmp64, dist_64[1][p_max] );
                IF( t164 <= 0 )
                {
                    /* Replace worst */
                    dist_64[1][p_max] = tmp64;
                    move64();
                    indices[1][p_max * stages + s] = j;
                    move16();
                    parents[p_max] = c;
                    move16();

                    /* locate the new worst entry among the kept candidates */
                    p_max = ( *func_ptr64 )( dist_64[1] );

                } /* IF( t164 <= 0 ) */
            }     /* FOR (c=0; c<m; c++) */
        }         /* FOR (j=0; j<levels[s]; j++) */

        /*------------------------------------------------------------*
         * Compute error vectors for each node
         *------------------------------------------------------------*/
        pTmp = resid[1];
        FOR( c = 0; c < maxC; c++ )
        {
            /* Subtract the selected codebook entry from the residual vector of
             * the parent node; dimensions outside [start, start + n) are
             * copied from the parent unchanged */
            p1 = resid[0] + parents[c] * N;
            p2i = indices[1][c * stages + s];
            move16();

            Copy( p1, pTmp, start );
            depack_sub_values_fx( pTmp + start, p1 + start, &cb[s][p2i * N34], n );
            Copy( p1 + start + n, pTmp + start + n, sub( N, add( start, n ) ) );

            pTmp += N;

            /* Get indices that were used for parent node (stages 0..s-1) */
            /*mvs2s(indices[0]+parents[c]*stages, indices[1]+c*stages, s);*/
            Copy( indices[0] + parents[c] * stages, indices[1] + c * stages, s );
        } /* for (c=0; c<maxC; c++) */
        /* from the second stage on, all maxC kept candidates act as parents */
        m = maxC;
        move16();
    } /* for (m=1, s=0; s<stages; s++) */

    /* Find the optimum candidate (smallest accumulated distortion) */
    c2 = findIndexOfMinWord64( dist_64[1], maxC );
    /*mvi2i (indices[1]+c2*stages, Idx, stages);*/
    Copy( indices[1] + c2 * stages, Idx, stages );


    return;
}
/*--------------------------------------------------------------------------*
 * msvq_enc_ivas_fx()
 *
@@ -1530,8 +1857,7 @@ Word16 Q_lsf_tcxlpc_ivas_fx(
        move16();
    }


    msvq_enc_fx(
    msvq_enc_lsf_fx64(
        lsf_codebook[narrowband][cdk],
        lsf_dims,
        lsf_offs,
@@ -1572,7 +1898,7 @@ Word16 Q_lsf_tcxlpc_ivas_fx(
    }

    /* Quantize using extra stage(s) */
    msvq_enc_fx(
    msvq_enc_lsf_fx64(
        lsf_ind_codebook[narrowband][cdk],
        lsf_ind_dims,
        lsf_ind_offs,
+16 −0
Original line number Diff line number Diff line
@@ -3131,6 +3131,22 @@ void msvq_enc_fx(
    const Word16 maxN,   /* i  : Codebook dimension                                           */
    Word16 Idx[]         /* o  : Indices                                                      */
);

/* Multi-stage VQ encoder (M-best tree search) that accumulates and compares
 * the candidate distortions in 64 bits; writes one index per stage to Idx[]. */
void msvq_enc_lsf_fx64(
    const Word16 *const *cb, /* i  : Codebook (indexed cb[*stages][levels][p])         (Q10Q5*1.28 ) */
    const Word16 dims[],     /* i  : Dimension of each codebook stage (NULL: full dim.)       */
    const Word16 offs[],     /* i  : Starting dimension of each codebook stage (NULL: 0)      */
    const Word16 u[],        /* i  : Vector to be encoded (prediction and mean removed)(14Q1*1.28) */
    const Word16 *levels,    /* i  : Number of levels in each stage                           */
    const Word16 maxC,       /* i  : Tree search size (number of candidates kept from         */
    /*      one stage to the next == M-best)                         */
    const Word16 stages, /* i  : Number of stages                                         */
    const Word16 w[],    /* i  : Weights                                                  Q8*/
    const Word16 N,      /* i  : Vector dimension                                         */
    const Word16 maxN,   /* i  : Codebook dimension                                       */
    Word16 Idx[]         /* o  : Indices                                                  */
);

void midlsf_enc_fx(
    const Word16 qlsf0[],  /* i: quantized lsf coefficients (3Q12)	*/
    const Word16 qlsf1[],  /* i: quantized lsf coefficients (3Q12)	*/