Commit 0025eaa5 authored by Sandesh Venkatesh
Browse files

Merge branch '1366-improve-wmops-of-sns_1st_cod_fx' into 'main'

Resolve "Improve WMOPS of sns_1st_cod_fx()"

Closes #1366

See merge request !1251
parents b9942967 149df281
Loading
Loading
Loading
Loading
Loading
+186 −86
Original line number Diff line number Diff line
@@ -58,6 +58,105 @@ static Word16 sns_1st_cod_fx(
    const Word16 core,
    Word32 *snsq_fx /* o  : quantized sns    Q16 */
)
{
    IF( exp_sns == Q15 )
    {
        Word16 index;
        const Word16 split_len = M / 2;
        move16();
        const Word16 *means;
        const Word16 means_fix = 2; // Q15
        move16();
        /* remove means */
        means = NULL;
        SWITCH( L_frame )
        {
            case L_FRAME16k:
                means = &sns_1st_means_16k[core - 1][0]; // Q14
                break;
            case L_FRAME25_6k:
                means = &sns_1st_means_25k6[core - 1][0]; // Q14
                break;
            case L_FRAME32k:
                means = &sns_1st_means_32k[core - 1][0]; // Q14
                break;
            default:
                assert( !"illegal frame length in sns_1st_cod" );
        }
        FOR( Word16 i = 0; i < M; ++i )
        {
            Word32 tmp = L_mult( means[i], means_fix ); // Q14->Q16
            snsq_fx[i] = L_sub( sns_fx[i], tmp );       // Q16
            move32();
        }

        index = 0;
        move16();
        FOR( Word16 split = 0; split < 2; ++split )
        {
            const Word16 *cdbk_ptr;
            Word16 j0, j1;
            Word16 index_split;
            Word32 dist_min_fx;
            const Word16 cdbk_fix = 8; // 1.f / powf( 2, SNS_CDBKS_BITS_4_FRAC ) in Q15
            move16();
            const Word16 *const cdbk = &sns_1st_cdbk[split][core - 1][0]; // Q12

            j0 = imult1616( split, split_len );
            j1 = add( j0, split_len );

            cdbk_ptr = cdbk;
            dist_min_fx = MAXVAL_WORD32;
            index_split = 0;
            move32();
            move16();
            FOR( Word16 i = 0; i < 32; ++i )
            {
                Word32 dist_fx = 0;
                move32();
                FOR( Word16 j = j0; j < j1; ++j ) // j1-j0=split_len. split_len=M/2. M=16
                {
                    Word32 tmp;
                    Word32 dist;

                    tmp = L_mult( *cdbk_ptr++, cdbk_fix ); // Q12->Q16
                    dist = L_sub( snsq_fx[j], tmp );       // Q16
                    dist = L_shl( dist, 11 );              // cdbk_ptr is a 16 bit LUT with 3.12 values, used as 3.16. assumption: snsq_fx has the same representation. thus, the subtraction results are in 4.16, which leaves 11 bit headroom.
                    dist = Mpy_32_32( dist, dist );
                    dist = L_shr( dist, 3 ); // make sure that the sum of 8 values does not overflow
                    dist_fx = L_add( dist_fx, dist );
                }

                IF( LT_32( dist_fx, dist_min_fx ) )
                {
                    dist_min_fx = dist_fx;
                    move32();
                    index_split = i;
                    move16();
                }
            }

            /* set quantized vector */
            cdbk_ptr = &cdbk[imult1616( index_split, split_len )];
            FOR( Word16 j = j0; j < j1; ++j )
            {
                Word32 tmp_3 = L_mult( means[j], means_fix );   // Q16
                Word32 tmp_4 = L_mult( *cdbk_ptr++, cdbk_fix ); // Q16
                snsq_fx[j] = L_add( tmp_4, tmp_3 );             // Q16
                move32();
            }

            /* for second split shift by five bits to store both indices as one 10 bit value */
            if ( EQ_16( split, 1 ) )
            {
                index_split = shl( index_split, 5 );
            }

            index = add( index, index_split );
        }
        return index;
    }
    ELSE
    {
        Word16 index, i;
        const Word16 split_len = M / 2;
@@ -169,6 +268,7 @@ static Word16 sns_1st_cod_fx(

        return index;
    }
}

/*-------------------------------------------------------------------
 * sns_2st_cod()