Commit 5bb960bf authored by Sandesh Venkatesh
Browse files

Merge branch '1227-basop-encoder-improve-wmops-performance-of-ivas_band_cov' into 'main'

Resolve "BASOP encoder: Improve WMOPS performance of ivas_band_cov"

Closes #1227

See merge request !1015
parents 35a68179 efce5329
Loading
Loading
Loading
Loading
+21 −14
Original line number Diff line number Diff line
@@ -513,9 +513,11 @@ static void ivas_band_cov_fx(
    Word32 pV_re[L_FRAME48k];
    Word64 pV_re_64bit[L_FRAME48k];
    Word64 cov_real_64bit[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH][IVAS_MAX_NUM_BANDS];
    Word16 q_shift;
    Word16 q_shift, q_shift_tmp;
    Word16 m, start_bin, active_bins;
    Word16 num_blocks;

    num_blocks = idiv1616( num_bins, stride ); /* Q0 */
    FOR( i = 0; i < num_chans; i++ )
    {
        FOR( j = i; j < num_chans; j++ )
@@ -536,14 +538,16 @@ static void ivas_band_cov_fx(
            move16();
            FOR( k = 0; k < num_bins; k++ )
            {
                IF( pV_re_64bit[k] != 0 )
                q_shift_tmp = W_norm( pV_re_64bit[k] );
                if ( pV_re_64bit[k] != 0 )
                {
                    q_shift = s_min( q_shift, W_norm( pV_re_64bit[k] ) );
                    q_shift = s_min( q_shift, q_shift_tmp );
                }
            }
            q_shift_tmp = sub( q_shift, 32 );
            FOR( k = 0; k < num_bins; k++ )
            {
                pV_re[k] = W_extract_l( W_shl_nosat( pV_re_64bit[k], sub( q_shift, 32 ) ) ); //(q_In_FR[i1] + q_In_FR[j1]) + (q_shift - 32)
                pV_re[k] = W_extract_l( W_shl_nosat( pV_re_64bit[k], q_shift_tmp ) ); //(q_In_FR[i1] + q_In_FR[j1]) + (q_shift - 32)
                move32();
                /* perform rounding towards lower value for negative results */
                if ( pV_re[k] < 0 )
@@ -556,31 +560,32 @@ static void ivas_band_cov_fx(
            {
                Word64 temp;
                const Word32 *p_bin_to_band = pFb_bin_to_band[k]; // Q22
                Word32 *cov_ptr = pV_re;
                Word16 num_blocks;
                Word32 *cov_ptr;
                Word16 blk;

                temp = 0;
                move64();
                num_blocks = idiv1616( num_bins, stride ); /* Q0 */
                move16();
                start_bin = pFb_start_bin_per_band[k]; /* Q0 */
                move16();
                active_bins = pFb_active_bins_per_band[k]; /* Q0 */
                move16();

                cov_ptr = &pV_re[start_bin];
                move16();
                FOR( blk = 0; blk < num_blocks; blk++ )
                {
                    /* optional: add temporal weight here */
                    FOR( m = start_bin; m < add( start_bin, active_bins ); m++ )
                    FOR( m = 0; m < active_bins; m++ )
                    {
                        temp = W_add( temp, W_mult0_32_32( cov_ptr[m], p_bin_to_band[sub( m, start_bin )] ) ); // ((q_In_FR[i1] + q_In_FR[j1]) + (q_shift - 32), Q22) -> (q_In_FR[i1] + q_In_FR[j1] + (q_shift - 10)
                        temp = W_add( temp, W_mult0_32_32( cov_ptr[m], p_bin_to_band[m] ) ); // ((q_In_FR[i1] + q_In_FR[j1]) + (q_shift - 32), Q22) -> (q_In_FR[i1] + q_In_FR[j1] + (q_shift - 10)
                    }
                    cov_ptr += stride;
                    move16();
                }
                // What basop to add below????
                cov_real_64bit[i][j][k] = temp * (Word64) ( num_blocks ); // (q_In_FR[i1] + q_In_FR[j1] + (q_shift - 10) - guard_bits
                move64();
                move64(); // conservative estimation of a 64 bit multiplication
            }
            q_cov_real[i][j] = add( add( q_In_FR, q_In_FR ), sub( q_shift, Q10 ) );
            move16();
@@ -595,17 +600,19 @@ static void ivas_band_cov_fx(
            move16();
            FOR( k = start_band; k < end_band; k++ )
            {
                IF( cov_real_64bit[i][j][k] != 0 )
                q_shift_tmp = W_norm( cov_real_64bit[i][j][k] );
                if ( cov_real_64bit[i][j][k] != 0 )
                {
                    q_shift = s_min( q_shift, W_norm( cov_real_64bit[i][j][k] ) );
                    q_shift = s_min( q_shift, q_shift_tmp );
                }
            }
            q_shift_tmp = sub( q_shift, 32 );
            FOR( k = start_band; k < end_band; k++ )
            {
                cov_real[i][j][k] = W_extract_l( W_shl_nosat( cov_real_64bit[i][j][k], sub( q_shift, 32 ) ) ); /* q_cov_real[i][j] + q_shift - 32 */
                cov_real[i][j][k] = W_extract_l( W_shl_nosat( cov_real_64bit[i][j][k], q_shift_tmp ) ); /* q_cov_real[i][j] + q_shift - 32 */
                move32();
            }
            q_cov_real[i][j] = add( q_cov_real[i][j], sub( q_shift, 32 ) );
            q_cov_real[i][j] = add( q_cov_real[i][j], q_shift_tmp );
            move16();
        }
    }