From fe507516a96bc6f90abe281c0d7d07516c329b33 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Thu, 23 Jan 2025 14:21:45 +0100 Subject: [PATCH 1/6] the changes in ivas_band_cov_fx() reduced the total WMOPS (by 10.524). --- lib_enc/ivas_enc_cov_handler.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/lib_enc/ivas_enc_cov_handler.c b/lib_enc/ivas_enc_cov_handler.c index f736f86ea..80a035fbb 100644 --- a/lib_enc/ivas_enc_cov_handler.c +++ b/lib_enc/ivas_enc_cov_handler.c @@ -513,7 +513,7 @@ static void ivas_band_cov_fx( Word32 pV_re[L_FRAME48k]; Word64 pV_re_64bit[L_FRAME48k]; Word64 cov_real_64bit[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH][IVAS_MAX_NUM_BANDS]; - Word16 q_shift; + Word16 q_shift, q_shift_tmp; Word16 m, start_bin, active_bins; FOR( i = 0; i < num_chans; i++ ) @@ -536,14 +536,16 @@ static void ivas_band_cov_fx( move16(); FOR( k = 0; k < num_bins; k++ ) { - IF( pV_re_64bit[k] != 0 ) + q_shift_tmp = W_norm( pV_re_64bit[k] ); + if ( pV_re_64bit[k] != 0) { - q_shift = s_min( q_shift, W_norm( pV_re_64bit[k] ) ); + q_shift = s_min( q_shift, q_shift_tmp ); } } + q_shift_tmp = sub( q_shift, 32 ); FOR( k = 0; k < num_bins; k++ ) { - pV_re[k] = W_extract_l( W_shl_nosat( pV_re_64bit[k], sub( q_shift, 32 ) ) ); //(q_In_FR[i1] + q_In_FR[j1]) + (q_shift - 32) + pV_re[k] = W_extract_l( W_shl_nosat( pV_re_64bit[k], q_shift_tmp ) ); //(q_In_FR[i1] + q_In_FR[j1]) + (q_shift - 32) move32(); /* perform rounding towards lower value for negative results */ if ( pV_re[k] < 0 ) @@ -569,18 +571,22 @@ static void ivas_band_cov_fx( active_bins = pFb_active_bins_per_band[k]; /* Q0 */ move16(); + cov_ptr += start_bin; + move16(); FOR( blk = 0; blk < num_blocks; blk++ ) { /* optional: add temporal weight here */ - FOR( m = start_bin; m < add( start_bin, active_bins ); m++ ) + FOR( m = 0; m < active_bins; m++ ) { - temp = W_add( temp, W_mult0_32_32( cov_ptr[m], p_bin_to_band[sub( m, start_bin )] ) ); // ((q_In_FR[i1] + q_In_FR[j1]) + (q_shift - 32), Q22) -> (q_In_FR[i1] + q_In_FR[j1] + (q_shift - 10) + temp = W_add( temp, W_mult0_32_32( cov_ptr[m], p_bin_to_band[m] ) ); // ((q_In_FR[i1] + q_In_FR[j1]) + (q_shift - 32), Q22) -> (q_In_FR[i1] + q_In_FR[j1] + (q_shift - 10) } cov_ptr += stride; + move16(); } // What basop to add below???? cov_real_64bit[i][j][k] = temp * (Word64) ( num_blocks ); // (q_In_FR[i1] + q_In_FR[j1] + (q_shift - 10) - guard_bits move64(); + move64(); // conservative estimation of a 64 bit multiplication } q_cov_real[i][j] = add( add( q_In_FR, q_In_FR ), sub( q_shift, Q10 ) ); move16(); @@ -595,17 +601,19 @@ static void ivas_band_cov_fx( move16(); FOR( k = start_band; k < end_band; k++ ) { - IF( cov_real_64bit[i][j][k] != 0 ) + q_shift_tmp = W_norm( cov_real_64bit[i][j][k] ); + if ( cov_real_64bit[i][j][k] != 0 ) { - q_shift = s_min( q_shift, W_norm( cov_real_64bit[i][j][k] ) ); + q_shift = s_min( q_shift, q_shift_tmp ); } } + q_shift_tmp = sub( q_shift, 32 ); FOR( k = start_band; k < end_band; k++ ) { - cov_real[i][j][k] = W_extract_l( W_shl_nosat( cov_real_64bit[i][j][k], sub( q_shift, 32 ) ) ); /* q_cov_real[i][j] + q_shift - 32 */ + cov_real[i][j][k] = W_extract_l( W_shl_nosat( cov_real_64bit[i][j][k], q_shift_tmp ) ); /* q_cov_real[i][j] + q_shift - 32 */ move32(); } - q_cov_real[i][j] = add( q_cov_real[i][j], sub( q_shift, 32 ) ); + q_cov_real[i][j] = add( q_cov_real[i][j], q_shift_tmp ); move16(); } } -- GitLab From 111ed772299d40b66321cec09a986bc2f2606b84 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Thu, 23 Jan 2025 14:27:04 +0100 Subject: [PATCH 2/6] formatting fix. --- lib_enc/ivas_enc_cov_handler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_enc/ivas_enc_cov_handler.c b/lib_enc/ivas_enc_cov_handler.c index 80a035fbb..a1ff81c4d 100644 --- a/lib_enc/ivas_enc_cov_handler.c +++ b/lib_enc/ivas_enc_cov_handler.c @@ -537,7 +537,7 @@ static void ivas_band_cov_fx( FOR( k = 0; k < num_bins; k++ ) { q_shift_tmp = W_norm( pV_re_64bit[k] ); - if ( pV_re_64bit[k] != 0) + if ( pV_re_64bit[k] != 0 ) { q_shift = s_min( q_shift, q_shift_tmp ); } -- GitLab From 176e24235f06bb5dd40d19f05938324e692258b5 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Mon, 27 Jan 2025 10:28:42 +0100 Subject: [PATCH 3/6] moved one idiv1616 out of the for loops to gain even more wmops. --- lib_enc/ivas_enc_cov_handler.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib_enc/ivas_enc_cov_handler.c b/lib_enc/ivas_enc_cov_handler.c index a1ff81c4d..92ccf2e28 100644 --- a/lib_enc/ivas_enc_cov_handler.c +++ b/lib_enc/ivas_enc_cov_handler.c @@ -515,7 +515,11 @@ static void ivas_band_cov_fx( Word64 cov_real_64bit[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH][IVAS_MAX_NUM_BANDS]; Word16 q_shift, q_shift_tmp; Word16 m, start_bin, active_bins; + Word16 num_blocks; + + num_blocks = idiv1616( num_bins, stride ); /* Q0 */ + move16(); FOR( i = 0; i < num_chans; i++ ) { FOR( j = i; j < num_chans; j++ ) @@ -559,13 +563,10 @@ static void ivas_band_cov_fx( Word64 temp; const Word32 *p_bin_to_band = pFb_bin_to_band[k]; // Q22 Word32 *cov_ptr = pV_re; - Word16 num_blocks; Word16 blk; temp = 0; move64(); - num_blocks = idiv1616( num_bins, stride ); /* Q0 */ - move16(); start_bin = pFb_start_bin_per_band[k]; /* Q0 */ move16(); active_bins = pFb_active_bins_per_band[k]; /* Q0 */ -- GitLab From 1cc587e91914bd66f52928af9657ef83493f4b78 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Mon, 27 Jan 2025 13:28:31 +0100 Subject: [PATCH 4/6] reverted the last change, since it did not impact the WMOPS that much. And now there are broken pipelines. --- lib_enc/ivas_enc_cov_handler.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib_enc/ivas_enc_cov_handler.c b/lib_enc/ivas_enc_cov_handler.c index 92ccf2e28..a1ff81c4d 100644 --- a/lib_enc/ivas_enc_cov_handler.c +++ b/lib_enc/ivas_enc_cov_handler.c @@ -515,11 +515,7 @@ static void ivas_band_cov_fx( Word64 cov_real_64bit[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH][IVAS_MAX_NUM_BANDS]; Word16 q_shift, q_shift_tmp; Word16 m, start_bin, active_bins; - Word16 num_blocks; - - num_blocks = idiv1616( num_bins, stride ); /* Q0 */ - move16(); FOR( i = 0; i < num_chans; i++ ) { FOR( j = i; j < num_chans; j++ ) @@ -563,10 +559,13 @@ static void ivas_band_cov_fx( Word64 temp; const Word32 *p_bin_to_band = pFb_bin_to_band[k]; // Q22 Word32 *cov_ptr = pV_re; + Word16 num_blocks; Word16 blk; temp = 0; move64(); + num_blocks = idiv1616( num_bins, stride ); /* Q0 */ + move16(); start_bin = pFb_start_bin_per_band[k]; /* Q0 */ move16(); active_bins = pFb_active_bins_per_band[k]; /* Q0 */ -- GitLab From c45cd7c0527a4ccf2d876706073b6d875e7cfcb5 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Tue, 28 Jan 2025 08:05:02 +0100 Subject: [PATCH 5/6] moved the idiv1616 outside the for loop in ivas_band_cov_fx() to gain WMOPS. --- lib_enc/ivas_enc_cov_handler.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_enc/ivas_enc_cov_handler.c b/lib_enc/ivas_enc_cov_handler.c index a1ff81c4d..0a4b53aaa 100644 --- a/lib_enc/ivas_enc_cov_handler.c +++ b/lib_enc/ivas_enc_cov_handler.c @@ -515,7 +515,9 @@ static void ivas_band_cov_fx( Word64 cov_real_64bit[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH][IVAS_MAX_NUM_BANDS]; Word16 q_shift, q_shift_tmp; Word16 m, start_bin, active_bins; + Word16 num_blocks; + num_blocks = idiv1616( num_bins, stride ); /* Q0 */ FOR( i = 0; i < num_chans; i++ ) { FOR( j = i; j < num_chans; j++ ) @@ -559,13 +561,11 @@ static void ivas_band_cov_fx( Word64 temp; const Word32 *p_bin_to_band = pFb_bin_to_band[k]; // Q22 Word32 *cov_ptr = pV_re; - Word16 num_blocks; Word16 blk; + move16(); temp = 0; move64(); - num_blocks = idiv1616( num_bins, stride ); /* Q0 */ - move16(); start_bin = pFb_start_bin_per_band[k]; /* Q0 */ move16(); active_bins = pFb_active_bins_per_band[k]; /* Q0 */ -- GitLab From bd629b6cf5cec718827351ea333efb98904587bb Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Tue, 28 Jan 2025 08:20:20 +0100 Subject: [PATCH 6/6] fixed a pointer issue in ivas_band_cov_fx() --- lib_enc/ivas_enc_cov_handler.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib_enc/ivas_enc_cov_handler.c b/lib_enc/ivas_enc_cov_handler.c index 0a4b53aaa..5c19d8e02 100644 --- a/lib_enc/ivas_enc_cov_handler.c +++ b/lib_enc/ivas_enc_cov_handler.c @@ -560,9 +560,8 @@ static void ivas_band_cov_fx( { Word64 temp; const Word32 *p_bin_to_band = pFb_bin_to_band[k]; // Q22 - Word32 *cov_ptr = pV_re; + Word32 *cov_ptr; Word16 blk; - move16(); temp = 0; move64(); @@ -571,7 +570,7 @@ static void ivas_band_cov_fx( active_bins = pFb_active_bins_per_band[k]; /* Q0 */ move16(); - cov_ptr += start_bin; + cov_ptr = &pV_re[start_bin]; move16(); FOR( blk = 0; blk < num_blocks; blk++ ) { -- GitLab