From 162ace01650273f64c0b1d49a9839399043a2273 Mon Sep 17 00:00:00 2001 From: naghibza Date: Wed, 17 Sep 2025 15:31:31 +0200 Subject: [PATCH 01/14] reduced headroom in hp20_fx_32 calculation. --- lib_com/hp50_fx.c | 6 +++--- lib_com/options.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index d6a1182a4..f83089287 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -355,7 +355,7 @@ void hp20_fx_32( Word16 prescale, prescaleOld, prescale_current_frame, diff; prescale = getScaleFactor32( signal_fx, lg ); - prescale_current_frame = s_min( 3, sub( 1 + HP20_COEFF_SCALE, prescale ) ); + prescale_current_frame = s_min( 3, sub( 1 , prescale ) ); prescaleOld = extract_l( mem_fx[4] ); @@ -369,14 +369,14 @@ void hp20_fx_32( #endif - diff = L_max( diff_pos, diff_neg ); + diff = L_min( diff_pos, diff_neg ); IF( NE_16( diff, 0 ) ) { prescale = s_min( prescale, diff ); } - prescale = s_min( 3, sub( 1 + HP20_COEFF_SCALE, prescale ) ); + prescale = s_min( 3, sub( 1 , prescale ) ); diff = sub( prescale, prescaleOld ); #ifdef ISSUE_1836_replace_overflow_libcom diff --git a/lib_com/options.h b/lib_com/options.h index 488defa92..d2d898150 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -102,6 +102,7 @@ #define FIX_2010_PREP_TBE_EXC /* FhG: fix issues with varying Q-values for code_preQ_fx[] */ #define FIX_2025_FDCNG_MULT /* Nokia, FhG: Fix LB FDCNG noise introduced by changing mult to i_mult */ +#define HP20_FIX32_RECODING /* FhG: reduced headroom in hp20_fx_32 calculation */ /* #################### Start BASOP porting switches ############################ */ #define NONBE_1244_FIX_SWB_BWE_MEMORY /* VA: issue 1244: fix to SWB BWE memory in case of switching from FB coding - pending a review by Huawei */ -- GitLab From 6e343d99e2291164c2384e6554eed1b41a53300b Mon Sep 17 00:00:00 2001 From: naghibza Date: Wed, 17 Sep 2025 15:37:50 +0200 Subject: [PATCH 02/14] applied clang formatting patch --- lib_com/hp50_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index f83089287..adfe3d099 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -355,7 +355,7 @@ void hp20_fx_32( Word16 prescale, prescaleOld, prescale_current_frame, diff; prescale = getScaleFactor32( signal_fx, lg ); - prescale_current_frame = s_min( 3, sub( 1 , prescale ) ); + prescale_current_frame = s_min( 3, sub( 1, prescale ) ); prescaleOld = extract_l( mem_fx[4] ); @@ -376,7 +376,7 @@ void hp20_fx_32( prescale = s_min( prescale, diff ); } - prescale = s_min( 3, sub( 1 , prescale ) ); + prescale = s_min( 3, sub( 1, prescale ) ); diff = sub( prescale, prescaleOld ); #ifdef ISSUE_1836_replace_overflow_libcom -- GitLab From 03918345f565d6ff4e1f778e64d8e920249153fb Mon Sep 17 00:00:00 2001 From: naghibza Date: Thu, 18 Sep 2025 08:47:08 +0200 Subject: [PATCH 03/14] increased one bit headroom. --- lib_com/hp50_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index adfe3d099..7851e80f0 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -355,7 +355,7 @@ void hp20_fx_32( Word16 prescale, prescaleOld, prescale_current_frame, diff; prescale = getScaleFactor32( signal_fx, lg ); - prescale_current_frame = s_min( 3, sub( 1, prescale ) ); + prescale_current_frame = s_min( 3, sub( HP20_COEFF_SCALE, prescale ) ); prescaleOld = extract_l( mem_fx[4] ); @@ -376,7 +376,7 @@ void hp20_fx_32( prescale = s_min( prescale, diff ); } - prescale = s_min( 3, sub( 1, prescale ) ); + prescale = s_min( 3, sub( HP20_COEFF_SCALE, prescale ) ); diff = sub( prescale, prescaleOld ); #ifdef ISSUE_1836_replace_overflow_libcom -- GitLab From 702f2cb1c36e7468edb0cd54854d8b13e063bc0e Mon Sep 17 00:00:00 2001 From: naghibza Date: Thu, 18 Sep 2025 10:41:41 +0200 Subject: [PATCH 04/14] replaced getScaleFactor32 with L_norm_arr. --- lib_com/hp50_fx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index 7851e80f0..98ba70b60 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -354,9 +354,8 @@ void hp20_fx_32( #endif Word16 prescale, prescaleOld, prescale_current_frame, diff; - prescale = getScaleFactor32( signal_fx, lg ); - prescale_current_frame = s_min( 3, sub( HP20_COEFF_SCALE, prescale ) ); - + prescale = L_norm_arr( signal_fx, lg ); + prescale_current_frame = sub( HP20_COEFF_SCALE, prescale ); prescaleOld = extract_l( mem_fx[4] ); @@ -376,7 +375,7 @@ void hp20_fx_32( prescale = s_min( prescale, diff ); } - prescale = s_min( 3, sub( HP20_COEFF_SCALE, prescale ) ); + prescale = sub( HP20_COEFF_SCALE, prescale ); diff = sub( prescale, prescaleOld ); #ifdef ISSUE_1836_replace_overflow_libcom -- GitLab From 10b6b230d1cd0c96f3080e24b6333ac1fe810160 Mon Sep 17 00:00:00 2001 From: naghibza Date: Thu, 18 Sep 2025 12:21:40 +0200 Subject: [PATCH 05/14] replaced W_extract_h with W_round64_L. --- lib_com/hp50_fx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index 98ba70b60..146490aca 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -457,7 +457,7 @@ void hp20_fx_32( W_sum = W_mac_32_32( W_sum, mem_fx[0], a2_fx ); /* y2*a2 */ W_sum = W_mac_32_32( W_sum, mem_fx[1], a1_fx ); /* y1*a1 */ W_y2 = W_shl( W_sum, HP20_COEFF_SCALE ); - signal_fx[0] = W_extract_h( W_shl( W_y2, prescale ) ); + signal_fx[0] = W_round64_L( W_shl( W_y2, prescale ) ); move32(); W_sum = W_mult_32_32( b2_fx, mem_fx[3] ); /* b2*x2 */ @@ -467,7 +467,7 @@ void hp20_fx_32( W_sum = W_mac_32_32( W_sum, mem_fx[1], a2_fx ); /* y2*a2 */ W_sum = W_mac_32_32( W_sum, W_extract_h( W_y2 ), a1_fx ); /* y1*a1 */ W_y1 = W_shl( W_sum, HP20_COEFF_SCALE ); - signal_fx[1] = W_extract_h( W_shl( W_y1, prescale ) ); + signal_fx[1] = W_round64_L( W_shl( W_y1, prescale ) ); move32(); diff = sub( prescale_current_frame, prescale ); @@ -486,7 +486,7 @@ void hp20_fx_32( W_sum = W_mac_32_32( W_sum, W_extract_h( W_y1 ), a1_fx ); /* y1*a1 */ W_y0 = W_shl( W_sum, HP20_COEFF_SCALE ); - signal_fx[i] = W_extract_h( W_shl( W_y0, prescale_current_frame ) ); + signal_fx[i] = W_round64_L( W_shl( W_y0, prescale_current_frame ) ); move32(); x2 = x1; -- GitLab From 63fa4592cf995107b3c0fa5ffa0deed9827cae81 Mon Sep 17 00:00:00 2001 From: naghibza Date: Thu, 18 Sep 2025 13:57:35 +0200 Subject: [PATCH 06/14] replaced all W_extract_h with W_round64_L. --- lib_com/hp50_fx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index 146490aca..392604731 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -465,7 +465,7 @@ void hp20_fx_32( x1 = L_shr( signal_fx[1], prescale ); W_sum = W_mac_32_32( W_sum, b2_fx, x1 ); /* b2*x0 */ W_sum = W_mac_32_32( W_sum, mem_fx[1], a2_fx ); /* y2*a2 */ - W_sum = W_mac_32_32( W_sum, W_extract_h( W_y2 ), a1_fx ); /* y1*a1 */ + W_sum = W_mac_32_32( W_sum, W_round64_L( W_y2 ), a1_fx ); /* y1*a1 */ W_y1 = W_shl( W_sum, HP20_COEFF_SCALE ); signal_fx[1] = W_round64_L( W_shl( W_y1, prescale ) ); move32(); @@ -482,8 +482,8 @@ void hp20_fx_32( W_sum = W_mac_32_32( W_sum, b1_fx, x1 ); /* b1*x1 */ x0 = L_shr( signal_fx[i], prescale_current_frame ); W_sum = W_mac_32_32( W_sum, b2_fx, x0 ); /* b2*x0 */ - W_sum = W_mac_32_32( W_sum, W_extract_h( W_y2 ), a2_fx ); /* y2*a2 */ - W_sum = W_mac_32_32( W_sum, W_extract_h( W_y1 ), a1_fx ); /* y1*a1 */ + W_sum = W_mac_32_32( W_sum, W_round64_L( W_y2 ), a2_fx ); /* y2*a2 */ + W_sum = W_mac_32_32( W_sum, W_round64_L( W_y1 ), a1_fx ); /* y1*a1 */ W_y0 = W_shl( W_sum, HP20_COEFF_SCALE ); signal_fx[i] = W_round64_L( W_shl( W_y0, prescale_current_frame ) ); @@ -500,8 +500,8 @@ void hp20_fx_32( move64(); } - mem_fx[0] = W_extract_h( W_y2 ); - mem_fx[1] = W_extract_h( W_y1 ); + mem_fx[0] = W_round64_L( W_y2 ); + mem_fx[1] = W_round64_L( W_y1 ); mem_fx[2] = x2; mem_fx[3] = x1; -- GitLab From 13f79214082a556cb5f19c81fd26ea138a27350b Mon Sep 17 00:00:00 2001 From: naghibza Date: Thu, 18 Sep 2025 15:04:53 +0200 Subject: [PATCH 07/14] replaced W_mult_32_32 with W_mult0_32_32 to avoid a 1-bit shift in W_mac_32_32. --- lib_com/hp50_fx.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index 392604731..69d9da899 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -450,23 +450,23 @@ void hp20_fx_32( Word64 W_sum, W_y0, W_y1, W_y2; Word32 x0, x1, x2; - W_sum = W_mult_32_32( b2_fx, mem_fx[2] ); /* b2*x2 */ - W_sum = W_mac_32_32( W_sum, b1_fx, mem_fx[3] ); /* b1*x1 */ + W_sum = W_mult0_32_32( b2_fx, mem_fx[2] ); /* b2*x2 */ + W_sum = W_add( W_sum, W_mult0_32_32( b1_fx, mem_fx[3] ) ); /* b1*x1 */ x2 = L_shr( signal_fx[0], prescale ); - W_sum = W_mac_32_32( W_sum, b2_fx, x2 ); /* b2*x0 */ - W_sum = W_mac_32_32( W_sum, mem_fx[0], a2_fx ); /* y2*a2 */ - W_sum = W_mac_32_32( W_sum, mem_fx[1], a1_fx ); /* y1*a1 */ - W_y2 = W_shl( W_sum, HP20_COEFF_SCALE ); + W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x2 ) ); /* b2*x0 */ + W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[0], a2_fx ) ); /* y2*a2 */ + W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[1], a1_fx ) ); /* y1*a1 */ + W_y2 = W_shl( W_sum, add( 1, HP20_COEFF_SCALE ) ); signal_fx[0] = W_round64_L( W_shl( W_y2, prescale ) ); move32(); - W_sum = W_mult_32_32( b2_fx, mem_fx[3] ); /* b2*x2 */ - W_sum = W_mac_32_32( W_sum, b1_fx, x2 ); /* b1*x1 */ + W_sum = W_mult0_32_32( b2_fx, mem_fx[3] ); /* b2*x2 */ + W_sum = W_add( W_sum, W_mult0_32_32( b1_fx, x2 ) ); /* b1*x1 */ x1 = L_shr( signal_fx[1], prescale ); - W_sum = W_mac_32_32( W_sum, b2_fx, x1 ); /* b2*x0 */ - W_sum = W_mac_32_32( W_sum, mem_fx[1], a2_fx ); /* y2*a2 */ - W_sum = W_mac_32_32( W_sum, W_round64_L( W_y2 ), a1_fx ); /* y1*a1 */ - W_y1 = W_shl( W_sum, HP20_COEFF_SCALE ); + W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x1 ) ); /* b2*x0 */ + W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[1], a2_fx ) ); /* y2*a2 */ + W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y2 ), a1_fx ) ); /* y1*a1 */ + W_y1 = W_shl( W_sum, add( 1, HP20_COEFF_SCALE ) ); signal_fx[1] = W_round64_L( W_shl( W_y1, prescale ) ); move32(); @@ -478,13 +478,13 @@ void hp20_fx_32( FOR( i = 2; i < lg; i++ ) { - W_sum = W_mult_32_32( b2_fx, x2 ); /* b2*x2 */ - W_sum = W_mac_32_32( W_sum, b1_fx, x1 ); /* b1*x1 */ + W_sum = W_mult0_32_32( b2_fx, x2 ); /* b2*x2 */ + W_sum = W_add( W_sum, W_mult0_32_32( b1_fx, x1 ) ); /* b1*x1 */ x0 = L_shr( signal_fx[i], prescale_current_frame ); - W_sum = W_mac_32_32( W_sum, b2_fx, x0 ); /* b2*x0 */ - W_sum = W_mac_32_32( W_sum, W_round64_L( W_y2 ), a2_fx ); /* y2*a2 */ - W_sum = W_mac_32_32( W_sum, W_round64_L( W_y1 ), a1_fx ); /* y1*a1 */ - W_y0 = W_shl( W_sum, HP20_COEFF_SCALE ); + W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x0 ) ); /* b2*x0 */ + W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y2 ), a2_fx ) ); /* y2*a2 */ + W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y1 ), a1_fx ) ); /* y1*a1 */ + W_y0 = W_shl( W_sum, add( 1, HP20_COEFF_SCALE ) ); signal_fx[i] = W_round64_L( W_shl( W_y0, prescale_current_frame ) ); move32(); -- GitLab From a8273e9d7a9994b61e4f9b712e955c08efd42300 Mon Sep 17 00:00:00 2001 From: naghibza Date: Thu, 18 Sep 2025 16:11:16 +0200 Subject: [PATCH 08/14] changed hp20_fx_32() coefficient from Q29 to Q30 --- lib_com/hp50_fx.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index 69d9da899..574f8c506 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -41,6 +41,7 @@ #include "wmc_auto.h" #define HP20_COEFF_SCALE ( 2 ) +#define HP20_FX_COEFF_SCALE ( 1 ) /* * hp20 * @@ -355,7 +356,7 @@ void hp20_fx_32( Word16 prescale, prescaleOld, prescale_current_frame, diff; prescale = L_norm_arr( signal_fx, lg ); - prescale_current_frame = sub( HP20_COEFF_SCALE, prescale ); + prescale_current_frame = sub( add( 1, HP20_FX_COEFF_SCALE ), prescale ); prescaleOld = extract_l( mem_fx[4] ); @@ -375,7 +376,7 @@ void hp20_fx_32( prescale = s_min( prescale, diff ); } - prescale = sub( HP20_COEFF_SCALE, prescale ); + prescale = sub( add( 1, HP20_FX_COEFF_SCALE ), prescale ); diff = sub( prescale, prescaleOld ); #ifdef ISSUE_1836_replace_overflow_libcom @@ -405,10 +406,10 @@ void hp20_fx_32( [b,a] = butter(2, 20.0/4000.0, 'high'); b = [0.988954248067140 -1.977908496134280 0.988954248067140] a =[1.000000000000000 -1.977786483776764 0.978030508491796]*/ - a1_fx = 1061816033l /* 1.977786483776764 Q29*/; - a2_fx = -525076131l /*-0.978030508491796 Q29*/; - b1_fx = -1061881538l /*-1.977908496134280 Q29*/; - b2_fx = 530940769l /* 0.988954248067140 Q29*/; + a1_fx = 2123632067 /* 1.977786483776764 Q30*/; + a2_fx = -1050152262 /*-0.978030508491796 Q30*/; + b1_fx = -2123763076 /*-1.977908496134280 Q30*/; + b2_fx = 1061881538 /* 0.988954248067140 Q30*/; } ELSE IF( EQ_32( Fs, 16000 ) ) { @@ -416,10 +417,10 @@ void hp20_fx_32( [b,a] = butter(2, 20.0/8000.0, 'high'); b =[ 0.994461788958195 -1.988923577916390 0.994461788958195] a =[1.000000000000000 -1.988892905899653 0.988954249933127] */ - a1_fx = 1067778748l /* 1.988892905899653 Q29*/; - a2_fx = -530940770l /*-0.988954249933127 Q29*/; - b1_fx = -1067795215l /*-1.988923577916390 Q29*/; - b2_fx = 533897608l /* 0.994461788958195 Q29*/; + a1_fx = 2135557497 /* 1.988892905899653 Q30*/; + a2_fx = -1061881540 /*-0.988954249933127 Q30*/; + b1_fx = -2135590430 /*-1.988923577916390 Q30*/; + b2_fx = 1067795215 /* 0.994461788958195 Q30*/; } ELSE IF( EQ_32( Fs, 32000 ) ) { @@ -427,10 +428,10 @@ void hp20_fx_32( [b,a] = butter(2, 20.0/16000.0, 'high'); b =[0.997227049904470 -1.994454099808940 0.997227049904470] a =[1.000000000000000 -1.994446410541927 0.994461789075954]*/ - a1_fx = 1070760263l /* 1.994446410541927 Q29*/; - a2_fx = -533897608l /*-0.994461789075954 Q29*/; - b1_fx = -1070764392l /*-1.994454099808940 Q29*/; - b2_fx = 535382196l /* 0.997227049904470 Q29*/; + a1_fx = 2141520527 /* 1.994446410541927 Q30*/; + a2_fx = -1067795215 /*-0.994461789075954 Q30*/; + b1_fx = -2141528783 /*-1.994454099808940 Q30*/; + b2_fx = 1070764392 /* 0.997227049904470 Q30*/; } ELSE { @@ -438,10 +439,10 @@ void hp20_fx_32( [b,a] = butter(2, 20.0/24000.0, 'high'); b =[ 0.998150511190452 -1.996301022380904 0.998150511190452] a =[1.000000000000000 -1.996297601769122 0.996304442992686]*/ - a1_fx = 1071754114l /* 1.996297601769122 Q29*/; - a2_fx = -534886875l /*-0.996304442992686 Q29*/; - b1_fx = -1071755951l /*-1.996301022380904 Q29*/; - b2_fx = 535877975l /* 0.998150511190452 Q29*/; + a1_fx = 2143508228 /* 1.996297601769122 Q30*/; + a2_fx = -1069773750 /*-0.996304442992686 Q30*/; + b1_fx = -2143511901 /*-1.996301022380904 Q30*/; + b2_fx = 1071755951 /* 0.998150511190452 Q30*/; } move32(); move32(); @@ -456,7 +457,7 @@ void hp20_fx_32( W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x2 ) ); /* b2*x0 */ W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[0], a2_fx ) ); /* y2*a2 */ W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[1], a1_fx ) ); /* y1*a1 */ - W_y2 = W_shl( W_sum, add( 1, HP20_COEFF_SCALE ) ); + W_y2 = W_shl( W_sum, add( 1, HP20_FX_COEFF_SCALE ) ); signal_fx[0] = W_round64_L( W_shl( W_y2, prescale ) ); move32(); @@ -466,7 +467,7 @@ void hp20_fx_32( W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x1 ) ); /* b2*x0 */ W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[1], a2_fx ) ); /* y2*a2 */ W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y2 ), a1_fx ) ); /* y1*a1 */ - W_y1 = W_shl( W_sum, add( 1, HP20_COEFF_SCALE ) ); + W_y1 = W_shl( W_sum, add( 1, HP20_FX_COEFF_SCALE ) ); signal_fx[1] = W_round64_L( W_shl( W_y1, prescale ) ); move32(); @@ -484,7 +485,7 @@ void hp20_fx_32( W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x0 ) ); /* b2*x0 */ W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y2 ), a2_fx ) ); /* y2*a2 */ W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y1 ), a1_fx ) ); /* y1*a1 */ - W_y0 = W_shl( W_sum, add( 1, HP20_COEFF_SCALE ) ); + W_y0 = W_shl( W_sum, add( 1, HP20_FX_COEFF_SCALE ) ); signal_fx[i] = W_round64_L( W_shl( W_y0, prescale_current_frame ) ); move32(); -- GitLab From 8f7e2fa8c6aa40858e132dafd8c5fb011ec44022 Mon Sep 17 00:00:00 2001 From: naghibza Date: Thu, 18 Sep 2025 16:16:42 +0200 Subject: [PATCH 09/14] applied clang formatting patch --- lib_com/hp50_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index 574f8c506..50c9a9309 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -40,7 +40,7 @@ #include "prot_fx.h" #include "wmc_auto.h" -#define HP20_COEFF_SCALE ( 2 ) +#define HP20_COEFF_SCALE ( 2 ) #define HP20_FX_COEFF_SCALE ( 1 ) /* * hp20 -- GitLab From 89202abaac54279c8a9ddc21c6289fc1b535315a Mon Sep 17 00:00:00 2001 From: naghibza Date: Thu, 18 Sep 2025 17:42:59 +0200 Subject: [PATCH 10/14] applied hp20_fx_32 optimization as hp20_fx_32_opt in decoder --- lib_com/hp50_fx.c | 5 ++--- lib_com/options.h | 2 +- lib_dec/ivas_jbm_dec_fx.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index 50c9a9309..9e539e9cb 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -341,7 +341,7 @@ void hp20( Word16 signal[], /* i/o: signal to filter any * #ifdef HP20_FIX32_RECODING -void hp20_fx_32( +void hp20_fx_32_opt( Word32 signal_fx[], const Word16 lg, Word32 mem_fx[], @@ -513,7 +513,7 @@ void hp20_fx_32( return; } -#else +#endif void hp20_fx_32( Word32 signal_fx[], const Word16 lg, @@ -698,4 +698,3 @@ void hp20_fx_32( return; } -#endif diff --git a/lib_com/options.h b/lib_com/options.h index 2ca7f505d..86c966ec5 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -104,7 +104,7 @@ #define FIX_1962_FORMAT_CONV_SPECTRAL_DIFF /* FhG: Improved precision of targetEnergy in ivas_ls_setup_conversion_process_mdct_fx() */ #define FIX_2003_CON_TCX_OVERFLOW /* FhG: Use a dynamic scaling factor for the synth buffer at the output of con_tcx_ivas_fx() */ -#define HP20_FIX32_RECODING /* FhG: reduced headroom in hp20_fx_32 calculation */ +#define HP20_FIX32_RECODING /* FhG: optimized hp20_fx_32 calculation and applied it as hp20_fx_32_opt in decoder */ /* #################### Start BASOP porting switches ############################ */ #define NONBE_1244_FIX_SWB_BWE_MEMORY /* VA: issue 1244: fix to SWB BWE memory in case of switching from FB coding - pending a review by Huawei */ diff --git a/lib_dec/ivas_jbm_dec_fx.c b/lib_dec/ivas_jbm_dec_fx.c index fe36c1ea2..022d1bf99 100644 --- a/lib_dec/ivas_jbm_dec_fx.c +++ b/lib_dec/ivas_jbm_dec_fx.c @@ -159,7 +159,11 @@ ivas_error ivas_jbm_dec_tc_fx( /* HP filtering */ FOR( n = 0; n < s_min( nchan_out, st_ivas->nchan_transport ); n++ ) { +#ifdef HP20_FIX32_RECODING + hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#else hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#endif } } ELSE IF( EQ_32( st_ivas->ivas_format, ISM_FORMAT ) ) @@ -226,7 +230,11 @@ ivas_error ivas_jbm_dec_tc_fx( } /* HP filtering */ +#ifdef HP20_FIX32_RECODING + hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#else hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#endif } test(); @@ -577,7 +585,11 @@ ivas_error ivas_jbm_dec_tc_fx( /* HP filtering */ FOR( n = 0; n < nchan_remapped; n++ ) { +#ifdef HP20_FIX32_RECODING + hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#else hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#endif } IF( EQ_32( st_ivas->ivas_format, SBA_FORMAT ) ) @@ -770,7 +782,11 @@ ivas_error ivas_jbm_dec_tc_fx( FOR( n = 0; n < getNumChanSynthesis( st_ivas ); n++ ) { +#ifdef HP20_FIX32_RECODING + hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#else hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#endif } Word16 output_q = 11; @@ -1009,7 +1025,11 @@ ivas_error ivas_jbm_dec_tc_fx( /* HP filtering */ FOR( n = 0; n < getNumChanSynthesis( st_ivas ); n++ ) { +#ifdef HP20_FIX32_RECODING + hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#else hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#endif } nchan_remapped = ivas_sba_remapTCs_fx( &p_output_fx[sba_ch_idx], st_ivas, output_frame ); @@ -1122,7 +1142,11 @@ ivas_error ivas_jbm_dec_tc_fx( { IF( NE_16( n, LFE_CHANNEL ) ) { +#ifdef HP20_FIX32_RECODING + hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#else hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#endif } } @@ -1180,7 +1204,11 @@ ivas_error ivas_jbm_dec_tc_fx( { IF( NE_16( n, LFE_CHANNEL ) ) { +#ifdef HP20_FIX32_RECODING + hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#else hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#endif } } @@ -1246,7 +1274,11 @@ ivas_error ivas_jbm_dec_tc_fx( /* HP filtering */ FOR( n = 0; n < st_ivas->nchan_transport; n++ ) { +#ifdef HP20_FIX32_RECODING + hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#else hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#endif } /* Rendering */ @@ -1460,7 +1492,11 @@ ivas_error ivas_jbm_dec_tc_fx( /* HP filtering */ FOR( n = 0; n < getNumChanSynthesis( st_ivas ); n++ ) { +#ifdef HP20_FIX32_RECODING + hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#else hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs ); +#endif } IF( EQ_32( st_ivas->renderer_type, RENDERER_MCMASA_MONO_STEREO ) ) -- GitLab From a15d5d0c8f8f37b637dd95e92e47094fa7e5c2a5 Mon Sep 17 00:00:00 2001 From: naghibza Date: Thu, 18 Sep 2025 17:59:00 +0200 Subject: [PATCH 11/14] added hp20_fx_32_opt declaration --- lib_com/prot_fx.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 30b594d79..5473a9e0d 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -10481,6 +10481,14 @@ void hp20_fx_32( Word32 mem_fx[], const Word32 Fs ); +#ifdef HP20_FIX32_RECODING +void hp20_fx_32_opt( + Word32 signal_fx[], + const Word16 lg, + Word32 mem_fx[], + const Word32 Fs ); +#endif + void getTCXMode_ivas_fx( Decoder_State *st, /* i/o: decoder memory state */ Decoder_State *st0, /* i : bitstream */ -- GitLab From a77a208fe10d839a7e819c742ac25927498bd5c4 Mon Sep 17 00:00:00 2001 From: naghibza Date: Fri, 19 Sep 2025 15:33:30 +0200 Subject: [PATCH 12/14] modified prescale calculation in hp20_fx_32_opt() --- lib_com/hp50_fx.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index 9e539e9cb..a15124c9f 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -349,35 +349,27 @@ void hp20_fx_32_opt( { Word32 i; Word32 a1_fx, a2_fx, b1_fx, b2_fx; - Word32 diff_pos, diff_neg; -#ifndef ISSUE_1836_replace_overflow_libcom - Flag Overflow = 0; -#endif Word16 prescale, prescaleOld, prescale_current_frame, diff; + Word32 tmp_mem[4]; prescale = L_norm_arr( signal_fx, lg ); prescale_current_frame = sub( add( 1, HP20_FX_COEFF_SCALE ), prescale ); prescaleOld = extract_l( mem_fx[4] ); -#ifdef ISSUE_1836_replace_overflow_libcom - diff_pos = norm_l( L_shl_sat( L_max( mem_fx[2], mem_fx[3] ), prescaleOld ) ); - diff_neg = norm_l( L_shl_sat( L_min( mem_fx[2], mem_fx[3] ), prescaleOld ) ); -#else - diff_pos = norm_l( L_shl_o( L_max( mem_fx[2], mem_fx[3] ), prescaleOld, &Overflow ) ); - diff_neg = norm_l( L_shl_o( L_min( mem_fx[2], mem_fx[3] ), prescaleOld, &Overflow ) ); -#endif + Copy32( mem_fx, tmp_mem, 4 ); + Scale_sig32( tmp_mem, 4, prescaleOld ); + diff = L_norm_arr( tmp_mem, 4 ); + prescale = s_min( prescale, diff ); - diff = L_min( diff_pos, diff_neg ); + prescale = sub( add( 1, HP20_FX_COEFF_SCALE ), prescale ); - IF( NE_16( diff, 0 ) ) + IF( EQ_16( prescale_current_frame, -29 ) ) // signal_fx buffer contains only zeros, so use the mem_fx scale_factor instead { - prescale = s_min( prescale, diff ); + prescale_current_frame = prescale; } - prescale = sub( add( 1, HP20_FX_COEFF_SCALE ), prescale ); - diff = sub( prescale, prescaleOld ); #ifdef ISSUE_1836_replace_overflow_libcom mem_fx[0] = L_shr_sat( mem_fx[0], diff ); -- GitLab From 9fa0972503076fdb8f126732c109ab90396228c3 Mon Sep 17 00:00:00 2001 From: Arthur Date: Mon, 22 Sep 2025 18:46:14 +0200 Subject: [PATCH 13/14] added changes according to review comments, results should be same as in last commit --- lib_com/hp50_fx.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index a15124c9f..cd69ddf5a 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -353,19 +353,26 @@ void hp20_fx_32_opt( Word32 tmp_mem[4]; prescale = L_norm_arr( signal_fx, lg ); - prescale_current_frame = sub( add( 1, HP20_FX_COEFF_SCALE ), prescale ); + prescale_current_frame = sub( 1 + HP20_FX_COEFF_SCALE, prescale ); prescaleOld = extract_l( mem_fx[4] ); - Copy32( mem_fx, tmp_mem, 4 ); - Scale_sig32( tmp_mem, 4, prescaleOld ); + tmp_mem[0] = L_shl_sat( mem_fx[0], prescaleOld ); + tmp_mem[1] = L_shl_sat( mem_fx[1], prescaleOld ); + tmp_mem[2] = L_shl_sat( mem_fx[2], prescaleOld ); + tmp_mem[3] = L_shl_sat( mem_fx[3], prescaleOld ); + move32(); + move32(); + move32(); + move32(); + diff = L_norm_arr( tmp_mem, 4 ); prescale = s_min( prescale, diff ); - prescale = sub( add( 1, HP20_FX_COEFF_SCALE ), prescale ); + prescale = sub( 1 + HP20_FX_COEFF_SCALE, prescale ); - IF( EQ_16( prescale_current_frame, -29 ) ) // signal_fx buffer contains only zeros, so use the mem_fx scale_factor instead + if( EQ_16( prescale_current_frame, 1 + HP20_FX_COEFF_SCALE - 31 ) ) // signal_fx buffer contains only zeros, so use the mem_fx scale_factor instead { prescale_current_frame = prescale; } @@ -373,21 +380,18 @@ void hp20_fx_32_opt( diff = sub( prescale, prescaleOld ); #ifdef ISSUE_1836_replace_overflow_libcom mem_fx[0] = L_shr_sat( mem_fx[0], diff ); - move32(); mem_fx[1] = L_shr_sat( mem_fx[1], diff ); - move32(); mem_fx[2] = L_shr_sat( mem_fx[2], diff ); - move32(); mem_fx[3] = L_shr_sat( mem_fx[3], diff ); #else mem_fx[0] = L_shr_o( mem_fx[0], diff, &Overflow ); - move32(); mem_fx[1] = L_shr_o( mem_fx[1], diff, &Overflow ); - move32(); mem_fx[2] = L_shr_o( mem_fx[2], diff, &Overflow ); - move32(); mem_fx[3] = L_shr_o( mem_fx[3], diff, &Overflow ); #endif + move32(); + move32(); + move32(); move32(); mem_fx[4] = L_deposit_l( prescale_current_frame ); move32(); @@ -449,7 +453,7 @@ void hp20_fx_32_opt( W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x2 ) ); /* b2*x0 */ W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[0], a2_fx ) ); /* y2*a2 */ W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[1], a1_fx ) ); /* y1*a1 */ - W_y2 = W_shl( W_sum, add( 1, HP20_FX_COEFF_SCALE ) ); + W_y2 = W_shl( W_sum, 1 + HP20_FX_COEFF_SCALE ); signal_fx[0] = W_round64_L( W_shl( W_y2, prescale ) ); move32(); @@ -459,7 +463,7 @@ void hp20_fx_32_opt( W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x1 ) ); /* b2*x0 */ W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[1], a2_fx ) ); /* y2*a2 */ W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y2 ), a1_fx ) ); /* y1*a1 */ - W_y1 = W_shl( W_sum, add( 1, HP20_FX_COEFF_SCALE ) ); + W_y1 = W_shl( W_sum, 1 + HP20_FX_COEFF_SCALE ); signal_fx[1] = W_round64_L( W_shl( W_y1, prescale ) ); move32(); @@ -477,7 +481,7 @@ void hp20_fx_32_opt( W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x0 ) ); /* b2*x0 */ W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y2 ), a2_fx ) ); /* y2*a2 */ W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y1 ), a1_fx ) ); /* y1*a1 */ - W_y0 = W_shl( W_sum, add( 1, HP20_FX_COEFF_SCALE ) ); + W_y0 = W_shl( W_sum, 1 + HP20_FX_COEFF_SCALE ); signal_fx[i] = W_round64_L( W_shl( W_y0, prescale_current_frame ) ); move32(); -- GitLab From 7350dbe5c4ad746e7360330187382fcc4949fd9f Mon Sep 17 00:00:00 2001 From: Arthur Date: Mon, 22 Sep 2025 18:55:13 +0200 Subject: [PATCH 14/14] fix clang format issue --- lib_com/hp50_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index cd69ddf5a..9d6f10ed2 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -372,7 +372,7 @@ void hp20_fx_32_opt( prescale = sub( 1 + HP20_FX_COEFF_SCALE, prescale ); - if( EQ_16( prescale_current_frame, 1 + HP20_FX_COEFF_SCALE - 31 ) ) // signal_fx buffer contains only zeros, so use the mem_fx scale_factor instead + if ( EQ_16( prescale_current_frame, 1 + HP20_FX_COEFF_SCALE - 31 ) ) // signal_fx buffer contains only zeros, so use the mem_fx scale_factor instead { prescale_current_frame = prescale; } -- GitLab