From 39ee6c2d7b8edbe2aeb1ebaef1c3e1850253dd4e Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Mon, 15 Sep 2025 17:14:59 +0200 Subject: [PATCH 1/8] Inside a FOR loop within tcx_ltp_synth_filter32() there was an IF which was impeding WMOPS performance. --- lib_com/tcx_ltp_fx.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/lib_com/tcx_ltp_fx.c b/lib_com/tcx_ltp_fx.c index 9f92f0b5b..ca943050d 100644 --- a/lib_com/tcx_ltp_fx.c +++ b/lib_com/tcx_ltp_fx.c @@ -579,8 +579,7 @@ static void tcx_ltp_synth_filter32( L = tcxLtpFilters[filtIdx].length; /* Q0 */ move16(); - - alpha = 0; + alpha = 0x7FFF; /* 1 in Q15 */ move16(); IF( fade != 0 ) { @@ -605,7 +604,13 @@ static void tcx_ltp_synth_filter32( if ( fade < 0 ) step = negate( step ); } - + if ( zir != NULL ) + { + FOR( j = 0; j < length; j++ ) + { + synth[j] = L_sub_sat( synth[j], zir[j] ); + } + } FOR( j = 0; j < length; j++ ) { s = L_deposit_l( 0 ); @@ -625,22 +630,14 @@ static void tcx_ltp_synth_filter32( fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ - IF( fade != 0 ) L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ - if ( zir != NULL ) - { - L_tmp2 = L_sub_sat( L_tmp2, zir[j] ); /* Qx */ - } synth_ltp[j] = L_tmp2; /* Qx */ move16(); BASOP_SATURATE_WARNING_OFF_EVS; - if ( fade != 0 ) - { - alpha = add_sat( alpha, step ); - } + alpha = add_sat( alpha, step ); BASOP_SATURATE_WARNING_ON_EVS; x0++; -- GitLab From 8db6d403e1508d624a0861081138c811207e2e1c Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Mon, 15 Sep 2025 17:19:38 +0200 Subject: [PATCH 2/8] applied the clang patch --- lib_com/tcx_ltp_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/tcx_ltp_fx.c b/lib_com/tcx_ltp_fx.c index ca943050d..870fac6f6 100644 --- a/lib_com/tcx_ltp_fx.c +++ 
b/lib_com/tcx_ltp_fx.c @@ -630,8 +630,8 @@ static void tcx_ltp_synth_filter32( fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ - L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ - L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ + L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ + L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ synth_ltp[j] = L_tmp2; /* Qx */ move16(); -- GitLab From f92c665e2ff914dc8d25ecbc31dfcbf6de03323e Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Tue, 16 Sep 2025 10:14:25 +0200 Subject: [PATCH 3/8] improving bit exactness. --- lib_com/tcx_ltp_fx.c | 94 ++++++++++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 29 deletions(-) diff --git a/lib_com/tcx_ltp_fx.c b/lib_com/tcx_ltp_fx.c index 870fac6f6..a6a4af90d 100644 --- a/lib_com/tcx_ltp_fx.c +++ b/lib_com/tcx_ltp_fx.c @@ -611,39 +611,75 @@ static void tcx_ltp_synth_filter32( synth[j] = L_sub_sat( synth[j], zir[j] ); } } - FOR( j = 0; j < length; j++ ) + IF ( fade != 0) { - s = L_deposit_l( 0 ); - s2 = L_deposit_l( 0 ); - k = 0; - move16(); - FOR( i = 0; i < L; i++ ) + FOR( j = 0; j < length; j++ ) { - s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] ); /* Qx */ - s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ - k = k + pitch_res; + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] ); /* Qx */ + s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ + L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* 
Qx */ + L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ + L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ + L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ + + synth_ltp[j] = L_tmp2; /* Qx */ + move16(); + + BASOP_SATURATE_WARNING_OFF_EVS; + alpha = add_sat( alpha, step ); + BASOP_SATURATE_WARNING_ON_EVS; + + x0++; + x1++; + y0++; + y1++; + } + } + ELSE + { + FOR( j = 0; j < length; j++ ) + { + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] ); /* Qx */ + s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ + L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ + L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ + L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ + + synth_ltp[j] = L_tmp2; /* Qx */ + move16(); + + x0++; + x1++; + y0++; + y1++; } - /* s2 *= ALPHA; - normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; - zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; - fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ - L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ - L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ - L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ - L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ - - synth_ltp[j] = L_tmp2; /* Qx */ - move16(); - - BASOP_SATURATE_WARNING_OFF_EVS; - alpha = add_sat( alpha, step ); - BASOP_SATURATE_WARNING_ON_EVS; - - x0++; - x1++; - y0++; - y1++; } } ELSE -- GitLab From fcc5477ef192fda7e02c36bcaab22a1f3a5df2e7 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Tue, 16 Sep 2025 10:38:04 +0200 Subject: [PATCH 4/8] applied the clang patch. 
--- lib_com/tcx_ltp_fx.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/lib_com/tcx_ltp_fx.c b/lib_com/tcx_ltp_fx.c index a6a4af90d..266dfb1e9 100644 --- a/lib_com/tcx_ltp_fx.c +++ b/lib_com/tcx_ltp_fx.c @@ -611,7 +611,7 @@ static void tcx_ltp_synth_filter32( synth[j] = L_sub_sat( synth[j], zir[j] ); } } - IF ( fade != 0) + IF( fade != 0 ) { FOR( j = 0; j < length; j++ ) { @@ -625,7 +625,7 @@ static void tcx_ltp_synth_filter32( s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ k = k + pitch_res; } - + /* s2 *= ALPHA; normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; @@ -634,14 +634,14 @@ static void tcx_ltp_synth_filter32( L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ - + synth_ltp[j] = L_tmp2; /* Qx */ move16(); - + BASOP_SATURATE_WARNING_OFF_EVS; alpha = add_sat( alpha, step ); BASOP_SATURATE_WARNING_ON_EVS; - + x0++; x1++; y0++; @@ -662,7 +662,7 @@ static void tcx_ltp_synth_filter32( s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ k = k + pitch_res; } - + /* s2 *= ALPHA; normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; @@ -670,16 +670,15 @@ static void tcx_ltp_synth_filter32( L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ - + synth_ltp[j] = L_tmp2; /* Qx */ move16(); - + x0++; x1++; y0++; y1++; } - } } ELSE -- GitLab From c1c64d944c1324bd6b6aff8295c6f61f0d86950d Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 17 Sep 2025 10:40:56 +0200 Subject: [PATCH 5/8] more loop unswitching in tcx_ltp_synth_filter() and tcx_ltp_synth_filter32() for bit-exact WMOPS improvement. 
--- lib_com/tcx_ltp_fx.c | 368 +++++++++++++++++++++++++++++++------------ 1 file changed, 271 insertions(+), 97 deletions(-) diff --git a/lib_com/tcx_ltp_fx.c b/lib_com/tcx_ltp_fx.c index 266dfb1e9..9f762c08b 100644 --- a/lib_com/tcx_ltp_fx.c +++ b/lib_com/tcx_ltp_fx.c @@ -491,48 +491,152 @@ static void tcx_ltp_synth_filter( if ( fade < 0 ) step = negate( step ); } - - FOR( j = 0; j < length; j++ ) + IF ( zir != NULL ) { - s = L_deposit_l( 0 ); - s2 = L_deposit_l( 0 ); - k = 0; - move16(); - FOR( i = 0; i < L; i++ ) + IF ( fade != 0) { - s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] ); /* Qx */ - s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */ - k = k + pitch_res; + FOR( j = 0; j < length; j++ ) + { + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] ); /* Qx */ + s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ + i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */ + k = mult_r( gain, i ); /* Qx */ + k = mult_r( k, alpha ); /* Qx */ + k = add_sat( synth[j], k ); /* Qx */ + k = sub_sat( k, zir[j] ); /* Qx */ + + synth_ltp[j] = k; /* Qx */ + move16(); + + BASOP_SATURATE_WARNING_OFF_EVS; + alpha = add_sat( alpha, step ); + BASOP_SATURATE_WARNING_ON_EVS; + + x0++; + x1++; + y0++; + y1++; + } } - - /* s2 *= ALPHA; - normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; - zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; - fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ - i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */ - k = mult_r( gain, i ); 
/* Qx */ - if ( fade != 0 ) - k = mult_r( k, alpha ); /* Qx */ - k = add_sat( synth[j], k ); /* Qx */ - if ( zir != NULL ) + ELSE { - k = sub_sat( k, zir[j] ); /* Qx */ + FOR( j = 0; j < length; j++ ) + { + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] ); /* Qx */ + s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ + i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */ + k = mult_r( gain, i ); /* Qx */ + k = add_sat( synth[j], k ); /* Qx */ + k = sub_sat( k, zir[j] ); /* Qx */ + + synth_ltp[j] = k; /* Qx */ + move16(); + + x0++; + x1++; + y0++; + y1++; + } } - - synth_ltp[j] = k; /* Qx */ - move16(); - - BASOP_SATURATE_WARNING_OFF_EVS; - if ( fade != 0 ) + } + ELSE + { + IF ( fade != 0) { - alpha = add_sat( alpha, step ); + FOR( j = 0; j < length; j++ ) + { + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] ); /* Qx */ + s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ + i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */ + k = mult_r( gain, i ); /* Qx */ + k = mult_r( k, alpha ); + k = add_sat( synth[j], k ); /* Qx */ + + synth_ltp[j] = k; /* Qx */ + move16(); + + BASOP_SATURATE_WARNING_OFF_EVS; + alpha = add_sat( alpha, step ); + 
BASOP_SATURATE_WARNING_ON_EVS; + + x0++; + x1++; + y0++; + y1++; + } } - BASOP_SATURATE_WARNING_ON_EVS; - - x0++; - x1++; - y0++; - y1++; + ELSE + { + FOR( j = 0; j < length; j++ ) + { + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] ); /* Qx */ + s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ + i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */ + k = mult_r( gain, i ); /* Qx */ + k = add_sat( synth[j], k ); /* Qx */ + + synth_ltp[j] = k; /* Qx */ + move16(); + + + x0++; + x1++; + y0++; + y1++; + } + } } } ELSE @@ -579,7 +683,7 @@ static void tcx_ltp_synth_filter32( L = tcxLtpFilters[filtIdx].length; /* Q0 */ move16(); - alpha = 0x7FFF; /* 1 in Q15 */ + alpha = 0; move16(); IF( fade != 0 ) { @@ -604,80 +708,150 @@ static void tcx_ltp_synth_filter32( if ( fade < 0 ) step = negate( step ); } - if ( zir != NULL ) + IF( fade != 0 ) { - FOR( j = 0; j < length; j++ ) + IF ( zir != NULL ) { - synth[j] = L_sub_sat( synth[j], zir[j] ); + FOR( j = 0; j < length; j++ ) + { + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] ); /* Qx */ + s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ + L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ + L_tmp2 = Mpy_32_16_r( L_tmp, 
gain ); /* Qx */ + L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ + L_tmp2 = L_sub_sat( L_tmp2, zir[j] ); /* Qx */ + L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ + + synth_ltp[j] = L_tmp2; /* Qx */ + move16(); + + BASOP_SATURATE_WARNING_OFF_EVS; + alpha = add_sat( alpha, step ); + BASOP_SATURATE_WARNING_ON_EVS; + + x0++; + x1++; + y0++; + y1++; + } } - } - IF( fade != 0 ) - { - FOR( j = 0; j < length; j++ ) + ELSE { - s = L_deposit_l( 0 ); - s2 = L_deposit_l( 0 ); - k = 0; - move16(); - FOR( i = 0; i < L; i++ ) + FOR( j = 0; j < length; j++ ) { - s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] ); /* Qx */ - s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ - k = k + pitch_res; + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] ); /* Qx */ + s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ + L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ + L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ + L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ + L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ + + synth_ltp[j] = L_tmp2; /* Qx */ + move16(); + + BASOP_SATURATE_WARNING_OFF_EVS; + alpha = add_sat( alpha, step ); + BASOP_SATURATE_WARNING_ON_EVS; + + x0++; + x1++; + y0++; + y1++; } - - /* s2 *= ALPHA; - normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; - zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; - fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ - L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ - L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ - L_tmp2 = 
Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ - L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ - - synth_ltp[j] = L_tmp2; /* Qx */ - move16(); - - BASOP_SATURATE_WARNING_OFF_EVS; - alpha = add_sat( alpha, step ); - BASOP_SATURATE_WARNING_ON_EVS; - - x0++; - x1++; - y0++; - y1++; } } ELSE { - FOR( j = 0; j < length; j++ ) + IF ( zir != NULL ) { - s = L_deposit_l( 0 ); - s2 = L_deposit_l( 0 ); - k = 0; - move16(); - FOR( i = 0; i < L; i++ ) + FOR( j = 0; j < length; j++ ) { - s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] ); /* Qx */ - s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ - k = k + pitch_res; + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] ); /* Qx */ + s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ + L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ + L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ + L_tmp2 = L_sub_sat( L_tmp2, zir[j] ); /* Qx */ + L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ + + synth_ltp[j] = L_tmp2; /* Qx */ + move16(); + + x0++; + x1++; + y0++; + y1++; + } + } + ELSE + { + FOR( j = 0; j < length; j++ ) + { + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] ); /* Qx */ + s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + 
alpha * gain * s; */ + L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ + L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ + L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ + + synth_ltp[j] = L_tmp2; /* Qx */ + move16(); + + x0++; + x1++; + y0++; + y1++; } - - /* s2 *= ALPHA; - normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; - zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; - fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ - L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ - L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ - L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ - - synth_ltp[j] = L_tmp2; /* Qx */ - move16(); - - x0++; - x1++; - y0++; - y1++; } } } -- GitLab From 13ca321c4a652d8942e47372201a4cea348108af Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 17 Sep 2025 10:45:48 +0200 Subject: [PATCH 6/8] applied the clang patch. --- lib_com/tcx_ltp_fx.c | 82 ++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/lib_com/tcx_ltp_fx.c b/lib_com/tcx_ltp_fx.c index 9f762c08b..3f1f52aab 100644 --- a/lib_com/tcx_ltp_fx.c +++ b/lib_com/tcx_ltp_fx.c @@ -491,9 +491,9 @@ static void tcx_ltp_synth_filter( if ( fade < 0 ) step = negate( step ); } - IF ( zir != NULL ) + IF( zir != NULL ) { - IF ( fade != 0) + IF( fade != 0 ) { FOR( j = 0; j < length; j++ ) { @@ -507,24 +507,24 @@ static void tcx_ltp_synth_filter( s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */ k = k + pitch_res; } - + /* s2 *= ALPHA; normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */ k = mult_r( gain, i ); /* Qx */ - k = mult_r( k, alpha ); /* Qx */ - k = add_sat( synth[j], k ); /* Qx */ - k = sub_sat( k, zir[j] ); /* Qx */ - 
+ k = mult_r( k, alpha ); /* Qx */ + k = add_sat( synth[j], k ); /* Qx */ + k = sub_sat( k, zir[j] ); /* Qx */ + synth_ltp[j] = k; /* Qx */ move16(); - + BASOP_SATURATE_WARNING_OFF_EVS; alpha = add_sat( alpha, step ); BASOP_SATURATE_WARNING_ON_EVS; - + x0++; x1++; y0++; @@ -545,19 +545,19 @@ static void tcx_ltp_synth_filter( s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */ k = k + pitch_res; } - + /* s2 *= ALPHA; normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */ k = mult_r( gain, i ); /* Qx */ - k = add_sat( synth[j], k ); /* Qx */ - k = sub_sat( k, zir[j] ); /* Qx */ - + k = add_sat( synth[j], k ); /* Qx */ + k = sub_sat( k, zir[j] ); /* Qx */ + synth_ltp[j] = k; /* Qx */ move16(); - + x0++; x1++; y0++; @@ -567,7 +567,7 @@ static void tcx_ltp_synth_filter( } ELSE { - IF ( fade != 0) + IF( fade != 0 ) { FOR( j = 0; j < length; j++ ) { @@ -581,7 +581,7 @@ static void tcx_ltp_synth_filter( s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */ k = k + pitch_res; } - + /* s2 *= ALPHA; normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; @@ -590,14 +590,14 @@ static void tcx_ltp_synth_filter( k = mult_r( gain, i ); /* Qx */ k = mult_r( k, alpha ); k = add_sat( synth[j], k ); /* Qx */ - + synth_ltp[j] = k; /* Qx */ move16(); - + BASOP_SATURATE_WARNING_OFF_EVS; alpha = add_sat( alpha, step ); BASOP_SATURATE_WARNING_ON_EVS; - + x0++; x1++; y0++; @@ -618,25 +618,25 @@ static void tcx_ltp_synth_filter( s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */ k = k + pitch_res; } - + /* s2 *= ALPHA; normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; 
fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */ k = mult_r( gain, i ); /* Qx */ - k = add_sat( synth[j], k ); /* Qx */ - + k = add_sat( synth[j], k ); /* Qx */ + synth_ltp[j] = k; /* Qx */ move16(); - - + + x0++; x1++; y0++; y1++; } - } + } } } ELSE @@ -710,7 +710,7 @@ static void tcx_ltp_synth_filter32( } IF( fade != 0 ) { - IF ( zir != NULL ) + IF( zir != NULL ) { FOR( j = 0; j < length; j++ ) { @@ -724,7 +724,7 @@ static void tcx_ltp_synth_filter32( s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ k = k + pitch_res; } - + /* s2 *= ALPHA; normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; @@ -734,14 +734,14 @@ static void tcx_ltp_synth_filter32( L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ L_tmp2 = L_sub_sat( L_tmp2, zir[j] ); /* Qx */ L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ - + synth_ltp[j] = L_tmp2; /* Qx */ move16(); - + BASOP_SATURATE_WARNING_OFF_EVS; alpha = add_sat( alpha, step ); BASOP_SATURATE_WARNING_ON_EVS; - + x0++; x1++; y0++; @@ -762,7 +762,7 @@ static void tcx_ltp_synth_filter32( s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ k = k + pitch_res; } - + /* s2 *= ALPHA; normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; @@ -771,14 +771,14 @@ static void tcx_ltp_synth_filter32( L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ - + synth_ltp[j] = L_tmp2; /* Qx */ move16(); - + BASOP_SATURATE_WARNING_OFF_EVS; alpha = add_sat( alpha, step ); BASOP_SATURATE_WARNING_ON_EVS; - + x0++; x1++; y0++; @@ -788,7 +788,7 @@ static void tcx_ltp_synth_filter32( } ELSE { - IF ( zir != NULL ) + IF( zir != NULL ) { FOR( j = 0; j < length; j++ ) { @@ -802,7 +802,7 
@@ static void tcx_ltp_synth_filter32( s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ k = k + pitch_res; } - + /* s2 *= ALPHA; normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; @@ -811,10 +811,10 @@ static void tcx_ltp_synth_filter32( L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ L_tmp2 = L_sub_sat( L_tmp2, zir[j] ); /* Qx */ L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ - + synth_ltp[j] = L_tmp2; /* Qx */ move16(); - + x0++; x1++; y0++; @@ -835,7 +835,7 @@ static void tcx_ltp_synth_filter32( s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ k = k + pitch_res; } - + /* s2 *= ALPHA; normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; @@ -843,10 +843,10 @@ static void tcx_ltp_synth_filter32( L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ - + synth_ltp[j] = L_tmp2; /* Qx */ move16(); - + x0++; x1++; y0++; -- GitLab From 51b2b4537bfe968495a852aba37c3b9a75c0e612 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Wed, 1 Oct 2025 10:04:38 +0200 Subject: [PATCH 7/8] add compilation switch --- lib_com/options.h | 3 +++ lib_com/tcx_ltp_fx.c | 46 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/lib_com/options.h b/lib_com/options.h index 5ae3f1416..485a52efd 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -104,6 +104,9 @@ #define FIX_1962_FORMAT_CONV_SPECTRAL_DIFF /* FhG: Improved precision of targetEnergy in ivas_ls_setup_conversion_process_mdct_fx() */ #define FIX_2003_CON_TCX_OVERFLOW /* FhG: Use a dynamic scaling factor for the synth buffer at the output of con_tcx_ivas_fx() */ +#define OPT_TCXLTP_FILTER_LOOP /* FhG: optimize loop in tcx_ltp_synth_filter */ + + /* #################### Start BASOP porting switches 
############################ */ #define NONBE_1244_FIX_SWB_BWE_MEMORY /* VA: issue 1244: fix to SWB BWE memory in case of switching from FB coding - pending a review by Huawei */ diff --git a/lib_com/tcx_ltp_fx.c b/lib_com/tcx_ltp_fx.c index 3f1f52aab..6f2edb4ce 100644 --- a/lib_com/tcx_ltp_fx.c +++ b/lib_com/tcx_ltp_fx.c @@ -491,6 +491,8 @@ static void tcx_ltp_synth_filter( if ( fade < 0 ) step = negate( step ); } + +#ifdef OPT_TCXLTP_FILTER_LOOP IF( zir != NULL ) { IF( fade != 0 ) @@ -638,6 +640,50 @@ static void tcx_ltp_synth_filter( } } } +#else + FOR( j = 0; j < length; j++ ) + { + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] ); /* Qx */ + s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ + i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */ + k = mult_r( gain, i ); /* Qx */ + if ( fade != 0 ) + k = mult_r( k, alpha ); /* Qx */ + k = add_sat( synth[j], k ); /* Qx */ + if ( zir != NULL ) + { + k = sub_sat( k, zir[j] ); /* Qx */ + } + + synth_ltp[j] = k; /* Qx */ + move16(); + + BASOP_SATURATE_WARNING_OFF_EVS; + if ( fade != 0 ) + { + alpha = add_sat( alpha, step ); + } + BASOP_SATURATE_WARNING_ON_EVS; + + x0++; + x1++; + y0++; + y1++; + } +#endif } ELSE { -- GitLab From e04810d166c467b237d53973564b0329724b8d79 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Wed, 1 Oct 2025 10:13:32 +0200 Subject: [PATCH 8/8] add compilation switch, part 2 --- lib_com/tcx_ltp_fx.c | 46 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/lib_com/tcx_ltp_fx.c b/lib_com/tcx_ltp_fx.c index 6f2edb4ce..d9112ffa0 100644 --- 
a/lib_com/tcx_ltp_fx.c +++ b/lib_com/tcx_ltp_fx.c @@ -754,6 +754,8 @@ static void tcx_ltp_synth_filter32( if ( fade < 0 ) step = negate( step ); } + +#ifdef OPT_TCXLTP_FILTER_LOOP IF( fade != 0 ) { IF( zir != NULL ) @@ -900,6 +902,50 @@ static void tcx_ltp_synth_filter32( } } } +#else + FOR( j = 0; j < length; j++ ) + { + s = L_deposit_l( 0 ); + s2 = L_deposit_l( 0 ); + k = 0; + move16(); + FOR( i = 0; i < L; i++ ) + { + s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] ); /* Qx */ + s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */ + k = k + pitch_res; + } + + /* s2 *= ALPHA; + normal: synth_ltp[j] = synth[j] - gain * s2 + gain * s; + zir: synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j]; + fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */ + L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */ + L_tmp2 = Mpy_32_16_r( L_tmp, gain ); /* Qx */ + IF( fade != 0 ) + L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); /* Qx */ + L_tmp2 = L_add_sat( synth[j], L_tmp2 ); /* Qx */ + if ( zir != NULL ) + { + L_tmp2 = L_sub_sat( L_tmp2, zir[j] ); /* Qx */ + } + + synth_ltp[j] = L_tmp2; /* Qx */ + move16(); + + BASOP_SATURATE_WARNING_OFF_EVS; + if ( fade != 0 ) + { + alpha = add_sat( alpha, step ); + } + BASOP_SATURATE_WARNING_ON_EVS; + + x0++; + x1++; + y0++; + y1++; + } +#endif } ELSE { -- GitLab