hp20: add hp20_fx_32_opt and use it in the JBM decoder

* lib_com/hp50_fx.c: the HP20_FIX32_RECODING variant becomes hp20_fx_32_opt().
  Filter coefficients are stored in Q30 instead of Q29, products are
  accumulated with W_mult0_32_32()/W_add(), and results are taken with
  W_round64_L() in place of W_extract_h(). The former "#else" becomes
  "#endif", so hp20_fx_32() is compiled whether or not the switch is set.
* lib_com/options.h: define HP20_FIX32_RECODING.
* lib_com/prot_fx.h: declare hp20_fx_32_opt() under HP20_FIX32_RECODING.
* lib_dec/ivas_jbm_dec_fx.c: call hp20_fx_32_opt() when HP20_FIX32_RECODING
  is defined, hp20_fx_32() otherwise.

Review fix: the local "Flag Overflow" declaration (guarded by
"#ifndef ISSUE_1836_replace_overflow_libcom") is kept. The "#else" branch that
rescales mem_fx[0..3] still calls L_shr_o( ..., &Overflow ), so deleting the
declaration breaks the build when ISSUE_1836_replace_overflow_libcom is not
defined. New-side hunk offsets in lib_com/hp50_fx.c are shifted by +3 lines
accordingly.

NOTE(review): indentation and blank context lines were re-derived from the
hunk line counts and should be checked against the tree before applying. The
post-image blob id on the hp50_fx.c "index" line predates the review fix.

diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c
index d6a1182a4e9aa45305e27197e88260cf78d0d7e2..9d6f10ed2533a1b6c8028e3851235b18ae91e63f 100644
--- a/lib_com/hp50_fx.c
+++ b/lib_com/hp50_fx.c
@@ -40,7 +40,8 @@
 #include "prot_fx.h"
 #include "wmc_auto.h"
 
-#define HP20_COEFF_SCALE ( 2 )
+#define HP20_COEFF_SCALE    ( 2 )
+#define HP20_FX_COEFF_SCALE ( 1 )
 /*
  * hp20
  *
@@ -340,7 +341,7 @@ void hp20( Word16 signal[], /* i/o: signal to filter any *
 
 
 #ifdef HP20_FIX32_RECODING
-void hp20_fx_32(
+void hp20_fx_32_opt(
     Word32 signal_fx[],
     const Word16 lg,
     Word32 mem_fx[],
@@ -348,54 +349,52 @@ void hp20_fx_32(
 {
     Word32 i;
     Word32 a1_fx, a2_fx, b1_fx, b2_fx;
-    Word32 diff_pos, diff_neg;
 #ifndef ISSUE_1836_replace_overflow_libcom
     Flag Overflow = 0;
 #endif
     Word16 prescale, prescaleOld, prescale_current_frame, diff;
+    Word32 tmp_mem[4];
 
-    prescale = getScaleFactor32( signal_fx, lg );
-    prescale_current_frame = s_min( 3, sub( 1 + HP20_COEFF_SCALE, prescale ) );
-
+    prescale = L_norm_arr( signal_fx, lg );
+    prescale_current_frame = sub( 1 + HP20_FX_COEFF_SCALE, prescale );
     prescaleOld = extract_l( mem_fx[4] );
 
-#ifdef ISSUE_1836_replace_overflow_libcom
-    diff_pos = norm_l( L_shl_sat( L_max( mem_fx[2], mem_fx[3] ), prescaleOld ) );
-    diff_neg = norm_l( L_shl_sat( L_min( mem_fx[2], mem_fx[3] ), prescaleOld ) );
-#else
-    diff_pos = norm_l( L_shl_o( L_max( mem_fx[2], mem_fx[3] ), prescaleOld, &Overflow ) );
-    diff_neg = norm_l( L_shl_o( L_min( mem_fx[2], mem_fx[3] ), prescaleOld, &Overflow ) );
-#endif
+    tmp_mem[0] = L_shl_sat( mem_fx[0], prescaleOld );
+    tmp_mem[1] = L_shl_sat( mem_fx[1], prescaleOld );
+    tmp_mem[2] = L_shl_sat( mem_fx[2], prescaleOld );
+    tmp_mem[3] = L_shl_sat( mem_fx[3], prescaleOld );
+    move32();
+    move32();
+    move32();
+    move32();
+
+    diff = L_norm_arr( tmp_mem, 4 );
+    prescale = s_min( prescale, diff );
 
 
-    diff = L_max( diff_pos, diff_neg );
+    prescale = sub( 1 + HP20_FX_COEFF_SCALE, prescale );
 
-    IF( NE_16( diff, 0 ) )
+    if ( EQ_16( prescale_current_frame, 1 + HP20_FX_COEFF_SCALE - 31 ) ) // signal_fx buffer contains only zeros, so use the mem_fx scale_factor instead
     {
-        prescale = s_min( prescale, diff );
+        prescale_current_frame = prescale;
     }
 
-    prescale = s_min( 3, sub( 1 + HP20_COEFF_SCALE, prescale ) );
-
     diff = sub( prescale, prescaleOld );
 
 #ifdef ISSUE_1836_replace_overflow_libcom
     mem_fx[0] = L_shr_sat( mem_fx[0], diff );
-    move32();
     mem_fx[1] = L_shr_sat( mem_fx[1], diff );
-    move32();
     mem_fx[2] = L_shr_sat( mem_fx[2], diff );
-    move32();
     mem_fx[3] = L_shr_sat( mem_fx[3], diff );
 #else
     mem_fx[0] = L_shr_o( mem_fx[0], diff, &Overflow );
-    move32();
     mem_fx[1] = L_shr_o( mem_fx[1], diff, &Overflow );
-    move32();
     mem_fx[2] = L_shr_o( mem_fx[2], diff, &Overflow );
-    move32();
     mem_fx[3] = L_shr_o( mem_fx[3], diff, &Overflow );
 #endif
+    move32();
+    move32();
+    move32();
     move32();
     mem_fx[4] = L_deposit_l( prescale_current_frame );
     move32();
@@ -406,10 +405,10 @@ void hp20_fx_32(
            [b,a] = butter(2, 20.0/4000.0, 'high');
            b = [0.988954248067140 -1.977908496134280 0.988954248067140]
            a =[1.000000000000000 -1.977786483776764 0.978030508491796]*/
-        a1_fx = 1061816033l /* 1.977786483776764 Q29*/;
-        a2_fx = -525076131l /*-0.978030508491796 Q29*/;
-        b1_fx = -1061881538l /*-1.977908496134280 Q29*/;
-        b2_fx = 530940769l /* 0.988954248067140 Q29*/;
+        a1_fx = 2123632067 /* 1.977786483776764 Q30*/;
+        a2_fx = -1050152262 /*-0.978030508491796 Q30*/;
+        b1_fx = -2123763076 /*-1.977908496134280 Q30*/;
+        b2_fx = 1061881538 /* 0.988954248067140 Q30*/;
     }
     ELSE IF( EQ_32( Fs, 16000 ) )
     {
@@ -417,10 +416,10 @@ void hp20_fx_32(
            [b,a] = butter(2, 20.0/8000.0, 'high');
            b =[ 0.994461788958195 -1.988923577916390 0.994461788958195]
            a =[1.000000000000000 -1.988892905899653 0.988954249933127] */
-        a1_fx = 1067778748l /* 1.988892905899653 Q29*/;
-        a2_fx = -530940770l /*-0.988954249933127 Q29*/;
-        b1_fx = -1067795215l /*-1.988923577916390 Q29*/;
-        b2_fx = 533897608l /* 0.994461788958195 Q29*/;
+        a1_fx = 2135557497 /* 1.988892905899653 Q30*/;
+        a2_fx = -1061881540 /*-0.988954249933127 Q30*/;
+        b1_fx = -2135590430 /*-1.988923577916390 Q30*/;
+        b2_fx = 1067795215 /* 0.994461788958195 Q30*/;
     }
     ELSE IF( EQ_32( Fs, 32000 ) )
     {
@@ -428,10 +427,10 @@ void hp20_fx_32(
            [b,a] = butter(2, 20.0/16000.0, 'high');
            b =[0.997227049904470 -1.994454099808940 0.997227049904470]
            a =[1.000000000000000 -1.994446410541927 0.994461789075954]*/
-        a1_fx = 1070760263l /* 1.994446410541927 Q29*/;
-        a2_fx = -533897608l /*-0.994461789075954 Q29*/;
-        b1_fx = -1070764392l /*-1.994454099808940 Q29*/;
-        b2_fx = 535382196l /* 0.997227049904470 Q29*/;
+        a1_fx = 2141520527 /* 1.994446410541927 Q30*/;
+        a2_fx = -1067795215 /*-0.994461789075954 Q30*/;
+        b1_fx = -2141528783 /*-1.994454099808940 Q30*/;
+        b2_fx = 1070764392 /* 0.997227049904470 Q30*/;
     }
     ELSE
     {
@@ -439,10 +438,10 @@ void hp20_fx_32(
            [b,a] = butter(2, 20.0/24000.0, 'high');
            b =[ 0.998150511190452 -1.996301022380904 0.998150511190452]
            a =[1.000000000000000 -1.996297601769122 0.996304442992686]*/
-        a1_fx = 1071754114l /* 1.996297601769122 Q29*/;
-        a2_fx = -534886875l /*-0.996304442992686 Q29*/;
-        b1_fx = -1071755951l /*-1.996301022380904 Q29*/;
-        b2_fx = 535877975l /* 0.998150511190452 Q29*/;
+        a1_fx = 2143508228 /* 1.996297601769122 Q30*/;
+        a2_fx = -1069773750 /*-0.996304442992686 Q30*/;
+        b1_fx = -2143511901 /*-1.996301022380904 Q30*/;
+        b2_fx = 1071755951 /* 0.998150511190452 Q30*/;
     }
     move32();
     move32();
@@ -451,24 +450,24 @@ void hp20_fx_32(
     Word64 W_sum, W_y0, W_y1, W_y2;
     Word32 x0, x1, x2;
 
-    W_sum = W_mult_32_32( b2_fx, mem_fx[2] ); /* b2*x2 */
-    W_sum = W_mac_32_32( W_sum, b1_fx, mem_fx[3] ); /* b1*x1 */
+    W_sum = W_mult0_32_32( b2_fx, mem_fx[2] ); /* b2*x2 */
+    W_sum = W_add( W_sum, W_mult0_32_32( b1_fx, mem_fx[3] ) ); /* b1*x1 */
     x2 = L_shr( signal_fx[0], prescale );
-    W_sum = W_mac_32_32( W_sum, b2_fx, x2 ); /* b2*x0 */
-    W_sum = W_mac_32_32( W_sum, mem_fx[0], a2_fx ); /* y2*a2 */
-    W_sum = W_mac_32_32( W_sum, mem_fx[1], a1_fx ); /* y1*a1 */
-    W_y2 = W_shl( W_sum, HP20_COEFF_SCALE );
-    signal_fx[0] = W_extract_h( W_shl( W_y2, prescale ) );
+    W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x2 ) ); /* b2*x0 */
+    W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[0], a2_fx ) ); /* y2*a2 */
+    W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[1], a1_fx ) ); /* y1*a1 */
+    W_y2 = W_shl( W_sum, 1 + HP20_FX_COEFF_SCALE );
+    signal_fx[0] = W_round64_L( W_shl( W_y2, prescale ) );
     move32();
 
-    W_sum = W_mult_32_32( b2_fx, mem_fx[3] ); /* b2*x2 */
-    W_sum = W_mac_32_32( W_sum, b1_fx, x2 ); /* b1*x1 */
+    W_sum = W_mult0_32_32( b2_fx, mem_fx[3] ); /* b2*x2 */
+    W_sum = W_add( W_sum, W_mult0_32_32( b1_fx, x2 ) ); /* b1*x1 */
     x1 = L_shr( signal_fx[1], prescale );
-    W_sum = W_mac_32_32( W_sum, b2_fx, x1 ); /* b2*x0 */
-    W_sum = W_mac_32_32( W_sum, mem_fx[1], a2_fx ); /* y2*a2 */
-    W_sum = W_mac_32_32( W_sum, W_extract_h( W_y2 ), a1_fx ); /* y1*a1 */
-    W_y1 = W_shl( W_sum, HP20_COEFF_SCALE );
-    signal_fx[1] = W_extract_h( W_shl( W_y1, prescale ) );
+    W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x1 ) ); /* b2*x0 */
+    W_sum = W_add( W_sum, W_mult0_32_32( mem_fx[1], a2_fx ) ); /* y2*a2 */
+    W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y2 ), a1_fx ) ); /* y1*a1 */
+    W_y1 = W_shl( W_sum, 1 + HP20_FX_COEFF_SCALE );
+    signal_fx[1] = W_round64_L( W_shl( W_y1, prescale ) );
     move32();
 
     diff = sub( prescale_current_frame, prescale );
@@ -479,15 +478,15 @@ void hp20_fx_32(
 
     FOR( i = 2; i < lg; i++ )
     {
-        W_sum = W_mult_32_32( b2_fx, x2 ); /* b2*x2 */
-        W_sum = W_mac_32_32( W_sum, b1_fx, x1 ); /* b1*x1 */
+        W_sum = W_mult0_32_32( b2_fx, x2 ); /* b2*x2 */
+        W_sum = W_add( W_sum, W_mult0_32_32( b1_fx, x1 ) ); /* b1*x1 */
         x0 = L_shr( signal_fx[i], prescale_current_frame );
-        W_sum = W_mac_32_32( W_sum, b2_fx, x0 ); /* b2*x0 */
-        W_sum = W_mac_32_32( W_sum, W_extract_h( W_y2 ), a2_fx ); /* y2*a2 */
-        W_sum = W_mac_32_32( W_sum, W_extract_h( W_y1 ), a1_fx ); /* y1*a1 */
-        W_y0 = W_shl( W_sum, HP20_COEFF_SCALE );
+        W_sum = W_add( W_sum, W_mult0_32_32( b2_fx, x0 ) ); /* b2*x0 */
+        W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y2 ), a2_fx ) ); /* y2*a2 */
+        W_sum = W_add( W_sum, W_mult0_32_32( W_round64_L( W_y1 ), a1_fx ) ); /* y1*a1 */
+        W_y0 = W_shl( W_sum, 1 + HP20_FX_COEFF_SCALE );
 
-        signal_fx[i] = W_extract_h( W_shl( W_y0, prescale_current_frame ) );
+        signal_fx[i] = W_round64_L( W_shl( W_y0, prescale_current_frame ) );
         move32();
 
         x2 = x1;
@@ -501,8 +500,8 @@ void hp20_fx_32(
         move64();
     }
 
-    mem_fx[0] = W_extract_h( W_y2 );
-    mem_fx[1] = W_extract_h( W_y1 );
+    mem_fx[0] = W_round64_L( W_y2 );
+    mem_fx[1] = W_round64_L( W_y1 );
     mem_fx[2] = x2;
     mem_fx[3] = x1;
 
@@ -513,7 +512,7 @@ void hp20_fx_32(
 
     return;
 }
-#else
+#endif
 void hp20_fx_32(
     Word32 signal_fx[],
     const Word16 lg,
@@ -698,4 +697,3 @@ void hp20_fx_32(
 
     return;
 }
-#endif
diff --git a/lib_com/options.h b/lib_com/options.h
index 7517ca330dfda4f60667976f4f48de804f8e2f90..1adebda2586e24fe42964865cae9d4a4b618dbd3 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -106,6 +106,7 @@
 
 #define FIX_1962_FORMAT_CONV_SPECTRAL_DIFF /* FhG: Improved precision of targetEnergy in ivas_ls_setup_conversion_process_mdct_fx() */
 #define FIX_2003_CON_TCX_OVERFLOW /* FhG: Use a dynamic scaling factor for the synth buffer at the output of con_tcx_ivas_fx() */
+#define HP20_FIX32_RECODING /* FhG: optimized hp20_fx_32 calculation and applied it as hp20_fx_32_opt in decoder */
 #define OPT_TCXLTP_FILTER_LOOP /* FhG: optimize loop in tcx_ltp_synth_filter */
 #define FIX_2049_DIFF_IN_DECORR_TAIL /* FhG: correct scale values in ivas_dirac_dec_binaural_process_output_fx() */
 #define FIX_2602_NONBE_SAT_IN_SWB_TBE_SCALE /* Dolby/FhG: fix for issue 2026: Saturation in SWB TBE re-scaling function preventing StereoDownmix complexity job to complete */
diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h
index b9822fc9b886c7f7e8773cf4a37d2197fdf7bb70..d5583074029523cae0e89ed9c6f22ee087d32e2e 100644
--- a/lib_com/prot_fx.h
+++ b/lib_com/prot_fx.h
@@ -10490,6 +10490,14 @@ void hp20_fx_32(
     Word32 mem_fx[],
     const Word32 Fs );
 
+#ifdef HP20_FIX32_RECODING
+void hp20_fx_32_opt(
+    Word32 signal_fx[],
+    const Word16 lg,
+    Word32 mem_fx[],
+    const Word32 Fs );
+#endif
+
 void getTCXMode_ivas_fx(
     Decoder_State *st, /* i/o: decoder memory state */
     Decoder_State *st0, /* i : bitstream */
diff --git a/lib_dec/ivas_jbm_dec_fx.c b/lib_dec/ivas_jbm_dec_fx.c
index a1988168cc491fed46256a4d16a7b7089a2bdbd6..fa1f9b39dc144cdeb90d480adcf6a5ecf900bfb9 100644
--- a/lib_dec/ivas_jbm_dec_fx.c
+++ b/lib_dec/ivas_jbm_dec_fx.c
@@ -159,7 +159,11 @@ ivas_error ivas_jbm_dec_tc_fx(
         /* HP filtering */
         FOR( n = 0; n < s_min( nchan_out, st_ivas->nchan_transport ); n++ )
         {
+#ifdef HP20_FIX32_RECODING
+            hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#else
             hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#endif
         }
     }
     ELSE IF( EQ_32( st_ivas->ivas_format, ISM_FORMAT ) )
@@ -226,7 +230,11 @@ ivas_error ivas_jbm_dec_tc_fx(
             }
 
             /* HP filtering */
+#ifdef HP20_FIX32_RECODING
+            hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#else
             hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#endif
         }
 
         test();
@@ -577,7 +585,11 @@ ivas_error ivas_jbm_dec_tc_fx(
         /* HP filtering */
         FOR( n = 0; n < nchan_remapped; n++ )
         {
+#ifdef HP20_FIX32_RECODING
+            hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#else
             hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#endif
         }
 
         IF( EQ_32( st_ivas->ivas_format, SBA_FORMAT ) )
@@ -770,7 +782,11 @@ ivas_error ivas_jbm_dec_tc_fx(
 
             FOR( n = 0; n < getNumChanSynthesis( st_ivas ); n++ )
             {
+#ifdef HP20_FIX32_RECODING
+                hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#else
                 hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#endif
             }
 
             Word16 output_q = 11;
@@ -1009,7 +1025,11 @@ ivas_error ivas_jbm_dec_tc_fx(
             /* HP filtering */
             FOR( n = 0; n < getNumChanSynthesis( st_ivas ); n++ )
             {
+#ifdef HP20_FIX32_RECODING
+                hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#else
                 hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#endif
             }
 
             nchan_remapped = ivas_sba_remapTCs_fx( &p_output_fx[sba_ch_idx], st_ivas, output_frame );
@@ -1122,7 +1142,11 @@ ivas_error ivas_jbm_dec_tc_fx(
             {
                 IF( NE_16( n, LFE_CHANNEL ) )
                 {
+#ifdef HP20_FIX32_RECODING
+                    hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#else
                     hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#endif
                 }
             }
 
@@ -1180,7 +1204,11 @@ ivas_error ivas_jbm_dec_tc_fx(
             {
                 IF( NE_16( n, LFE_CHANNEL ) )
                 {
+#ifdef HP20_FIX32_RECODING
+                    hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#else
                     hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#endif
                 }
             }
 
@@ -1246,7 +1274,11 @@ ivas_error ivas_jbm_dec_tc_fx(
             /* HP filtering */
             FOR( n = 0; n < st_ivas->nchan_transport; n++ )
             {
+#ifdef HP20_FIX32_RECODING
+                hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#else
                 hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#endif
             }
 
             /* Rendering */
@@ -1460,7 +1492,11 @@ ivas_error ivas_jbm_dec_tc_fx(
             /* HP filtering */
             FOR( n = 0; n < getNumChanSynthesis( st_ivas ); n++ )
             {
+#ifdef HP20_FIX32_RECODING
+                hp20_fx_32_opt( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#else
                 hp20_fx_32( p_output_fx[n], output_frame, st_ivas->mem_hp20_out_fx[n], output_Fs );
+#endif
             }
 
             IF( EQ_32( st_ivas->renderer_type, RENDERER_MCMASA_MONO_STEREO ) )