Loading .gitlab-ci.yml +4 −4 Original line number Diff line number Diff line Loading @@ -168,7 +168,7 @@ stages: - current_commit_sha=$(git rev-parse HEAD) ### build reference binaries - git checkout $FLOAT_REF_BRANCH - git pull - git pull origin $FLOAT_REF_BRANCH - *activate-debug-mode-info-if-set - make clean - make -j Loading @@ -184,7 +184,7 @@ stages: - current_commit_sha=$(git rev-parse HEAD) ### build merge target binaries - git checkout $CI_MERGE_REQUEST_TARGET_BRANCH_NAME - git pull - git pull origin $CI_MERGE_REQUEST_TARGET_BRANCH_NAME - *activate-debug-mode-info-if-set - make clean - make -j Loading Loading @@ -232,7 +232,7 @@ stages: - git fetch - git restore . # Just as a precaution - git checkout $BASOP_CI_BRANCH_PC_REPO - git pull - git pull origin $BASOP_CI_BRANCH_PC_REPO - cd - - cp -r $SCRIPTS_DIR/ci . - cp -r $SCRIPTS_DIR/scripts . Loading Loading @@ -658,7 +658,7 @@ stages: ### run main now - git checkout $CI_MERGE_REQUEST_TARGET_BRANCH_NAME - git pull - git pull origin $CI_MERGE_REQUEST_TARGET_BRANCH_NAME - make clean - make -j # need to restore cache again Loading lib_com/cldfb.c +83 −30 Original line number Diff line number Diff line Loading @@ -400,8 +400,13 @@ void cldfbAnalysis_ts_fx( rr12_fx = L_sub( r1_fx, r2_fx ); // q -1 ri12_fx = L_negate( L_add( i1_fx, i2_fx ) ); // q - 1 /*cplxMult(&rBuffer[2*k],&rBuffer[2*k+1],rr12,ri12,rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE rBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), ri12_fx, rot_vctr_im_fx[k] ); // q - 3 rBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), ri12_fx, rot_vctr_re_fx[k] ); // q - 3 #else /* OPT_AVOID_STATE_BUF_RESCALE */ rBuffer_fx[2 * k] = L_sub( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_im_fx[k] ) ); // q - 3 rBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_re_fx[k] ) ); // q - 3 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); Loading @@ -409,8 +414,13 @@ void cldfbAnalysis_ts_fx( ir12_fx = L_add( r1_fx, r2_fx ); // q - 1 ii12_fx = L_sub( i1_fx, i2_fx ); // q - 1 /*cplxMult(&iBuffer[2*k],&iBuffer[2*k+1],ir12,ii12,rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE iBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), ii12_fx, rot_vctr_im_fx[k] ); // q - 3 iBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), ii12_fx, rot_vctr_re_fx[k] ); // q - 3 #else /* OPT_AVOID_STATE_BUF_RESCALE */ iBuffer_fx[2 * k] = L_sub( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_im_fx[k] ) ); // q - 3 iBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_re_fx[k] ) ); // q - 3 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); } Loading Loading @@ -451,8 +461,13 @@ void cldfbAnalysis_ts_fx( rr12_fx = L_add( r1_fx, r2_fx ); // q - 1 ri12_fx = L_sub( i1_fx, i2_fx ); // q - 1 /*cplxMult(&rBuffer[2*k],&rBuffer[2*k+1],rr12,ri12,rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE rBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), ri12_fx, rot_vctr_im_fx[k] ); // q - 3 rBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), ri12_fx, rot_vctr_re_fx[k] ); // q - 3 #else /* OPT_AVOID_STATE_BUF_RESCALE */ rBuffer_fx[2 * k] = L_sub( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_im_fx[k] ) ); // q - 3 rBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_re_fx[k] ) ); // q - 3 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); Loading @@ -460,8 +475,13 @@ void cldfbAnalysis_ts_fx( ir12_fx = L_sub( r1_fx, r2_fx ); // q - 1 ii12_fx = L_add( i1_fx, i2_fx ); // q - 1 /*cplxMult(&iBuffer[2*k],&iBuffer[2*k+1],ir12,ii12,rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE iBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), ii12_fx, rot_vctr_im_fx[k] ); // q - 3 iBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), ii12_fx, rot_vctr_re_fx[k] ); // q - 3 #else /* OPT_AVOID_STATE_BUF_RESCALE */ iBuffer_fx[2 * k] = L_sub( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_im_fx[k] ) ); // q - 3 iBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_re_fx[k] ) ); // q - 3 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); } Loading Loading @@ -490,8 +510,13 @@ void cldfbAnalysis_ts_fx( FOR( k = 0; k < M2; k++ ) { /*cplxMult(&realBuffer[M1-1-(2*k)],&realBuffer[2*k],rBuffer[2*k],rBuffer[2*k+1],rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE realBuffer_fx[( ( M1 - 1 ) - ( k * 2 ) )] = Msub_32_32( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_re_fx[k] ), rBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ); // q - 5 realBuffer_fx[2 * k] = Madd_32_32( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_im_fx[k] ), rBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ); // q - 5 #else /* OPT_AVOID_STATE_BUF_RESCALE */ realBuffer_fx[( ( M1 - 1 ) - ( k * 2 ) )] = L_sub( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_re_fx[k] ), Mpy_32_32( rBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ) ); // q - 5 realBuffer_fx[2 * k] = L_add( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_im_fx[k] ), Mpy_32_32( rBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ) ); // q - 5 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); } Loading Loading @@ -520,8 +545,13 @@ void cldfbAnalysis_ts_fx( { /* do it inplace */ /*cplxMult(&imagBuffer[2*k],&imagBuffer[M1-1-(2*k)],iBuffer[2*k],iBuffer[2*k+1],rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE imagBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_re_fx[k] ), iBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ); // q - 5 imagBuffer_fx[( M1 - 1 ) - ( k * 2 )] = Madd_32_32( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_im_fx[k] ), iBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ); // q - 5 #else /* OPT_AVOID_STATE_BUF_RESCALE */ imagBuffer_fx[2 * k] = L_sub( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_re_fx[k] ), Mpy_32_32( iBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ) ); // q - 5 imagBuffer_fx[( M1 - 1 ) - ( k * 2 )] = L_add( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_im_fx[k] ), Mpy_32_32( iBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ) ); // q - 5 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); } Loading @@ -542,8 +572,13 @@ void cldfbAnalysis_ts_fx( /*cplxMult(&realBuffer[k], &imagBuffer[k], realBuffer[k], imagBuffer[k], rot_vctr_delay_re[k], rot_vctr_delay_im[k]);*/ /*realBuffer[k] = rBuffer[k]; imagBuffer[k] = iBuffer[k];*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE cplx_aux_fx = Msub_32_32( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_re_fx[k] ), imagBuffer_fx[k], rot_vctr_delay_im_fx[k] ); // q - 5 imagBuffer_fx[k] = Madd_32_32( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_im_fx[k] ), imagBuffer_fx[k], rot_vctr_delay_re_fx[k] ); // q - 5 #else /* OPT_AVOID_STATE_BUF_RESCALE */ cplx_aux_fx = L_sub( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_re_fx[k] ), Mpy_32_32( imagBuffer_fx[k], rot_vctr_delay_im_fx[k] ) ); // q - 5 imagBuffer_fx[k] = L_add( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_im_fx[k] ), Mpy_32_32( imagBuffer_fx[k], rot_vctr_delay_re_fx[k] ) ); // q - 5 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ realBuffer_fx[k] = cplx_aux_fx; move32(); move32(); Loading Loading @@ -1091,6 +1126,7 @@ void cldfbAnalysis_ts_fx_fixed_q( return; } /*-------------------------------------------------------------------* * cldfbSynthesis_ivas() * Loading @@ -1102,6 +1138,9 @@ void cldfbSynthesis_ivas_fx( Word32 *timeOut_fx, /* o : output time domain samples Qx - 1*/ const Word16 samplesToProcess, /* i : number of processed samples */ const Word16 shift, /* i : scale for state buffer */ #ifdef OPT_AVOID_STATE_BUF_RESCALE const Word16 out_shift, /* i : scale for output buffer */ #endif /* OPT_AVOID_STATE_BUF_RESCALE */ HANDLE_CLDFB_FILTER_BANK h_cldfb /* i : filter bank state */ ) { Loading Loading @@ -1295,11 +1334,11 @@ void cldfbSynthesis_ivas_fx( FOR( i = 0; i < L2; i++ ) { Word32 prod = L_shl_sat( Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter_sf ), shift ); accu0 = Madd_32_16( synthesisBuffer_fx[i], prod, p_filter[i] ); // Qx - 1 accu1 = Madd_32_16( synthesisBuffer_fx[1 * L2 + i], prod, p_filter[( 1 * L2 + i )] ); // Qx - 1 accu2 = Madd_32_16( synthesisBuffer_fx[2 * L2 + i], prod, p_filter[( 2 * L2 + i )] ); // Qx - 1 accu3 = Madd_32_16( synthesisBuffer_fx[3 * L2 + i], prod, p_filter[( 3 * L2 + i )] ); // Qx - 1 accu4 = Madd_32_16( synthesisBuffer_fx[4 * L2 + i], prod, p_filter[( 4 * L2 + i )] ); // Qx - 1 accu0 = Madd_32_16( synthesisBuffer_fx[i], prod, p_filter[i] ); // Qx -1 + shift accu1 = Madd_32_16( synthesisBuffer_fx[1 * L2 + i], prod, p_filter[( 1 * L2 + i )] ); // Qx -1 + shift accu2 = Madd_32_16( synthesisBuffer_fx[2 * L2 + i], prod, p_filter[( 2 * L2 + i )] ); // Qx -1 + shift accu3 = Madd_32_16( synthesisBuffer_fx[3 * L2 + i], prod, p_filter[( 3 * L2 + i )] ); // Qx -1 + shift accu4 = Madd_32_16( synthesisBuffer_fx[4 * L2 + i], prod, p_filter[( 4 * L2 + i )] ); // Qx -1 + shift synthesisBuffer_fx[i] = accu0; move32(); Loading @@ -1314,11 +1353,26 @@ void cldfbSynthesis_ivas_fx( } } #ifdef OPT_AVOID_STATE_BUF_RESCALE IF( 0 == out_shift ) { #endif /* OPT_AVOID_STATE_BUF_RESCALE */ FOR( i = 0; i < M1; i++ ) { ptr_time_out_fx[( M1 - 1 ) - i] = synthesisBuffer_fx[4 * L2 + M1 + i]; move32(); } #ifdef OPT_AVOID_STATE_BUF_RESCALE } ELSE { FOR( i = 0; i < M1; i++ ) { ptr_time_out_fx[( M1 - 1 ) - i] = L_shl_sat( synthesisBuffer_fx[4 * L2 + M1 + i], out_shift ); move32(); } } #endif /* OPT_AVOID_STATE_BUF_RESCALE */ ptr_time_out_fx += M1; Loading @@ -1333,7 +1387,6 @@ void cldfbSynthesis_ivas_fx( return; } void configureCldfb_ivas_enc_fx( HANDLE_CLDFB_FILTER_BANK h_cldfb, /* i/o: filter bank handle */ const Word32 sampling_rate /* i : sampling rate */ Loading lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -69,6 +69,7 @@ /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ //#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ #define OPT_AVOID_STATE_BUF_RESCALE /* Optimization made to avoid rescale of synth state buffer */ #define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, nonbe*/ #define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, nonbe*/ #define FIX_1379_MASA_ANGLE_ROUND Loading lib_com/prot_fx.h +8 −5 Original line number Diff line number Diff line Loading @@ -9745,6 +9745,9 @@ void cldfbSynthesis_ivas_fx( Word32 *timeOut_fx, /* o : output time domain samples Qx - 1*/ const Word16 samplesToProcess, /* i : number of processed samples */ const Word16 shift, /* i : scale for state buffer */ #ifdef OPT_AVOID_STATE_BUF_RESCALE const Word16 out_shift, /* i : scale for output buffer */ #endif HANDLE_CLDFB_FILTER_BANK h_cldfb /* i : filter bank state */ ); Loading lib_dec/FEC_HQ_phase_ecu_fx.c +6 −6 Original line number Diff line number Diff line Loading @@ -4565,13 +4565,13 @@ static void ivas_fec_noise_filling_fx( pt6 = &p_mdct_ola[0]; FOR( k = 0; k < tmp_fx; k++ ) { L_tmp = L_mult( *sinq_tab, *sinq_tab ); /*Q30 */ L_tmp = L_mult( *sinq_tab, *sinq_tab ); /*Q31 */ sinq_tab++; q2 = round_fx( L_sub( 2147483647, L_tmp ) ); /*Q15 */ q1 = round_fx( L_tmp ); /*Q15 */ L_tmp = L_mult( ( *pt1 ), q1 ); /*Qsynth+16 */ L_tmp = L_add( L_tmp, L_shr( Mpy_32_16_1( L_deposit_h( *pt6++ ), q2 ), Q_old_out ) ); /*Qsynth+16 */ ( *pt1++ ) = round_fx( L_tmp ); /*Qsynth */ L_tmp = L_add_sat( L_tmp, L_shr_sat( Mpy_32_16_1( L_deposit_h( *pt6++ ), q2 ), Q_old_out ) ); /*Qsynth+16 */ ( *pt1++ ) = round_fx_sat( L_tmp ); /*Qsynth */ move16(); } Loading Loading
.gitlab-ci.yml +4 −4 Original line number Diff line number Diff line Loading @@ -168,7 +168,7 @@ stages: - current_commit_sha=$(git rev-parse HEAD) ### build reference binaries - git checkout $FLOAT_REF_BRANCH - git pull - git pull origin $FLOAT_REF_BRANCH - *activate-debug-mode-info-if-set - make clean - make -j Loading @@ -184,7 +184,7 @@ stages: - current_commit_sha=$(git rev-parse HEAD) ### build merge target binaries - git checkout $CI_MERGE_REQUEST_TARGET_BRANCH_NAME - git pull - git pull origin $CI_MERGE_REQUEST_TARGET_BRANCH_NAME - *activate-debug-mode-info-if-set - make clean - make -j Loading Loading @@ -232,7 +232,7 @@ stages: - git fetch - git restore . # Just as a precaution - git checkout $BASOP_CI_BRANCH_PC_REPO - git pull - git pull origin $BASOP_CI_BRANCH_PC_REPO - cd - - cp -r $SCRIPTS_DIR/ci . - cp -r $SCRIPTS_DIR/scripts . Loading Loading @@ -658,7 +658,7 @@ stages: ### run main now - git checkout $CI_MERGE_REQUEST_TARGET_BRANCH_NAME - git pull - git pull origin $CI_MERGE_REQUEST_TARGET_BRANCH_NAME - make clean - make -j # need to restore cache again Loading
lib_com/cldfb.c +83 −30 Original line number Diff line number Diff line Loading @@ -400,8 +400,13 @@ void cldfbAnalysis_ts_fx( rr12_fx = L_sub( r1_fx, r2_fx ); // q -1 ri12_fx = L_negate( L_add( i1_fx, i2_fx ) ); // q - 1 /*cplxMult(&rBuffer[2*k],&rBuffer[2*k+1],rr12,ri12,rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE rBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), ri12_fx, rot_vctr_im_fx[k] ); // q - 3 rBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), ri12_fx, rot_vctr_re_fx[k] ); // q - 3 #else /* OPT_AVOID_STATE_BUF_RESCALE */ rBuffer_fx[2 * k] = L_sub( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_im_fx[k] ) ); // q - 3 rBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_re_fx[k] ) ); // q - 3 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); Loading @@ -409,8 +414,13 @@ void cldfbAnalysis_ts_fx( ir12_fx = L_add( r1_fx, r2_fx ); // q - 1 ii12_fx = L_sub( i1_fx, i2_fx ); // q - 1 /*cplxMult(&iBuffer[2*k],&iBuffer[2*k+1],ir12,ii12,rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE iBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), ii12_fx, rot_vctr_im_fx[k] ); // q - 3 iBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), ii12_fx, rot_vctr_re_fx[k] ); // q - 3 #else /* OPT_AVOID_STATE_BUF_RESCALE */ iBuffer_fx[2 * k] = L_sub( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_im_fx[k] ) ); // q - 3 iBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_re_fx[k] ) ); // q - 3 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); } Loading Loading @@ -451,8 +461,13 @@ void cldfbAnalysis_ts_fx( rr12_fx = L_add( r1_fx, r2_fx ); // q - 1 ri12_fx = L_sub( i1_fx, i2_fx ); // q - 1 /*cplxMult(&rBuffer[2*k],&rBuffer[2*k+1],rr12,ri12,rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE rBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), ri12_fx, rot_vctr_im_fx[k] ); // q - 3 rBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), ri12_fx, rot_vctr_re_fx[k] ); // q - 3 #else /* OPT_AVOID_STATE_BUF_RESCALE */ rBuffer_fx[2 * k] = L_sub( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_im_fx[k] ) ); // q - 3 rBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_re_fx[k] ) ); // q - 3 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); Loading @@ -460,8 +475,13 @@ void cldfbAnalysis_ts_fx( ir12_fx = L_sub( r1_fx, r2_fx ); // q - 1 ii12_fx = L_add( i1_fx, i2_fx ); // q - 1 /*cplxMult(&iBuffer[2*k],&iBuffer[2*k+1],ir12,ii12,rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE iBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), ii12_fx, rot_vctr_im_fx[k] ); // q - 3 iBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), ii12_fx, rot_vctr_re_fx[k] ); // q - 3 #else /* OPT_AVOID_STATE_BUF_RESCALE */ iBuffer_fx[2 * k] = L_sub( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_im_fx[k] ) ); // q - 3 iBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_re_fx[k] ) ); // q - 3 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); } Loading Loading @@ -490,8 +510,13 @@ void cldfbAnalysis_ts_fx( FOR( k = 0; k < M2; k++ ) { /*cplxMult(&realBuffer[M1-1-(2*k)],&realBuffer[2*k],rBuffer[2*k],rBuffer[2*k+1],rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE realBuffer_fx[( ( M1 - 1 ) - ( k * 2 ) )] = Msub_32_32( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_re_fx[k] ), rBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ); // q - 5 realBuffer_fx[2 * k] = Madd_32_32( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_im_fx[k] ), rBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ); // q - 5 #else /* OPT_AVOID_STATE_BUF_RESCALE */ realBuffer_fx[( ( M1 - 1 ) - ( k * 2 ) )] = L_sub( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_re_fx[k] ), Mpy_32_32( rBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ) ); // q - 5 realBuffer_fx[2 * k] = L_add( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_im_fx[k] ), Mpy_32_32( rBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ) ); // q - 5 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); } Loading Loading @@ -520,8 +545,13 @@ void cldfbAnalysis_ts_fx( { /* do it inplace */ /*cplxMult(&imagBuffer[2*k],&imagBuffer[M1-1-(2*k)],iBuffer[2*k],iBuffer[2*k+1],rot_vctr_re[k],rot_vctr_im[k]);*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE imagBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_re_fx[k] ), iBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ); // q - 5 imagBuffer_fx[( M1 - 1 ) - ( k * 2 )] = Madd_32_32( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_im_fx[k] ), iBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ); // q - 5 #else /* OPT_AVOID_STATE_BUF_RESCALE */ imagBuffer_fx[2 * k] = L_sub( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_re_fx[k] ), Mpy_32_32( iBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ) ); // q - 5 imagBuffer_fx[( M1 - 1 ) - ( k * 2 )] = L_add( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_im_fx[k] ), Mpy_32_32( iBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ) ); // q - 5 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ move32(); move32(); } Loading @@ -542,8 +572,13 @@ void cldfbAnalysis_ts_fx( /*cplxMult(&realBuffer[k], &imagBuffer[k], realBuffer[k], imagBuffer[k], rot_vctr_delay_re[k], rot_vctr_delay_im[k]);*/ /*realBuffer[k] = rBuffer[k]; imagBuffer[k] = iBuffer[k];*/ #ifdef OPT_AVOID_STATE_BUF_RESCALE cplx_aux_fx = Msub_32_32( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_re_fx[k] ), imagBuffer_fx[k], rot_vctr_delay_im_fx[k] ); // q - 5 imagBuffer_fx[k] = Madd_32_32( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_im_fx[k] ), imagBuffer_fx[k], rot_vctr_delay_re_fx[k] ); // q - 5 #else /* OPT_AVOID_STATE_BUF_RESCALE */ cplx_aux_fx = L_sub( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_re_fx[k] ), Mpy_32_32( imagBuffer_fx[k], rot_vctr_delay_im_fx[k] ) ); // q - 5 imagBuffer_fx[k] = L_add( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_im_fx[k] ), Mpy_32_32( imagBuffer_fx[k], rot_vctr_delay_re_fx[k] ) ); // q - 5 #endif /* OPT_AVOID_STATE_BUF_RESCALE */ realBuffer_fx[k] = cplx_aux_fx; move32(); move32(); Loading Loading @@ -1091,6 +1126,7 @@ void cldfbAnalysis_ts_fx_fixed_q( return; } /*-------------------------------------------------------------------* * cldfbSynthesis_ivas() * Loading @@ -1102,6 +1138,9 @@ void cldfbSynthesis_ivas_fx( Word32 *timeOut_fx, /* o : output time domain samples Qx - 1*/ const Word16 samplesToProcess, /* i : number of processed samples */ const Word16 shift, /* i : scale for state buffer */ #ifdef OPT_AVOID_STATE_BUF_RESCALE const Word16 out_shift, /* i : scale for output buffer */ #endif /* OPT_AVOID_STATE_BUF_RESCALE */ HANDLE_CLDFB_FILTER_BANK h_cldfb /* i : filter bank state */ ) { Loading Loading @@ -1295,11 +1334,11 @@ void cldfbSynthesis_ivas_fx( FOR( i = 0; i < L2; i++ ) { Word32 prod = L_shl_sat( Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter_sf ), shift ); accu0 = Madd_32_16( synthesisBuffer_fx[i], prod, p_filter[i] ); // Qx - 1 accu1 = Madd_32_16( synthesisBuffer_fx[1 * L2 + i], prod, p_filter[( 1 * L2 + i )] ); // Qx - 1 accu2 = Madd_32_16( synthesisBuffer_fx[2 * L2 + i], prod, p_filter[( 2 * L2 + i )] ); // Qx - 1 accu3 = Madd_32_16( synthesisBuffer_fx[3 * L2 + i], prod, p_filter[( 3 * L2 + i )] ); // Qx - 1 accu4 = Madd_32_16( synthesisBuffer_fx[4 * L2 + i], prod, p_filter[( 4 * L2 + i )] ); // Qx - 1 accu0 = Madd_32_16( synthesisBuffer_fx[i], prod, p_filter[i] ); // Qx -1 + shift accu1 = Madd_32_16( synthesisBuffer_fx[1 * L2 + i], prod, p_filter[( 1 * L2 + i )] ); // Qx -1 + shift accu2 = Madd_32_16( synthesisBuffer_fx[2 * L2 + i], prod, p_filter[( 2 * L2 + i )] ); // Qx -1 + shift accu3 = Madd_32_16( synthesisBuffer_fx[3 * L2 + i], prod, p_filter[( 3 * L2 + i )] ); // Qx -1 + shift accu4 = Madd_32_16( synthesisBuffer_fx[4 * L2 + i], prod, p_filter[( 4 * L2 + i )] ); // Qx -1 + shift synthesisBuffer_fx[i] = accu0; move32(); Loading @@ -1314,11 +1353,26 @@ void cldfbSynthesis_ivas_fx( } } #ifdef OPT_AVOID_STATE_BUF_RESCALE IF( 0 == out_shift ) { #endif /* OPT_AVOID_STATE_BUF_RESCALE */ FOR( i = 0; i < M1; i++ ) { ptr_time_out_fx[( M1 - 1 ) - i] = synthesisBuffer_fx[4 * L2 + M1 + i]; move32(); } #ifdef OPT_AVOID_STATE_BUF_RESCALE } ELSE { FOR( i = 0; i < M1; i++ ) { ptr_time_out_fx[( M1 - 1 ) - i] = L_shl_sat( synthesisBuffer_fx[4 * L2 + M1 + i], out_shift ); move32(); } } #endif /* OPT_AVOID_STATE_BUF_RESCALE */ ptr_time_out_fx += M1; Loading @@ -1333,7 +1387,6 @@ void cldfbSynthesis_ivas_fx( return; } void configureCldfb_ivas_enc_fx( HANDLE_CLDFB_FILTER_BANK h_cldfb, /* i/o: filter bank handle */ const Word32 sampling_rate /* i : sampling rate */ Loading
lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -69,6 +69,7 @@ /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ //#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ #define OPT_AVOID_STATE_BUF_RESCALE /* Optimization made to avoid rescale of synth state buffer */ #define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, nonbe*/ #define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, nonbe*/ #define FIX_1379_MASA_ANGLE_ROUND Loading
lib_com/prot_fx.h +8 −5 Original line number Diff line number Diff line Loading @@ -9745,6 +9745,9 @@ void cldfbSynthesis_ivas_fx( Word32 *timeOut_fx, /* o : output time domain samples Qx - 1*/ const Word16 samplesToProcess, /* i : number of processed samples */ const Word16 shift, /* i : scale for state buffer */ #ifdef OPT_AVOID_STATE_BUF_RESCALE const Word16 out_shift, /* i : scale for output buffer */ #endif HANDLE_CLDFB_FILTER_BANK h_cldfb /* i : filter bank state */ ); Loading
lib_dec/FEC_HQ_phase_ecu_fx.c +6 −6 Original line number Diff line number Diff line Loading @@ -4565,13 +4565,13 @@ static void ivas_fec_noise_filling_fx( pt6 = &p_mdct_ola[0]; FOR( k = 0; k < tmp_fx; k++ ) { L_tmp = L_mult( *sinq_tab, *sinq_tab ); /*Q30 */ L_tmp = L_mult( *sinq_tab, *sinq_tab ); /*Q31 */ sinq_tab++; q2 = round_fx( L_sub( 2147483647, L_tmp ) ); /*Q15 */ q1 = round_fx( L_tmp ); /*Q15 */ L_tmp = L_mult( ( *pt1 ), q1 ); /*Qsynth+16 */ L_tmp = L_add( L_tmp, L_shr( Mpy_32_16_1( L_deposit_h( *pt6++ ), q2 ), Q_old_out ) ); /*Qsynth+16 */ ( *pt1++ ) = round_fx( L_tmp ); /*Qsynth */ L_tmp = L_add_sat( L_tmp, L_shr_sat( Mpy_32_16_1( L_deposit_h( *pt6++ ), q2 ), Q_old_out ) ); /*Qsynth+16 */ ( *pt1++ ) = round_fx_sat( L_tmp ); /*Qsynth */ move16(); } Loading