Commit 71c85ce9 authored by Manuel Jander's avatar Manuel Jander
Browse files

Issue #867 : use 2 scale regions for reference_power vectors to improve...

Issue #867 : use 2 scale regions for reference_power vectors to improve precision. Work in progress.
parent fa042433
Loading
Loading
Loading
Loading
+3 −15
Original line number Diff line number Diff line
@@ -98,19 +98,7 @@
#define FIX_1298                                /* VA: fix possible assert in gaus_enc */
#define FIX_1300_ICA_SHIFT_QUANT_IMPROV         /* VA: Fix to 1300 to improve precision of the lag quantizer */
#define FIX_1301_CORRECT_TD_CNST                /* VA: Fix 1301, correct wrong constant in TD stereo */
#define NONBE_FIX_1277_EVS_DTX_HIGH_RATE_THRESHOLD      /* VA/Eri: FLP issue 1277: Fix Mismatch in DTX high-rate threshold between EVS float and BASOP */
#define NONBE_FIX_708_OSBA_BR_SWITCHING_CRASH   /* FhG: issue 708: fix crash in OSBA BR switching with long test vectors */
//#define OPT_STEREO_32KBPS_V1                    /* Optimization made in stereo decoding path for 32kbps decoding */
#define OPT_AVOID_STATE_BUF_RESCALE             /* Optimization made to avoid rescale of synth state buffer */
#define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx                 /*FhG: WMOPS tuning, nonbe*/
#define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot   /*FhG: WMOPS tuning, nonbe*/
/* The following two macros (IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST*) are independent of each other; they refer to different code blocks. */
#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE    /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */
//#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version. Obsoleted by MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE. */
#define HARM_PUSH_BIT
#define HARM_ENC_INIT
//#define HARM_SCE_INIT

#define TEST_HR

#define FIX_867_CLDFB_NRG_SCALE                 /* Issue 867: split cldfb energy scale into 2 regions for better precision */
//#define FIX_867_CLDFB_NRG_SCALE_CLDFB           /* Issue 867: use dynamic scale for CLDFB analysis. Almost zero improvement. */
//#define FIX_867_CLDFB_NRG_SCALE_CLDFB_MASK      /* Issue 867: erase higher cldfb values to remove noise from MDCT */
#endif
+174 −69

File changed.

Preview size limit exceeded, changes collapsed.

+75 −75
Original line number Diff line number Diff line
@@ -308,13 +308,11 @@ ivas_error ivas_dirac_dec_output_synthesis_open_fx(
#ifdef FIX_867_CLDFB_NRG_SCALE
        dirac_output_synthesis_state->reference_power_smooth_prev_q[0] = Q31;
        dirac_output_synthesis_state->reference_power_smooth_prev_q[1] = Q31;
        move16();
        move16();
        move16(); move16();
#else
        dirac_output_synthesis_state->reference_power_smooth_prev_q = Q31;
        move16();
#endif

        IF( ( dirac_output_synthesis_state->direction_smoothness_prev_fx = (Word32 *) malloc( hSpatParamRendCom->num_freq_bands * sizeof( Word32 ) ) ) == NULL )
        {
            return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis\n" ) );
@@ -497,8 +495,7 @@ void ivas_dirac_dec_output_synthesis_init_fx(
#ifdef FIX_867_CLDFB_NRG_SCALE
        h_dirac_output_synthesis_state->proto_power_smooth_prev_q[0] = Q31;
        h_dirac_output_synthesis_state->proto_power_smooth_prev_q[1] = Q31;
        move16();
        move16();
        move16(); move16();
#else
        h_dirac_output_synthesis_state->proto_power_smooth_prev_q = Q31;
        move16();
@@ -2054,6 +2051,26 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
        // Scale cy_auto_diff_smooth_fx if required
        IF( diff_start_band != 0 )
        {
#ifdef FIX_867_CLDFB_NRG_SCALE
            /* TODO: check whether rescaling these buffers to the common Q-factor (q_com) is necessary here at all. */
            q_com = s_min( s_min( q_reference_power_smooth[0], q_reference_power_smooth[1] ), h_dirac_output_synthesis_state->q_cy_auto_diff_smooth );
            scale_sig32( reference_power_smooth, CLDFB_NO_CHANNELS_HALF, sub( q_com, q_reference_power_smooth[0] ) );                                         /**q_reference_power_smooth->q_com*/
            scale_sig32( reference_power_smooth + CLDFB_NO_CHANNELS_HALF, sub( num_freq_bands, CLDFB_NO_CHANNELS_HALF ), sub( q_com, q_reference_power_smooth[1] ) );                                         /**q_reference_power_smooth->q_com*/
            scale_sig32( h_dirac_output_synthesis_state->reference_power_smooth_prev_fx, CLDFB_NO_CHANNELS_HALF, sub( q_com, q_reference_power_smooth[0] ) ); /**q_reference_power_smooth->q_com*/
            scale_sig32( h_dirac_output_synthesis_state->reference_power_smooth_prev_fx + CLDFB_NO_CHANNELS_HALF, sub( num_freq_bands, CLDFB_NO_CHANNELS_HALF ), sub( q_com, q_reference_power_smooth[1] ) ); /**q_reference_power_smooth->q_com*/
            scale_sig32( h_dirac_output_synthesis_state->cy_auto_diff_smooth_fx,
                         i_mult( num_freq_bands, nchan_target_psds ),
                         sub( q_com, h_dirac_output_synthesis_state->q_cy_auto_diff_smooth ) ); /*h_dirac_output_synthesis_state->q_cy_auto_diff_smooth -> q_com*/
            q_reference_power_smooth[0] = q_com;
            q_reference_power_smooth[1] = q_com;
            move16(); move16();
            h_dirac_output_synthesis_state->reference_power_smooth_prev_q[0] = q_com;
            h_dirac_output_synthesis_state->reference_power_smooth_prev_q[1] = q_com;
            move16(); move16();

            h_dirac_output_synthesis_state->q_cy_auto_diff_smooth = q_com;
            move16();
#else
            q_com = s_min( *q_reference_power_smooth, h_dirac_output_synthesis_state->q_cy_auto_diff_smooth );
            scale_sig32( reference_power_smooth, num_freq_bands, sub( q_com, *q_reference_power_smooth ) );                                         /**q_reference_power_smooth->q_com*/
            scale_sig32( h_dirac_output_synthesis_state->reference_power_smooth_prev_fx, num_freq_bands, sub( q_com, *q_reference_power_smooth ) ); /**q_reference_power_smooth->q_com*/
@@ -2067,6 +2084,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(

            h_dirac_output_synthesis_state->q_cy_auto_diff_smooth = q_com;
            move16();
#endif
        }
#endif

@@ -2077,6 +2095,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
                                               h_dirac_output_synthesis_state->diffuse_responses_square_fx,
                                               h_dirac_output_synthesis_state->cy_auto_diff_smooth_fx,
                                               &h_dirac_output_synthesis_state->q_cy_auto_diff_smooth );

    }

    /*-----------------------------------------------------------------*
@@ -2216,9 +2235,13 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(

        exp = 0;
        move16();

#if 0
        tmp = BASOP_Util_Divide3232_Scale( weightedDirectionSmoothness, L_add( sumWeight, EPSILON_FX ), &exp ); /*Q(15-exp)*/
        smoothedDirectionSmoothness = L_shl_sat( L_deposit_l( tmp ), add( sub( Q31, Q15 ), exp ) );             // Q31
#else
        L_tmp = BASOP_Util_Divide3232_Scale_cadence( weightedDirectionSmoothness, L_add( sumWeight, EPSILON_FX ), &exp ); /*Q(15-exp)*/
        smoothedDirectionSmoothness = L_shl_sat( L_tmp , exp );             // Q31
#endif

        h_dirac_output_synthesis_state->direction_smoothness_prev_fx[l] = smoothedDirectionSmoothness; // Q31
        move32();
@@ -2290,18 +2313,16 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
    // Move proto_power_smooth_fx to common Q-factor

#ifdef FIX_867_CLDFB_NRG_SCALE
    Word16 min_exp2 = -64;
    Word16 min_exp2 = MIN_16;
    min_exp = MIN_16;
    move16();
    move16();
    move16(); move16();
    Word16 q_tmp2 = Q31;
    q_tmp = Q31;
    move16();
    move16();
    move16(); move16();

    FOR( k = 0; k < num_protos_dir; k++ )
    {
        FOR( l = 0; l < s_min( num_freq_bands, CLDFB_NO_CHANNELS_HALF ); l++ )
        FOR( l = 0; l < CLDFB_NO_CHANNELS_HALF; l++ )
        {
            min_exp = s_max(min_exp, exp_arr[k * num_freq_bands + l]);
        }
@@ -2315,7 +2336,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(

    FOR( k = 0; k < num_protos_dir; k++ )
    {
        FOR( l = 0; l < s_min( num_freq_bands, CLDFB_NO_CHANNELS_HALF ); l++ )
        FOR( l = 0; l < CLDFB_NO_CHANNELS_HALF; l++ )
        {
            *p_power_smooth = L_shr( *p_power_smooth, sub( min_exp, exp_arr[k * num_freq_bands + l] ) ); /*(31-(exp-(31-q_proto_power_smooth)))->(31-(min_exp-(31-q_proto_power_smooth)))*/
            move32();
@@ -2328,20 +2349,16 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
            p_power_smooth++;
        }
    }
    q_tmp = add( sub( Q31, min_exp ), sub( Q31, h_dirac_output_synthesis_state->proto_power_smooth_q[0] ) );
    q_tmp2 = add( sub( Q31, min_exp2 ), sub( Q31, h_dirac_output_synthesis_state->proto_power_smooth_q[1] ) );

    // Update the Q-factor
    h_dirac_output_synthesis_state->proto_power_smooth_prev_q[0] = s_min( h_dirac_output_synthesis_state->proto_power_smooth_q[0], h_dirac_output_synthesis_state->proto_power_smooth_prev_q[0] );
    h_dirac_output_synthesis_state->proto_power_smooth_prev_q[1] = s_min( h_dirac_output_synthesis_state->proto_power_smooth_q[1], h_dirac_output_synthesis_state->proto_power_smooth_prev_q[1] );
    move16();
    move16();

    q_tmp = add( sub( Q31, min_exp ), sub( Q31, h_dirac_output_synthesis_state->proto_power_smooth_prev_q[0] ) );
    q_tmp2 = add( sub( Q31, min_exp2 ), sub( Q31, h_dirac_output_synthesis_state->proto_power_smooth_prev_q[1] ) );

    h_dirac_output_synthesis_state->proto_power_smooth_prev_q[0] = h_dirac_output_synthesis_state->proto_power_smooth_q[0];
    h_dirac_output_synthesis_state->proto_power_smooth_prev_q[1] = h_dirac_output_synthesis_state->proto_power_smooth_q[1];
    move16(); move16();
    h_dirac_output_synthesis_state->proto_power_smooth_q[0] = q_tmp;
    h_dirac_output_synthesis_state->proto_power_smooth_q[1] = q_tmp2;
    move16();
    move16();
    move16(); move16();
#else
    min_exp = MIN_16;
    move16();
@@ -2362,7 +2379,6 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(

    p_power_smooth_prev = h_dirac_output_synthesis_state->proto_power_smooth_prev_fx;
    p_power_smooth = h_dirac_output_synthesis_state->proto_power_smooth_fx;

    FOR( k = 0; k < num_protos_dir; k++ )
    {
        FOR( l = 0; l < num_freq_bands; l++ )
@@ -2552,7 +2568,6 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
                                      add( q_cy_auto_dir_smooth_prev_local[k], q_tmp ) ),
                                 Q31 ) );
#endif

            *( p_gains_dir ) = Sqrt32( L_tmp, &exp ); // (Q31 - exp)
            move32();
            *( p_gains_dir ) = L_shl_sat( *( p_gains_dir ), sub( h_dirac_output_synthesis_state->gains_dir_prev_q, sub( Q31, exp ) ) ); // gains_dir_prev_q
@@ -4082,7 +4097,7 @@ static void computeTargetPSDs_direct_fx(
    v_mult_fixed( direct_power_factor, reference_power, direct_power, num_freq_bands ); /* Q31 + Q(q_reference_power) - Q31 = Q(q_reference_power) */

#ifdef FIX_867_CLDFB_NRG_SCALE
    Word16 common1_q = s_min( *q_cy_auto_dir_smooth, s_min( q_reference_power[0], q_reference_power[1] ) );
    Word16 common1_q = s_min( *q_cy_auto_dir_smooth, s_min( q_reference_power[0], q_reference_power[0] ) );
    Word16 common2_q = s_min( *q_cy_cross_dir_smooth, s_min( q_reference_power[0], q_reference_power[1] ) );
#else
    Word16 common1_q = s_min( *q_cy_auto_dir_smooth, *q_reference_power );
@@ -4096,8 +4111,8 @@ static void computeTargetPSDs_direct_fx(

        v_mult_fixed( direct_power, &direct_responses_square[cur_idx], aux_buffer_res, num_freq_bands );               /* Q31 + Q(q_reference_power) - Q31 = Q(q_reference_power) */
#ifdef FIX_867_CLDFB_NRG_SCALE
        scale_sig32( aux_buffer_res, s_min( num_freq_bands, CLDFB_NO_CHANNELS_HALF ), sub( common1_q, q_reference_power[0] ) );                                    /* Q(common1_q) */
        scale_sig32( aux_buffer_res + CLDFB_NO_CHANNELS_HALF, s_max( 0, sub( num_freq_bands, CLDFB_NO_CHANNELS_HALF ) ), sub( common1_q, q_reference_power[1] ) ); /* Q(common1_q) */
        scale_sig32( aux_buffer_res, CLDFB_NO_CHANNELS_HALF, sub( common1_q, q_reference_power[0] ) );                        /* Q(common1_q) */
        scale_sig32( aux_buffer_res + CLDFB_NO_CHANNELS_HALF, sub( num_freq_bands, CLDFB_NO_CHANNELS_HALF ), sub( common1_q, q_reference_power[1] ) ); /* Q(common1_q) */
#else
        scale_sig32( aux_buffer_res, num_freq_bands, sub( common1_q, *q_reference_power ) );                           /* Q(common1_q) */
#endif
@@ -4106,8 +4121,8 @@ static void computeTargetPSDs_direct_fx(

        v_mult_fixed( direct_power, &direct_responses[cur_idx], aux_buffer_res, num_freq_bands );                        /* Q31 + Q(q_reference_power) - Q31 = Q(q_reference_power) */
#ifdef FIX_867_CLDFB_NRG_SCALE
        scale_sig32( aux_buffer_res, s_min( num_freq_bands, CLDFB_NO_CHANNELS_HALF ), sub( common2_q, q_reference_power[0] ) );                                    /* Q(common2_q) */
        scale_sig32( aux_buffer_res + CLDFB_NO_CHANNELS_HALF, s_max( 0, sub( num_freq_bands, CLDFB_NO_CHANNELS_HALF ) ), sub( common2_q, q_reference_power[1] ) ); /* Q(common2_q) */
        scale_sig32( aux_buffer_res, CLDFB_NO_CHANNELS_HALF, sub( common2_q, q_reference_power[0] ) );                        /* Q(common2_q) */
        scale_sig32( aux_buffer_res + CLDFB_NO_CHANNELS_HALF, sub( num_freq_bands, CLDFB_NO_CHANNELS_HALF ), sub( common2_q, q_reference_power[1] ) ); /* Q(common2_q) */
#else
        scale_sig32( aux_buffer_res, num_freq_bands, sub( common2_q, *q_reference_power ) );                             /* Q(common2_q) */
#endif
@@ -4186,7 +4201,7 @@ static void computeTargetPSDs_direct_subframe_fx(
        }
        q_tmp = W_norm( W_max );
#ifdef FIX_867_CLDFB_NRG_SCALE
        FOR( i = 0; i < s_min( num_freq_bands, CLDFB_NO_CHANNELS_HALF ); i++ )
        FOR( i = 0; i < CLDFB_NO_CHANNELS_HALF; i++ )
        {
            cy_auto_dir_smooth[cur_idx + i] = W_extract_h( W_shl( W_tmp[i], q_tmp ) ); /*q_reference_power[0]+q_tmp*/
            move32();
@@ -4197,15 +4212,14 @@ static void computeTargetPSDs_direct_subframe_fx(
            cy_auto_dir_smooth[cur_idx + i] = W_extract_h( W_shl( W_tmp[i], q_tmp2 ) ); /*q_reference_power[1]+q_tmp*/
            move32();
        }
        q_cy_auto_dir_smooth[ch_idx] = add( q_reference_power[0], q_tmp );
        move16();
#else
        FOR( i = 0; i < num_freq_bands; i++ )
        {
            cy_auto_dir_smooth[cur_idx + i] = W_extract_h( W_shl( W_tmp[i], q_tmp ) ); /*q_reference_power+q_tmp*/
            move32();
        }
        q_cy_auto_dir_smooth[ch_idx] = add( *q_reference_power, q_tmp );
#endif
        q_cy_auto_dir_smooth[ch_idx] = add( q_reference_power[0], q_tmp );
        move16();
#endif
#else
@@ -4228,12 +4242,8 @@ static void computeTargetPSDs_direct_subframe_fx(
#endif
    }

#ifdef FIX_867_CLDFB_NRG_SCALE
    *q_cy_cross_dir_smooth = q_reference_power[0];
    move16();
#else
    *q_cy_cross_dir_smooth = *q_reference_power;
    move16();
#endif

    return;
@@ -4311,16 +4321,7 @@ static void computeTargetPSDs_diffuse_subframe_fx(
    v_mult_fixed( diffuse_power_factor, reference_power, diffuse_power, num_freq_bands ); // (Q31, q_reference_power) -> q_reference_power

#ifdef FIX_867_CLDFB_NRG_SCALE
    q_diffuse_power = s_min( q_reference_power[0], q_reference_power[1] );
    Scale_sig32( diffuse_power, s_min( num_freq_bands, CLDFB_NO_CHANNELS_HALF ), sub( q_diffuse_power, q_reference_power[0] ) );
    Scale_sig32( diffuse_power + CLDFB_NO_CHANNELS_HALF, s_max( 0, sub( num_freq_bands, CLDFB_NO_CHANNELS_HALF ) ), sub( q_diffuse_power, q_reference_power[1] ) );
    q_cy_auto_diff_smooth_new = q_diffuse_power;
    IF( LT_16( *q_cy_auto_diff_smooth, q_diffuse_power ) )
    {
        Scale_sig32( diffuse_power, num_freq_bands, sub( *q_cy_auto_diff_smooth, q_cy_auto_diff_smooth_new ) );
        q_cy_auto_diff_smooth_new = *q_cy_auto_diff_smooth;
        move16();
    }
    Scale_sig32(diffuse_power + CLDFB_NO_CHANNELS_HALF, s_max(0, sub(num_freq_bands, CLDFB_NO_CHANNELS_HALF)), sub(q_reference_power[0], q_reference_power[1]));
#endif
    /* compute target auto and cross PSDs of current frame (smoothed) */
    FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
@@ -4394,7 +4395,6 @@ static void computeTargetPSDs_diffuse_with_onsets_fx(
        move16();
    }
#endif

    /* compute target auto and cross PSDs of current frame (smoothed) */
    FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
    {
+190 −182

File changed.

Preview size limit exceeded, changes collapsed.

+22 −32
Original line number Diff line number Diff line
@@ -8309,8 +8309,7 @@ static void intermidiate_ext_dirac_render(
#ifdef FIX_867_CLDFB_NRG_SCALE
        DirAC_mem.reference_power_smooth_q[0] = DirAC_mem.reference_power_q[0] = Q31;
        DirAC_mem.reference_power_smooth_q[1] = DirAC_mem.reference_power_q[1] = Q31;
        move16();
        move16();
        move16(); move16();
#else
        DirAC_mem.reference_power_smooth_q = DirAC_mem.reference_power_q = Q31;
        move16();
@@ -8444,30 +8443,36 @@ static void intermidiate_ext_dirac_render(
        IF( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx )
        {
#ifdef FIX_867_CLDFB_NRG_SCALE
#if 0
            /* Possible improvement: normalize both scale regions individually. */
            tmp = L_norm_arr( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx, imult1616( hDirACRend->num_protos_dir, hSpatParamRendCom->num_freq_bands ) );
            scale_sig32( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx, hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len, tmp ); /* Q(hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q + tmp) */
            hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q[0] = add( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q[0], tmp );
            hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q[1] = add( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q[1], tmp );
            move16(); move16();
#else
            /* Possible improvement: normalize both scale regions individually. */
            tmp = 0;
            move16();
            FOR( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands ) )
            {
                tmp = s_min( tmp, L_norm_arr( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx + slot_idx, s_min( hSpatParamRendCom->num_freq_bands, CLDFB_NO_CHANNELS_HALF ) ) );
            FOR ( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands) ) {
                tmp = s_min(tmp, L_norm_arr( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx + slot_idx,  CLDFB_NO_CHANNELS_HALF ) );
            }
            FOR( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands ) )
            {
                scale_sig32( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx + slot_idx, s_min( hSpatParamRendCom->num_freq_bands, CLDFB_NO_CHANNELS_HALF ), tmp ); /* Q(hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q + tmp) */
            FOR ( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands) ) {
                scale_sig32( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx + slot_idx, CLDFB_NO_CHANNELS_HALF, tmp ); /* Q(hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q + tmp) */
            }
            hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q[0] = add( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q[0], tmp );
            move16();
            tmp = 0;
            move16();
            FOR( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands ) )
            {
                tmp = s_min( tmp, L_norm_arr( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx + slot_idx + CLDFB_NO_CHANNELS_HALF, s_max( 0, sub( hSpatParamRendCom->num_freq_bands, CLDFB_NO_CHANNELS_HALF ) ) ) );
            FOR ( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands) ) {
                tmp = s_min(tmp, L_norm_arr( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx + slot_idx + CLDFB_NO_CHANNELS_HALF, sub( hSpatParamRendCom->num_freq_bands, CLDFB_NO_CHANNELS_HALF ) ) );
            }
            FOR( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands ) )
            {
                scale_sig32( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx + slot_idx + CLDFB_NO_CHANNELS_HALF, s_max( 0, sub( hSpatParamRendCom->num_freq_bands, CLDFB_NO_CHANNELS_HALF ) ), tmp ); /* Q(hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q + tmp) */
            FOR ( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands) ) {
                scale_sig32( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx + slot_idx + CLDFB_NO_CHANNELS_HALF, sub( hSpatParamRendCom->num_freq_bands, CLDFB_NO_CHANNELS_HALF ), tmp ); /* Q(hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q + tmp) */
            }
            hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q[1] = add( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q[1], tmp );
            move16();
#endif
#else
            tmp = L_norm_arr( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx, imult1616( hDirACRend->num_protos_dir, hSpatParamRendCom->num_freq_bands ) );
            scale_sig32( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_fx, hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len, tmp ); /* Q(hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_q + tmp) */
@@ -8475,25 +8480,10 @@ static void intermidiate_ext_dirac_render(
            move16();
#endif
#ifdef FIX_867_CLDFB_NRG_SCALE
            tmp = 0;
            FOR( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands ) )
            {
                tmp = s_min( tmp, L_norm_arr( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_fx, s_min( CLDFB_NO_CHANNELS_HALF, hSpatParamRendCom->num_freq_bands ) ) );
            }
            FOR( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands ) )
            {
                scale_sig32( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_fx, s_min( CLDFB_NO_CHANNELS_HALF, hSpatParamRendCom->num_freq_bands ), tmp ); /* Q(hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_q + tmp) */
            }
            /* Possible improvement: normalize both scale regions individually. */
            tmp = L_norm_arr( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_fx, imult1616( hDirACRend->num_protos_dir, hSpatParamRendCom->num_freq_bands ) );
            scale_sig32( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_fx, hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_len, tmp ); /* Q(hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_q + tmp) */
            hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_q[0] = add( tmp, hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_q[0] );
            move16();
            FOR( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands ) )
            {
                tmp = s_min( tmp, L_norm_arr( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_fx + CLDFB_NO_CHANNELS_HALF, s_max( 0, sub( hSpatParamRendCom->num_freq_bands, CLDFB_NO_CHANNELS_HALF ) ) ) );
            }
            FOR( slot_idx = 0; slot_idx < hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_len; slot_idx = add( slot_idx, hSpatParamRendCom->num_freq_bands ) )
            {
                scale_sig32( hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_fx + CLDFB_NO_CHANNELS_HALF, s_max( 0, sub( hSpatParamRendCom->num_freq_bands, CLDFB_NO_CHANNELS_HALF ) ), tmp ); /* Q(hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_q + tmp) */
            }
            hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_q[1] = add( tmp, hDirACRend->h_output_synthesis_psd_state.proto_power_smooth_prev_q[1] );
            move16();
#else