Commit ec0aad54 authored by Sandesh Venkatesh
Browse files

deindex_sph_idx_fx and ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx updates

[x] Updated deindex_sph_idx_fx and ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx; these changes reduce the MLD for MASA 2TC at 512 kbps (48 kHz input, 48 kHz output, 5_1 output).
parent 5790669c
Loading
Loading
Loading
Loading
+254 −11
Original line number Diff line number Diff line
@@ -66,6 +66,258 @@ static int16_t quantize_phi_masa( float phi, const int16_t flag_delta, float *ph
static Word16 quantize_theta_masa_fx( const Word32 x_fx, const Word16 no_cb, Word32 *xhat_fx );

static Word16 quantize_phi_masa_fx( const Word32 phi, const Word16 flag_delta, Word32 *phi_hat, const Word16 n );

/*
 * estim_round[]: lookup table with MASA_NO_CIRCLES + 1 integer (Q0) entries,
 * indexed by the elevation circle index id_th.
 *
 * In deindex_sph_idx_fx() the entry is doubled and added to base_low on the
 * branch taken when id_th >= 2 and id_th != 2.
 *
 * NOTE(review): the entries appear to be the precomputed, rounded result of
 *     MASA_NTOT2_FAC * ( sin( MASA_ANGLE_AT_EQUATOR * ( id_th - 0.5 ) ) - MASA_ASIN_OFFSET )
 * which that function otherwise evaluates at run time with fixed-point
 * operators - confirm against the floating-point reference before relying on it.
 *
 * Declared const because the visible use only reads the table.
 */
static const Word32 estim_round[MASA_NO_CIRCLES + 1] = {
    /* Q0 */
    -423,
    0,
    422,
    845,
    1267,
    1689,
    2111,
    2532,
    2953,
    3373,
    3793,
    4212,
    4630,
    5047,
    5464,
    5880,
    6294,
    6708,
    7120,
    7532,
    7941,
    8350,
    8757,
    9163,
    9567,
    9969,
    10370,
    10769,
    11166,
    11561,
    11955,
    12346,
    12735,
    13122,
    13507,
    13890,
    14270,
    14648,
    15023,
    15396,
    15766,
    16134,
    16499,
    16861,
    17220,
    17576,
    17930,
    18280,
    18627,
    18971,
    19312,
    19650,
    19984,
    20315,
    20643,
    20967,
    21288,
    21605,
    21918,
    22228,
    22534,
    22836,
    23135,
    23429,
    23720,
    24007,
    24289,
    24568,
    24842,
    25112,
    25378,
    25640,
    25898,
    26151,
    26400,
    26644,
    26884,
    27119,
    27350,
    27576,
    27798,
    28015,
    28227,
    28435,
    28637,
    28835,
    29029,
    29217,
    29400,
    29579,
    29752,
    29921,
    30084,
    30243,
    30396,
    30544,
    30688,
    30826,
    30959,
    31086,
    31209,
    31326,
    31438,
    31545,
    31647,
    31743,
    31834,
    31919,
    32000,
    32075,
    32144,
    32208,
    32267,
    32320,
    32368,
    32411,
    32448,
    32480,
    32506,
    32527,
    32542,
    32552
};

/*
 * estim_ceil[]: lookup table with MASA_NO_CIRCLES + 1 integer (Q0) entries,
 * indexed by the elevation circle index id_th.
 *
 * In deindex_sph_idx_fx() the entry is doubled and added to base_low on the
 * branch taken when id_th == 2.
 *
 * NOTE(review): the entries appear to be the precomputed, rounded-up (ceil)
 * result of
 *     MASA_NTOT2_FAC * ( sin( MASA_ANGLE_AT_EQUATOR * ( id_th - 0.5 ) ) - MASA_ASIN_OFFSET )
 * which that function otherwise evaluates at run time with fixed-point
 * operators - confirm against the floating-point reference before relying on it.
 *
 * Declared const because the visible use only reads the table.
 */
static const Word32 estim_ceil[MASA_NO_CIRCLES + 1] = {
    /* Q0 */
    -422,
    0,
    423,
    845,
    1268,
    1690,
    2111,
    2532,
    2953,
    3374,
    3793,
    4212,
    4630,
    5048,
    5465,
    5880,
    6295,
    6708,
    7121,
    7532,
    7942,
    8350,
    8758,
    9163,
    9567,
    9970,
    10371,
    10770,
    11167,
    11562,
    11955,
    12347,
    12736,
    13123,
    13508,
    13890,
    14270,
    14648,
    15024,
    15396,
    15767,
    16134,
    16499,
    16861,
    17220,
    17577,
    17930,
    18280,
    18628,
    18972,
    19313,
    19650,
    19985,
    20316,
    20644,
    20968,
    21288,
    21605,
    21919,
    22229,
    22535,
    22837,
    23135,
    23430,
    23720,
    24007,
    24290,
    24568,
    24843,
    25113,
    25379,
    25641,
    25898,
    26151,
    26400,
    26644,
    26884,
    27120,
    27350,
    27577,
    27798,
    28015,
    28228,
    28435,
    28638,
    28836,
    29029,
    29217,
    29401,
    29579,
    29753,
    29921,
    30085,
    30243,
    30397,
    30545,
    30688,
    30826,
    30959,
    31087,
    31210,
    31327,
    31439,
    31546,
    31647,
    31743,
    31834,
    31920,
    32000,
    32075,
    32145,
    32209,
    32268,
    32321,
    32369,
    32411,
    32449,
    32480,
    32506,
    32527,
    32543,
    32552
};
#endif


@@ -1564,26 +1816,17 @@ void deindex_sph_idx_fx(
    }
    ELSE
    {
        estim_fx = Mpy_32_32( MASA_ANGLE_AT_EQUATOR_Q31, L_sub( L_shl( id_th, Q22 ), 2097152 ) /* 0.5f in Q22 */ ); /* Q22 */
        base_low = n[0];
        move32();
        IF( GE_16( id_th, 2 ) )
        {
            tmp32 = estim_fx % 26353590;                                     /* 2 * PI in Q22 */
            tmp32 = Mpy_32_32( tmp32, 341782638 /* 2147483647 / 2 * PI */ ); /* Q7 */
            tmp16 = extract_l( L_shr( tmp32, Q7 ) );

            tmp16 = getSineWord16R2( tmp16 );
            tmp16 = sub( tmp16, MASA_ASIN_OFFSET_Q15 );
            tmp32 = Mpy_32_16_1( MASA_NTOT2_FAC_Q15, tmp16 ); /* Q15 */

            IF( EQ_16( id_th, 2 ) )
            {
                base_low = L_add( base_low, L_shl( extract_l( L_shr( ceil_fixed( tmp32, Q15 ), Q15 ) ), 1 ) ); /* Q0 */
                base_low = L_add( base_low, L_shl( estim_ceil[id_th], 1 ) ); /* Q0 */
            }
            ELSE
            {
                base_low = L_add( base_low, L_shl( extract_l( L_shr_r( tmp32, Q15 ) ), 1 ) ); /* Q0 */
                base_low = L_add( base_low, L_shl( estim_round[id_th], 1 ) ); /* Q0 */
            }
        }
        base_up = L_add( base_low, L_shl( n[id_th], 1 ) );
+1 −1
Original line number Diff line number Diff line
@@ -1844,7 +1844,7 @@ void stereo_icBWE_decproc_fx(
            FOR( i = 0; i < output_frame; i++ )
            {
#ifdef FIX_826_PRECISION_LOST_AND_COMPL
                outputHB[0][i] = W_shr( W_mac_32_16( W_mult_32_16( outputHB[0][i], 16384 ), outputHB[1][i], 16384 ), Q16 );
                outputHB[0][i] = W_extract_l( W_shr( W_mac_32_16( W_mult_32_16( outputHB[0][i], 16384 ), outputHB[1][i], 16384 ), Q16 ) );
#else
                outputHB[0][i] = L_shr( ( outputHB[0][i] + outputHB[1][i] ), 1 );
#endif
+123 −38
Original line number Diff line number Diff line
@@ -2947,6 +2947,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
    Word32 *p_gains_dir, *p_gains_diff;
    Word32 g, g1, g2;
    Word32 *p_cy_auto_dir_smooth, *p_cy_auto_dir_smooth_prev;
    Word16 q_cy_auto_dir_smooth_local[MAX_OUTPUT_CHANNELS], q_cy_auto_dir_smooth_prev_local[MAX_OUTPUT_CHANNELS];
    Word32 *p_cy_cross_dir_smooth, *p_cy_cross_dir_smooth_prev;
    Word32 *p_cy_auto_diff_smooth, *p_cy_auto_diff_smooth_prev;
    Word32 gains_dir[CLDFB_NO_CHANNELS_MAX * MAX_OUTPUT_CHANNELS];
@@ -2972,6 +2973,10 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
    Word32 tmp32;
    move16();

    Word64 Cldfb_RealBuffer64_fx[MAX_OUTPUT_CHANNELS][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX];
    Word64 Cldfb_ImagBuffer64_fx[MAX_OUTPUT_CHANNELS][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX];
    Word64 W_temp = 0;
    move64();
    push_wmops( "dirac_out_synth_sfr" );

    h_dirac_output_synthesis_params = &( hDirACRend->h_output_synthesis_psd_params );
@@ -2986,6 +2991,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
    move16();
    num_freq_bands = hSpatParamRendCom->num_freq_bands;
    move16();
    set16_fx( q_cy_auto_dir_smooth_local, h_dirac_output_synthesis_state->q_cy_auto_dir_smooth, nchan_out_woLFE );

    /*-----------------------------------------------------------------*
     * compute target PSDs
@@ -3022,7 +3028,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
                                              h_dirac_output_synthesis_state->direct_responses_fx,
                                              h_dirac_output_synthesis_state->direct_responses_square_fx,
                                              h_dirac_output_synthesis_state->cy_auto_dir_smooth_fx,
                                              &h_dirac_output_synthesis_state->q_cy_auto_dir_smooth,
                                              q_cy_auto_dir_smooth_local,
                                              h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx,
                                              &h_dirac_output_synthesis_state->q_cy_cross_dir_smooth );

@@ -3063,7 +3069,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(

        p_cy_auto_dir_smooth = h_dirac_output_synthesis_state->cy_auto_dir_smooth_fx;   // q_cy_auto_dir_smooth
        p_cy_auto_diff_smooth = h_dirac_output_synthesis_state->cy_auto_diff_smooth_fx; // q_cy_auto_diff_smooth
        q_com = s_min( h_dirac_output_synthesis_state->q_cy_auto_dir_smooth, h_dirac_output_synthesis_state->q_cy_auto_diff_smooth );
        q_com = s_min( q_cy_auto_dir_smooth_local[1], h_dirac_output_synthesis_state->q_cy_auto_diff_smooth );

        IF( EQ_32( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_MONO ) )
        {
@@ -3071,7 +3077,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
            move16();
            exp1 = 0;
            move16();
            tmp32 = BASOP_Util_Divide3232_Scale_cadence( L_shl( p_cy_auto_dir_smooth[num_freq_bands], sub( q_com, h_dirac_output_synthesis_state->q_cy_auto_dir_smooth ) ),
            tmp32 = BASOP_Util_Divide3232_Scale_cadence( L_shl( p_cy_auto_dir_smooth[num_freq_bands], sub( q_com, q_cy_auto_dir_smooth_local[1] ) ),
                                                         ( L_add( Sqrt32( h_dirac_output_synthesis_state->direct_power_factor_fx[0], &exp ), EPSILON_FX ) ), // (Q31 - exp)
                                                         &exp1 );
            target_power_y = L_shr( tmp32, 1 ); // Q31 + (q_com - (31 - exp))
@@ -3095,7 +3101,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
        ELSE
        {
            target_power_y = L_add(
                L_shl( p_cy_auto_dir_smooth[num_freq_bands], sub( q_com, h_dirac_output_synthesis_state->q_cy_auto_dir_smooth ) ),
                L_shl( p_cy_auto_dir_smooth[num_freq_bands], sub( q_com, q_cy_auto_dir_smooth_local[1] ) ),
                L_shl( p_cy_auto_diff_smooth[num_freq_bands], sub( q_com, h_dirac_output_synthesis_state->q_cy_auto_diff_smooth ) ) ); // q_com
            exp = q_com;
            move16();
@@ -3249,7 +3255,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
    }

    // Move proto_power_smooth_fx to common Q-factor
    min_exp = 0;
    min_exp = MIN_16;
    move16();
    q_tmp = Q31;
    move16();
@@ -3295,12 +3301,21 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(

    p_cy_auto_dir_smooth = h_dirac_output_synthesis_state->cy_auto_dir_smooth_fx;
    p_cy_auto_dir_smooth_prev = h_dirac_output_synthesis_state->cy_auto_dir_smooth_prev_fx;
    q_com = s_min( h_dirac_output_synthesis_state->q_cy_auto_dir_smooth, h_dirac_output_synthesis_state->q_cy_auto_dir_smooth_prev );
    scale_sig32( p_cy_auto_dir_smooth, imult1616( nchan_out_woLFE, num_freq_bands ), sub( q_com, h_dirac_output_synthesis_state->q_cy_auto_dir_smooth ) );
    scale_sig32( p_cy_auto_dir_smooth_prev, imult1616( nchan_out_woLFE, num_freq_bands ), sub( q_com, h_dirac_output_synthesis_state->q_cy_auto_dir_smooth_prev ) );
    h_dirac_output_synthesis_state->q_cy_auto_dir_smooth = h_dirac_output_synthesis_state->q_cy_auto_dir_smooth_prev = q_com;
    FOR( k = 0; k < nchan_out_woLFE; k++ )
    {
        q_cy_auto_dir_smooth_prev_local[k] = getScaleFactor32( p_cy_auto_dir_smooth_prev + imult1616( k, num_freq_bands ), num_freq_bands );
        move16();
        scale_sig32( p_cy_auto_dir_smooth_prev + imult1616( k, num_freq_bands ), num_freq_bands, q_cy_auto_dir_smooth_prev_local[k] );
        q_cy_auto_dir_smooth_prev_local[k] = add( q_cy_auto_dir_smooth_prev_local[k], h_dirac_output_synthesis_state->q_cy_auto_dir_smooth_prev );
        move16();
        q_com = s_min( q_cy_auto_dir_smooth_local[k], q_cy_auto_dir_smooth_prev_local[k] );
        scale_sig32( p_cy_auto_dir_smooth + imult1616( k, num_freq_bands ), num_freq_bands, sub( q_com, q_cy_auto_dir_smooth_local[k] ) );
        scale_sig32( p_cy_auto_dir_smooth_prev + imult1616( k, num_freq_bands ), num_freq_bands, sub( q_com, q_cy_auto_dir_smooth_prev_local[k] ) );
        q_cy_auto_dir_smooth_local[k] = q_cy_auto_dir_smooth_prev_local[k] = q_com;
        move16();
        move16();
    }


    p_cy_cross_dir_smooth = h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx;
    p_cy_cross_dir_smooth_prev = h_dirac_output_synthesis_state->cy_cross_dir_smooth_prev_fx;
@@ -3331,16 +3346,16 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
            move32();
            g2 = L_sub( ONE_IN_Q31, g1 ); // Q31
            *( p_cy_auto_dir_smooth_prev ) = L_add( Mpy_32_32( g1, ( *( p_cy_auto_dir_smooth++ ) ) ),
                                                    Mpy_32_32( g2, ( *( p_cy_auto_dir_smooth_prev ) ) ) ); // (Q31, q_cy_auto_dir_smooth_prev) -> q_cy_auto_dir_smooth_prev
                                                    Mpy_32_32( g2, ( *( p_cy_auto_dir_smooth_prev ) ) ) ); // (Q31, q_cy_auto_dir_smooth_prev_local) -> q_cy_auto_dir_smooth_prev_local
            move32();
            *( p_cy_cross_dir_smooth_prev ) = L_add( Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth++ ) ) ),
                                                     Mpy_32_32( g2, ( *( p_cy_cross_dir_smooth_prev ) ) ) ); // (Q31, q_cy_cross_dir_smooth_prev) -> q_cy_cross_dir_smooth_prev
            move32();

            power_smooth_temp = L_shl( *p_power_smooth, norm_l( *p_power_smooth ) );
            L_tmp = Mpy_32_32( power_smooth_temp, ( *( p_cy_auto_dir_smooth_prev++ ) ) ); // proto_power_smooth_q + norm_l( *p_power_smooth ) ) + q_cy_auto_dir_smooth_prev - 31
            L_tmp = Mpy_32_32( power_smooth_temp, ( *( p_cy_auto_dir_smooth_prev++ ) ) ); // proto_power_smooth_q + norm_l( *p_power_smooth ) ) + q_cy_auto_dir_smooth_prev_local - 31
            exp = sub( Q31, sub( add( add( h_dirac_output_synthesis_state->proto_power_smooth_q, norm_l( *p_power_smooth ) ),
                                      h_dirac_output_synthesis_state->q_cy_auto_dir_smooth_prev ),
                                      q_cy_auto_dir_smooth_prev_local[k] ),
                                 Q31 ) );
            p_power_smooth++;

@@ -3406,17 +3421,21 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
            g1 = alpha[l]; // Q31
            move32();
            g2 = L_sub( ONE_IN_Q31, g1 ); // Q31
            *( p_cy_auto_dir_smooth_prev ) = L_add( Mpy_32_32( g1, ( *( p_cy_auto_dir_smooth++ ) ) ),
                                                    Mpy_32_32( g2, ( *( p_cy_auto_dir_smooth_prev ) ) ) ); // (Q31, q_cy_auto_dir_smooth_prev) -> q_cy_auto_dir_smooth_prev
            W_temp = W_add( W_mult_32_32( g1, ( *( p_cy_auto_dir_smooth++ ) ) ),
                            W_mult_32_32( g2, ( *( p_cy_auto_dir_smooth_prev ) ) ) );
            q_tmp = W_norm( W_temp );
            L_tmp = W_extract_h( W_shl( W_temp, q_tmp ) ); // q_cy_auto_dir_smooth_prev_local + q_tmp
            *( p_cy_auto_dir_smooth_prev++ ) = L_shr_r( L_tmp, q_tmp );

            move32();
            *( p_cy_cross_dir_smooth_prev ) = L_add( Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth++ ) ) ),
                                                     Mpy_32_32( g2, ( *( p_cy_cross_dir_smooth_prev ) ) ) ); // (Q31, q_cy_cross_dir_smooth_prev) -> q_cy_cross_dir_smooth_prev
            move32();

            power_smooth_temp = L_shl( *p_power_smooth, norm_l( *p_power_smooth ) );
            L_tmp = Mpy_32_32( power_smooth_temp, ( *( p_cy_auto_dir_smooth_prev++ ) ) ); // proto_power_smooth_q + norm_l( *p_power_smooth ) ) + q_cy_auto_dir_smooth_prev - 31
            L_tmp = Mpy_32_32( power_smooth_temp, L_tmp ); // proto_power_smooth_q + norm_l( *p_power_smooth ) ) + q_cy_auto_dir_smooth_prev_local - 31
            exp = sub( Q31, sub( add( add( h_dirac_output_synthesis_state->proto_power_smooth_q, norm_l( *p_power_smooth ) ),
                                      h_dirac_output_synthesis_state->q_cy_auto_dir_smooth_prev ),
                                      add( q_cy_auto_dir_smooth_prev_local[k], q_tmp ) ),
                                 Q31 ) );

            *( p_gains_dir ) = Sqrt32( L_tmp, &exp ); // (Q31 - exp)
@@ -3498,10 +3517,12 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
            FOR( l = 0; l < num_freq_bands; l++ )
            {
                g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_dir_prev_q) -> gains_dir_prev_q
                RealBuffer[k][buf_idx][l] = Mpy_32_32( g, ( *( p_power_smooth++ ) ) );           // (gains_dir_prev_q, q_proto_direct_buffer) -> gains_dir_prev_q + q_proto_direct_buffer - 31
                move32();
                ImagBuffer[k][buf_idx][l] = Mpy_32_32( g, ( *( p_power_smooth++ ) ) ); // (gains_dir_prev_q, q_proto_direct_buffer) -> gains_dir_prev_q + q_proto_direct_buffer - 31
                move32();

                Cldfb_RealBuffer64_fx[k][buf_idx][l] = W_mult0_32_32( g, ( *( p_power_smooth++ ) ) ); // (gains_dir_prev_q, q_proto_direct_buffer) -> gains_dir_prev_q + q_proto_direct_buffer
                move64();

                Cldfb_ImagBuffer64_fx[k][buf_idx][l] = W_mult0_32_32( g, ( *( p_power_smooth++ ) ) ); // (gains_dir_prev_q, q_proto_direct_buffer) -> gains_dir_prev_q + q_proto_direct_buffer
                move64();
            }
        }

@@ -3518,12 +3539,23 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
            FOR( l = 0; l < h_dirac_output_synthesis_params->max_band_decorr; l++ )
            {
                g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_diff_prev_q) -> gains_diff_prev_q
                RealBuffer[k][buf_idx][l] = L_add( L_shr( RealBuffer[k][buf_idx][l], Q1 ),
                                                   L_shr( Mpy_32_32( g, ( *( p_power_smooth_diff++ ) ) ), sub( Q1, q_align ) ) ); // (gains_diff_prev_q, q_proto_direct_buffer) >> Q1 -> gains_diff_prev_q + q_proto_direct_buffer - 32
                move32();
                ImagBuffer[k][buf_idx][l] = L_add( L_shr( ImagBuffer[k][buf_idx][l], Q1 ),
                                                   L_shr( Mpy_32_32( g, ( *( p_power_smooth_diff++ ) ) ), sub( Q1, q_align ) ) ); // (gains_diff_prev_q, q_proto_direct_buffer) >> Q1 -> gains_diff_prev_q + q_proto_direct_buffer - 32
                move32();
                Cldfb_RealBuffer64_fx[k][buf_idx][l] = W_add( Cldfb_RealBuffer64_fx[k][buf_idx][l],
                                                              W_shr( W_mult0_32_32( g, ( *( p_power_smooth_diff++ ) ) ), negate( q_align ) ) ); // (gains_diff_prev_q, q_proto_direct_buffer) -> gains_diff_prev_q + q_proto_direct_buffer
                move64();

                if ( LT_64( W_temp, W_abs( Cldfb_RealBuffer64_fx[k][buf_idx][l] ) ) )
                {
                    W_temp = W_abs( Cldfb_RealBuffer64_fx[k][buf_idx][l] );
                }

                Cldfb_ImagBuffer64_fx[k][buf_idx][l] = W_add( Cldfb_ImagBuffer64_fx[k][buf_idx][l],
                                                              W_shr( W_mult0_32_32( g, ( *( p_power_smooth_diff++ ) ) ), negate( q_align ) ) ); // (gains_diff_prev_q, q_proto_direct_buffer) -> gains_diff_prev_q + q_proto_direct_buffer
                move64();

                if ( LT_64( W_temp, W_abs( Cldfb_ImagBuffer64_fx[k][buf_idx][l] ) ) )
                {
                    W_temp = W_abs( Cldfb_ImagBuffer64_fx[k][buf_idx][l] );
                }
            }

            /*Direct proto*/
@@ -3534,16 +3566,42 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
            FOR( ; l < num_freq_bands; l++ )
            {
                g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_diff_prev_q) -> gains_diff_prev_q
                RealBuffer[k][buf_idx][l] = L_add( L_shr( RealBuffer[k][buf_idx][l], Q1 ),
                                                   L_shr( Mpy_32_32( g, ( *( p_power_smooth++ ) ) ), Q1 ) ); // (gains_diff_prev_q, q_proto_direct_buffer) >> Q1 -> gains_diff_prev_q + q_proto_direct_buffer - 32
                Cldfb_RealBuffer64_fx[k][buf_idx][l] = W_add( Cldfb_RealBuffer64_fx[k][buf_idx][l],
                                                              W_mult0_32_32( g, ( *( p_power_smooth++ ) ) ) ); // (gains_diff_prev_q, q_proto_direct_buffer) >> Q1 -> gains_diff_prev_q + q_proto_direct_buffer - 32
                move64();

                if ( LT_64( W_temp, W_abs( Cldfb_RealBuffer64_fx[k][buf_idx][l] ) ) )
                {
                    W_temp = W_abs( Cldfb_RealBuffer64_fx[k][buf_idx][l] );
                }

                Cldfb_ImagBuffer64_fx[k][buf_idx][l] = W_add( Cldfb_ImagBuffer64_fx[k][buf_idx][l],
                                                              W_mult0_32_32( g, ( *( p_power_smooth++ ) ) ) ); // (gains_diff_prev_q, q_proto_direct_buffer) >> Q1 -> gains_diff_prev_q + q_proto_direct_buffer - 32
                move64();

                if ( LT_64( W_temp, W_abs( Cldfb_ImagBuffer64_fx[k][buf_idx][l] ) ) )
                {
                    W_temp = W_abs( Cldfb_ImagBuffer64_fx[k][buf_idx][l] );
                }
            }
        }
    }
    q_align = W_norm( W_temp );
    FOR( buf_idx = 0; buf_idx < nbslots; ++buf_idx )
    {
        FOR( k = 0; k < nchan_out_woLFE; k++ )
        {
            FOR( l = 0; l < num_freq_bands; l++ )
            {
                RealBuffer[k][buf_idx][l] = W_extract_h( W_shl( Cldfb_RealBuffer64_fx[k][buf_idx][l], q_align ) );
                move32();
                ImagBuffer[k][buf_idx][l] = L_add( L_shr( ImagBuffer[k][buf_idx][l], Q1 ),
                                                   L_shr( Mpy_32_32( g, ( *( p_power_smooth++ ) ) ), Q1 ) ); // (gains_diff_prev_q, q_proto_direct_buffer) >> Q1 -> gains_diff_prev_q + q_proto_direct_buffer - 32
                ImagBuffer[k][buf_idx][l] = W_extract_h( W_shl( Cldfb_ImagBuffer64_fx[k][buf_idx][l], q_align ) );
                move32();
            }
        }
    }
    *q_Cldfb = sub( sub( add( h_dirac_output_synthesis_state->proto_direct_buffer_f_q, h_dirac_output_synthesis_state->gains_dir_prev_q ), Q31 ), Q1 );

    *q_Cldfb = sub( add( add( h_dirac_output_synthesis_state->proto_direct_buffer_f_q, h_dirac_output_synthesis_state->gains_dir_prev_q ), q_align ), 32 );
    move16();

    /*-----------------------------------------------------------------*
@@ -3561,9 +3619,21 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
        set_zero_fx( h_dirac_output_synthesis_state->proto_power_diff_smooth_fx, h_dirac_output_synthesis_params->max_band_decorr * nchan_out_woLFE );
    }

    minimum_fx( q_cy_auto_dir_smooth_prev_local, nchan_out_woLFE, &h_dirac_output_synthesis_state->q_cy_auto_dir_smooth_prev );
    FOR( k = 0; k < nchan_out_woLFE; k++ )
    {
        scale_sig32( h_dirac_output_synthesis_state->cy_auto_dir_smooth_prev_fx + ( k * num_freq_bands ), num_freq_bands, sub( h_dirac_output_synthesis_state->q_cy_auto_dir_smooth_prev, q_cy_auto_dir_smooth_prev_local[k] ) );
    }

    set_zero_fx( h_dirac_output_synthesis_state->cy_auto_dir_smooth_fx, imult1616( num_freq_bands, nchan_out_woLFE ) );
    h_dirac_output_synthesis_state->q_cy_auto_dir_smooth = 0;
    move16();
    set_zero_fx( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx, imult1616( num_freq_bands, nchan_out_woLFE ) );
    h_dirac_output_synthesis_state->q_cy_cross_dir_smooth = 0;
    move16();
    set_zero_fx( h_dirac_output_synthesis_state->cy_auto_diff_smooth_fx, imult1616( num_freq_bands, nchan_out_woLFE ) );
    h_dirac_output_synthesis_state->q_cy_auto_diff_smooth = 0;
    move16();

    pop_wmops();

@@ -5938,7 +6008,9 @@ static void computeTargetPSDs_direct_subframe_fx(
    Word32 *cy_cross_dir_smooth,
    Word16 *q_cy_cross_dir_smooth )
{
    Word16 ch_idx, cur_idx;
    Word16 ch_idx, cur_idx, i, q_tmp;
    Word64 W_tmp[CLDFB_NO_CHANNELS_MAX], W_max;
    set64_fx( W_tmp, 0, CLDFB_NO_CHANNELS_MAX );

    /* segment auxiliary buffer */
    Word32 direct_power[CLDFB_NO_CHANNELS_MAX]; /* size: num_freq_bands. */
@@ -5951,12 +6023,25 @@ static void computeTargetPSDs_direct_subframe_fx(
    {
        cur_idx = imult1616( ch_idx, num_freq_bands );

        v_mult_fixed( direct_power, &direct_responses_square[cur_idx], &cy_auto_dir_smooth[cur_idx], num_freq_bands ); // (q_reference_power, Q31) -> q_reference_power
        W_max = 0;
        move64();
        FOR( i = 0; i < num_freq_bands; i++ )
        {
            W_tmp[i] = W_mult_32_32( direct_power[i], direct_responses_square[cur_idx + i] ); // (q_reference_power, Q31) + 1
            move64();
            W_max = W_max < W_abs( W_tmp[i] ) ? W_abs( W_tmp[i] ) : W_max;
        }
        q_tmp = W_norm( W_max );
        FOR( i = 0; i < num_freq_bands; i++ )
        {
            cy_auto_dir_smooth[cur_idx + i] = W_extract_h( W_shl( W_tmp[i], q_tmp ) );
            move32();
        }
        q_cy_auto_dir_smooth[ch_idx] = add( *q_reference_power, q_tmp );
        move16();
        v_mult_fixed( direct_power, &direct_responses[cur_idx], &cy_cross_dir_smooth[cur_idx], num_freq_bands ); // (q_reference_power, Q31) -> q_reference_power
    }

    *q_cy_auto_dir_smooth = *q_reference_power;
    move16();
    *q_cy_cross_dir_smooth = *q_reference_power;
    move16();