Commit 5653c89c authored by vaillancour's avatar vaillancour
Browse files

more comments in function stereo_tdm_ener_analysis()

parent ad0a24c3
Loading
Loading
Loading
Loading
+116 −17
Original line number Diff line number Diff line
@@ -239,13 +239,13 @@ int16_t stereo_tdm_ener_analysis(
        * Overwrite the LRTD decision flag and use the normal TD for the 
        * following cases:
        *1- The signal is already considered as S/M 
        *2- The LRTD flag is set to 1 and the last switching frame 
        *2- The LRTD decision flag is set to 1 and the last switching frame 
        *   happened more than 10 frames ago and both channel are considered 
        *   as inactive or the uncorrelated decision flag is 0 (not completely uncorrelated) 
        *   and the interfering talker score and the weighted interfering score 
        *   are low or the uncorrelated decision flag is 1 (likely uncorrelated) and 
        *   both channel are classified as unvoiced and the uncorrelated score is low
        *3- The LRTD flag is set to 0 and both channel are considered 
        *3- The LRTD decision flag is set to 0 and both channel are considered 
        *   as inactive or the uncorrelated decision flag is 0 (not completely uncorrelated) 
        *   and the interfering talker score and the weighted interfering score 
        *   are low or the uncorrelated decision flag is 1 (likely uncorrelated) and 
@@ -273,6 +273,15 @@ int16_t stereo_tdm_ener_analysis(
        }
    }

    /*----------------------------------------------------------------*
     * Changing the mixing ratio can lead to a feeling of moving sound, 
     * and so can swapping the primary and secondary channels. The logic below 
     * decides when it is the right time to allow a change in mixing or to 
     * change the dominant channel.
     * 
     * First, the frame counter since the last LRTD decision is reset or 
     * incremented, depending on whether the decision about LRTD is 1 or 0
     *----------------------------------------------------------------*/
    side_can_change = 0;

    /* update LRTD->DFT stereo hangover counters */
@@ -285,12 +294,36 @@ int16_t stereo_tdm_ener_analysis(
        hStereoTD->tdm_last_LRTD_frame_cnt++;
        hStereoTD->tdm_last_LRTD_frame_cnt = min( hStereoTD->tdm_last_LRTD_frame_cnt, 100 );
    }

    /*----------------------------------------------------------------*
     * If the last element mode was not TD stereo (IVAS_CPE_TD), then changing 
     * the mixing ratio or the dominant channel is allowed automatically.
     *----------------------------------------------------------------*/
    if ( hCPE->last_element_mode != IVAS_CPE_TD )
    {
        side_can_change = 1;
    }

    /*----------------------------------------------------------------------------------*
     * If the LRTD decision flag is set to 1 and the left and right long term rms 
     * values are below some threshold computed previously 
     * or one of the channel is inactive and the other channel has a low energy (<12dB) 
     * or the speech/music classification is different between the 2 channel 
     * and
     *   Both channel rms are low, the difference between the channel rms is low and the 
     *   absolute difference between the L and R correlation to mono is above 0.30
     *   or 
     *   the difference of correlation between the channel is low (<0.15), the correlation 
     *   of the primary channel is between 0.7 and 0.85 and the difference between the 
     *   channel rms is low and the absolute difference between the L and R correlation 
     *   to mono is above 0.30
     * Else 
     * the correlation of both L and R are below 0.95 or L and R long term rms values are below 1000 
     * and L and R long term rms values are below some threshold computed previously 
     * or the input signal is considered as uncorrelated (LRTD mode =1) 
     * and the normal TD mode is used or
     * the full LRTD is used and 
     *   both L and R instantaneous rms are below the threshold computed above 
     *   or the speech/music classification is different between the 2 channel 
     *----------------------------------------------------------------------------------*/
    if ( hStereoTD->prev_fr_LRTD_TD_dec == 1 && side_can_change == 0 )
    {
        if ( ( hStereoTD->tdm_lt_rms_L <= rms_thd && hStereoTD->tdm_lt_rms_R <= 2 * rms_thd ) ||
@@ -311,13 +344,23 @@ int16_t stereo_tdm_ener_analysis(
    {
        if ( ( ( sts[0]->old_corr < CORR_THRES && sts[1]->old_corr < CORR_THRES ) || ( hStereoTD->tdm_lt_rms_L <= RMS_MIN2 && hStereoTD->tdm_lt_rms_R <= RMS_MIN2 ) ) &&
             ( ( ( hStereoTD->tdm_lt_rms_L <= rms_thd && hStereoTD->tdm_lt_rms_R <= 2 * rms_thd ) || ( hStereoTD->tdm_lt_rms_R <= rms_thd && hStereoTD->tdm_lt_rms_L <= 2 * rms_thd ) ) ||
               ( hCPE->hStereoClassif->lrtd_mode == 1 && ( sts[0]->tdm_LRTD_flag == 0 || ( sts[0]->tdm_LRTD_flag == 1 && ( ( rms_L < 2 * rms_thd && rms_R < 2 * rms_thd ) || ( sts[0]->hSpMusClas->past_dec[0] != sts[1]->hSpMusClas->past_dec[0] ) ) ) ) ) /* Even if the UNCLR is set to 1, the content should be encoded with TD, lower swichting requierment */
               ( hCPE->hStereoClassif->lrtd_mode == 1 && ( sts[0]->tdm_LRTD_flag == 0 || ( sts[0]->tdm_LRTD_flag == 1 && ( ( rms_L < 2 * rms_thd && rms_R < 2 * rms_thd ) 
               || ( sts[0]->hSpMusClas->past_dec[0] != sts[1]->hSpMusClas->past_dec[0] ) ) ) ) ) /* Even if the UNCLR is set to 1, the content should be encoded with TD, lower switching requirement */
               ) )
        {
            side_can_change = 1;
        }
    }

    /*----------------------------------------------------------------*
     * If the input signal is considered as uncorrelated (lrtd_mode == 1) 
     * and the interfering talker score is >= 0.05 
     * and the normal TD mode is used, 
     * then changing the mixing is not allowed. 
     * If the last IVAS mode was CPE MDCT or if the down-mix is skipped, 
     * the related values are initialized. 
     * Else, if changing the mixing is allowed or if it is one of the first frames, 
     * the mixing ratio will be computed 
     *----------------------------------------------------------------*/
    if ( hCPE->hStereoClassif->xtalk_wscore >= 0.05f && hStereoTD->prev_fr_LRTD_TD_dec == 0 && hCPE->hStereoClassif->lrtd_mode == 1 )
    {
        side_can_change = 0;
@@ -335,21 +378,38 @@ int16_t stereo_tdm_ener_analysis(
    }
    else if ( side_can_change || sts[1]->ini_frame <= 1 )
    {
       /*----------------------------------------------------------------*
        * First, the difference between the L channel correlation to mono 
        * and the R channel correlation to mono is bounded and linearized. 
        * If the input signal is classified as uncorrelated 
        *   and the last mode is not TD or the last FD frame is < 4
        *   then, the ratio is re-initialized 
        * else 
        *   the mixing ratio is mapped into a cosine function to smooth the 
        *   transition from one extreme to the other
        *----------------------------------------------------------------*/
        ratio_L = max( diff_lt_corr, -RATIO_MAX );
        ratio_L = min( ratio_L, RATIO_MAX );
        ratio_L = 0.667f * ratio_L + 1.0f;

        if ( hCPE->hStereoClassif->lrtd_mode == 1 && ( hCPE->last_element_mode != IVAS_CPE_TD || hStereoTD->tdm_FD2LRTD_SW_cnt < 4 ) )
        {
            ratio_L = hCPE->hStereoTD->tdm_last_ratio; /* note: the last_ratio is set in before in stereo_set_tdm() */
            ratio_L = hCPE->hStereoTD->tdm_last_ratio; /* note: the last_ratio is set before in stereo_set_tdm() */
        }
        else
        {
            ratio_L = ( 1.0f - cosf( EVS_PI * ratio_L / 2.0f ) ) / 2.0f;
        }

       /*----------------------------------------------------------------*
        * in case of LRTD mode or a transition from fd to lrtd with 
        * an ica gain diverging from 0
        *----------------------------------------------------------------*/
        if ( hStereoTD->tdm_LRTD_flag == 1 || ( hCPE->hStereoClassif->lrtd_mode == 1 && ( hCPE->hStereoClassif->prev_lrtd_mode == 0 || abs( hCPE->hStereoTCA->indx_ica_gD - 20 ) > 2 ) ) )
        {
           /*----------------------------------------------------------------*
            * Series of if/else to ensure a small hysteresis and prevent the 
            * primary channel to switch between L and R on small variations 
            *----------------------------------------------------------------*/
            if ( ratio_L >= 0.53f ) /* small hysteresis is used to prevent undesired switching during inactive segment */
            {
                desired_idx = LRTD_STEREO_LEFT_IS_PRIM - 1;
@@ -366,8 +426,8 @@ int16_t stereo_tdm_ener_analysis(
            {
                desired_idx = LRTD_STEREO_RIGHT_IS_PRIM + 1;
            }

            if ( desired_idx != hStereoTD->tdm_prev_desired_idx && hStereoTD->tdm_last_LRTD_frame_cnt == 1 && sts[0]->last_coder_type <= UNVOICED ) /* TD transtionning to FD, we don't want an inversion of channels on the first transition frame */
            /* we don't want an inversion of channels on the first transition frame */
            if ( desired_idx != hStereoTD->tdm_prev_desired_idx && hStereoTD->tdm_last_LRTD_frame_cnt == 1 && sts[0]->last_coder_type <= UNVOICED ) 
            {
                desired_idx = hStereoTD->tdm_prev_desired_idx;
            }
@@ -379,6 +439,13 @@ int16_t stereo_tdm_ener_analysis(
        }
        else
        {
           /*----------------------------------------------------------------*
            * TD only, for high bitrate and inactive content, ratio mixing bound between 0.3,0.7
            * If ICA instantaneous target gain is > 1.2 and target gain > 1,
            * then, minimum ratio mixing value is set to 0.4
            * else if ICA instantaneous target gain is < 0.8 and target gain < 1,
            * then, maximum ratio mixing value is set to 0.6
            *----------------------------------------------------------------*/
            if ( hCPE->element_brate >= IVAS_48k && sts[0]->hVAD->hangover_cnt != 0 && max( hStereoTD->tdm_lt_rms_L, hStereoTD->tdm_lt_rms_R ) < 512.0f )
            {
                ratio_L = check_bounds( ratio_L, 0.3f, 0.7f );
@@ -392,7 +459,12 @@ int16_t stereo_tdm_ener_analysis(
            {
                ratio_L = 0.6f;
            }

           /*----------------------------------------------------------------*
            * ratio mixing is roughly quantized by finding the closest point 
            * in the Q table
            * then a smoothing in the quantization domain is performed to limit 
            * too high quantization steps
            *----------------------------------------------------------------*/
            dist = fabsf( ratio_L - tdm_ratio_tabl[0] );

            desired_idx = 0;
@@ -407,8 +479,13 @@ int16_t stereo_tdm_ener_analysis(

            idx = stereo_smooth_LR_transition( &hStereoTD->tdm_prev_stable_idx, &hStereoTD->tdm_ratio_transition_mov_flag, hStereoTD->tdm_last_ratio_idx, &hStereoTD->tdm_prev_desired_idx, &hStereoTD->tdm_ratio_transition_cnt, tdm_SM_flag_loc, desired_idx );

            /* Change the switching level in case of dual mono (in case the scenario still accept left right switching */
            /* This logic is needed in case the content is exactly the same in the 2 channel and it is expected to get back to LRTD, to prevent the secondary channel to be completely empty */
           /*----------------------------------------------------------------*
            * Change the switching level in case of dual mono (in case the 
            * scenario still accepts left/right switching). 
            * This logic is needed in case the content is exactly the same in 
            * the 2 channels and it is expected to get back to LRTD, 
            * to prevent the secondary channel from being completely empty 
            *----------------------------------------------------------------*/
            if ( hCPE->hStereoClassif->lrtd_mode == 1 )
            {
                if ( idx <= LRTD_STEREO_MID_IS_PRIM )
@@ -429,7 +506,12 @@ int16_t stereo_tdm_ener_analysis(
    {
        idx = hStereoTD->tdm_last_ratio_idx;
    }

   /*----------------------------------------------------------------*
    * Quantizing the instantaneous mixing index that will be used for 
    * bit allocation in case of LRTD
    * If last FD frame was recent, then an equal bit allocation is forced
    * else closest point from the Q table is chosen 
    *----------------------------------------------------------------*/
    hStereoTD->tdm_inst_ratio_idx = LRTD_STEREO_RIGHT_IS_PRIM;
    tdm_LRTD_pri_side = -1;
    if ( hStereoTD->tdm_FD2LRTD_SW_cnt < 5 )
@@ -450,7 +532,12 @@ int16_t stereo_tdm_ener_analysis(
            }
        }
    }

   /*----------------------------------------------------------------*
    * In case noise is likely present, 
    * alter the bit allocation in favor of the secondary channel.
    * If the class of both channels is > UNVOICED_CLASS, the bit allocation 
    * is bounded between 5 and 25
    *----------------------------------------------------------------*/
    if ( ( sts[1]->lp_speech - sts[1]->lp_noise ) < 50.0f ) /* likely presence of noisy content */
    {
        /* pointing in the right direction, inverse it else do nothing */
@@ -471,7 +558,13 @@ int16_t stereo_tdm_ener_analysis(
    {
        desired_idx = check_bounds_s( desired_idx, 5, 25 );
    }

   /*----------------------------------------------------------------*
    * In case the down-mixing is skipped, force the instantaneous ratio to mid 
    * and indicate that the primary channel will be left.
    * If LRTD is used, ensure that the specific LRTD indexes are chosen 
    * and indicate which channel is primary, 
    * else ensure that the specific LRTD indexes are not chosen
    *----------------------------------------------------------------*/
    hStereoTD->tdm_inst_ratio_idx = desired_idx;
    if ( /*hCPE->last_element_mode == IVAS_CPE_MDCT ||*/ hStereoTD->flag_skip_DMX == 1 )
    {
@@ -494,7 +587,9 @@ int16_t stereo_tdm_ener_analysis(
    {
        idx = limit_idx_Dwnmix( idx, ( hCPE->hStereoClassif->unclr_decision || ( sts[0]->flag_noisy_speech_snr == 1 && hCPE->hStereoClassif->xtalk_wscore > 0.1f ) ), desired_idx, hStereoTD->tdm_last_ratio_idx, hStereoTD->tdm_last_LRTD_PriCh_cnt, hStereoTD->tdm_last_LRTD_frame_cnt );
    }

   /*----------------------------------------------------------------*
    * reset counter of primary channel switching or increase counter
    *----------------------------------------------------------------*/
    if ( abs( hStereoTD->tdm_last_ratio_idx - idx ) > LRTD_STEREO_MID_IS_PRIM )
    {
        hStereoTD->tdm_last_LRTD_PriCh_cnt = 0;
@@ -505,6 +600,10 @@ int16_t stereo_tdm_ener_analysis(
    }
    ratio_L = tdm_ratio_tabl[idx];

   /*----------------------------------------------------------------*
    * Updates and NOOP specifics
    *----------------------------------------------------------------*/

    if ( hStereoTD->tdm_SM_modi_flag == 1 && hStereoTD->tdm_LRTD_flag == 0 )
    {
        idx = (int16_t) ( ( hStereoTD->tdm_last_ratio_idx + ( LRTD_STEREO_MID_IS_PRIM + 1 ) ) * 0.5 );