Commit ce428ca7 authored by Adam Mills's avatar Adam Mills
Browse files

Adding in mono detection and processing to stop leakage

parent 481a7b16
Loading
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -3375,6 +3375,11 @@ void ivas_dirac_param_est_enc(
    const IVAS_FORMAT ivas_format ,
   const int16_t hodirac_flag,
   const int16_t nchan_fb_in
#ifdef FIX_527_SBA_MONO_INPUT
    ,
    int16_t *mono_frame_count,
    int16_t *dirac_mono_flag
#endif
 );


@@ -4327,6 +4332,9 @@ ivas_error ivas_spar_md_enc_process(
    const int16_t nchan_inp,
    const int16_t sba_order,                                    /* i  : Ambisonic (SBA) order                   */
    float *prior_mixer[IVAS_MAX_FB_MIXER_OUT_CH][IVAS_MAX_SPAR_FB_MIXER_IN_CH]                                  /* i  : prior mixer_matrix     */
#ifdef FIX_527_SBA_MONO_INPUT
    ,const int16_t dirac_mono_flag
#endif
);

void ivas_compute_spar_params(
@@ -4466,6 +4474,11 @@ void ivas_spar_update_md_hist(
    ivas_spar_md_dec_state_t *hMdDec                            /* i/o: SPAR MD decoder handle                  */
);

/* Check for zeroed SPAR coefficients */
/*! r: 1 if every checked SPAR coefficient is zero, 0 otherwise */
int16_t ivas_spar_chk_zero_coefs(
    Decoder_Struct *st_ivas,     /* i/o: IVAS decoder handle                                */
    const int16_t sba_order      /* i  : Ambisonic (SBA) order                              */
);

void ivas_spar_smooth_md_dtx(
    ivas_spar_md_dec_state_t *hMdDec,                           /* i/o: SPAR MD decoder handle                  */
    const int16_t num_bands_out,                                /* i  : number of output bands                  */
@@ -5345,6 +5358,11 @@ void computeReferencePower_enc(
    const IVAS_FORMAT ivas_format,                              /* i  : ivas_format                                     */
    int16_t ref_power_w,                                        /* i  : use 0 if hodirac is enabled                     */
    const int16_t nchan_ana                                     /* i  : number of analysis channels                     */
#ifdef FIX_527_SBA_MONO_INPUT
    ,
    int16_t *mono_frame_count,
    int16_t *dirac_mono_flag
#endif
);

ivas_error ivas_mono_dmx_renderer_open(
+3 −0
Original line number Diff line number Diff line
@@ -540,6 +540,9 @@ typedef struct ivas_masa_qmetadata_frame_struct
    int16_t metadata_max_bits; /* maximum allowed number of bits for metadata per frame */
    uint8_t useLowerRes;
    uint8_t useLowerBandRes;
#ifdef FIX_527_SBA_MONO_INPUT
    int16_t dirac_mono_flag;
#endif
    IVAS_SURROUND_COHERENCE_BAND_DATA *surcoh_band_data;

    /* Additional helper values to include all data required for writing to output file */
+2 −1
Original line number Diff line number Diff line
@@ -210,8 +210,9 @@
#define FIX_626_VARIABLE_TYPE_MDCT_CONC                 /* FhG: trivial fix to fix USAN error */
#define FIX_616_DIV_ZERO_MCT                            /*FhG : Fix UBSAN division by zero error of issue 616*/

/* ################## End BE DEVELOPMENT switches ######################### */
#define FIX_527_SBA_MONO_INPUT                          /*Dlb : Fix for mono content in a HOA input format */

/* ################## End BE DEVELOPMENT switches ######################### */

/* #################### Start NON-BE CR switches ########################## */
/* any switch which is non-be wrt operation points tested in selection */
+120 −0
Original line number Diff line number Diff line
@@ -725,6 +725,63 @@ static ivas_error ivas_spar_set_dec_config(
    return IVAS_ERR_OK;
}

/*-----------------------------------------------------------------------------------------*
 * Function ivas_dec_mono_sba_handling()
 *
 * Detect mono content from the decoded SBA metadata and, when detected, force the
 * directional energy ratios to zero and the diffuseness to 1.0.
 *
 * The frame is treated as mono only if, for every band and every subframe of the first
 * direction, azimuth and elevation are exactly zero and the energy ratio is at the lowest
 * quantised value, AND ivas_spar_chk_zero_coefs() reports all-zero SPAR coefficients.
 *-----------------------------------------------------------------------------------------*/

static void ivas_dec_mono_sba_handling(
    Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle                                */
    const int16_t sba_order  /* i  : Ambisonic (SBA) order                              */
)
{
    int16_t mono_flag, b, block;

    mono_flag = 1;

    for ( b = 0; b < st_ivas->hQMetaData->q_direction[0].cfg.nbands; b++ )
    {
        for ( block = 0; block < MAX_PARAM_SPATIAL_SUBFRAMES; ++block )
        {
            float azimuth = st_ivas->hQMetaData->q_direction[0].band_data[b].azimuth[block];
            /* read the elevation array (previously the azimuth array was read twice) */
            float elevation = st_ivas->hQMetaData->q_direction[0].band_data[b].elevation[block];
            /* check the energy ratio of the current band (previously always band 0) */
            float energy_ratio = st_ivas->hQMetaData->q_direction[0].band_data[b].energy_ratio[block];

            if (
                ( azimuth != 0.0f ) ||
                ( elevation != 0.0f ) ||
                ( energy_ratio > 0.15f ) ) /* 0.15f is just above the lowest quantised value. */
            {
                mono_flag = 0;
            }
        }
    }

    /* Combine the SPAR prediction coefs flag with the azimuth, elevation and energy ratio flag. */
    /* The SPAR check is only evaluated when the DirAC metadata already looks mono. */
    mono_flag = mono_flag && ivas_spar_chk_zero_coefs( st_ivas, sba_order );

    if ( mono_flag )
    {
        /* Set Energy Ratio values to be zero */
        for ( b = 0; b < st_ivas->hQMetaData->q_direction[0].cfg.nbands; b++ )
        {
            set_zero( st_ivas->hQMetaData->q_direction[0].band_data[b].energy_ratio, MAX_PARAM_SPATIAL_SUBFRAMES );
        }

        /* The renderer buffers are only touched when a DirAC handle exists */
        if ( st_ivas->hDirAC != NULL )
        {
            for ( block = 0; block < st_ivas->hSpatParamRendCom->dirac_md_buffer_length; ++block )
            {
                /* Set directional Energy Ratio values to be zero */
                set_zero( st_ivas->hSpatParamRendCom->energy_ratio1[block], st_ivas->hSpatParamRendCom->num_freq_bands );
                if ( st_ivas->hQMetaData->no_directions == 2 )
                {
                    set_zero( st_ivas->hSpatParamRendCom->energy_ratio2[block], st_ivas->hSpatParamRendCom->num_freq_bands );
                }

                /* Set Diffuseness values to be 1.0 */
                set_f( st_ivas->hSpatParamRendCom->diffuseness_vector[block], 1.0f, st_ivas->hSpatParamRendCom->num_freq_bands );
            }
        }
    }

    return;
}

/*-----------------------------------------------------------------------------------------*
 * Function ivas_spar_md_dec_process()
@@ -764,6 +821,10 @@ void ivas_spar_md_dec_process(
#endif
                               st_ivas->hQMetaData->sba_inactive_mode, st_ivas->last_active_ivas_total_brate );

#ifdef FIX_527_SBA_MONO_INPUT
    ivas_dec_mono_sba_handling( st_ivas, sba_order );
#endif

#if 0
    {
        char f_name[100];
@@ -1090,7 +1151,66 @@ void ivas_spar_md_dec_process(

    return;
}
/*-----------------------------------------------------------------------------------------*
 * Function ivas_spar_chk_zero_coefs()
 *
 * Check for zeroed SPAR coefficients
 *
 * Scans pred_re, C_re and P_re of every metadata subframe for the bands below
 * min( num_bands, SPAR_DIRAC_SPLIT_START_BAND ). The channel counts are taken from
 * the configuration of band 0.
 *
 * Returns 1 when all inspected coefficients are exactly 0.0f, otherwise 0.
 *-----------------------------------------------------------------------------------------*/
int16_t ivas_spar_chk_zero_coefs(
    Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle                                */
    const int16_t sba_order  /* i  : Ambisonic (SBA) order                              */
)
{
    ivas_spar_md_dec_state_t *hMdDec = st_ivas->hSpar->hMdDec;
    int16_t sf, band, row, col;
    int16_t coef_idx;
    int16_t num_md_sub_frames;

    num_md_sub_frames = ivas_get_spar_dec_md_num_subframes( sba_order, st_ivas->hDecoderConfig->ivas_total_brate
#ifdef VLBR_20MS_MD
                                                            ,
                                                            st_ivas->last_active_ivas_total_brate
#endif
    );

    const int16_t num_decorr = hMdDec->spar_md_cfg.num_decorr_per_band[0];
    const int16_t num_dmx = hMdDec->spar_md_cfg.num_dmx_chans_per_band[0];

    for ( sf = 0; sf < num_md_sub_frames; sf++ )
    {
        for ( band = 0; band < min( hMdDec->spar_md.num_bands, SPAR_DIRAC_SPLIT_START_BAND ); band++ )
        {
            /* coefficients of subframe sf are stored at an offset of sf * IVAS_MAX_NUM_BANDS */
            coef_idx = (int16_t) ( band + sf * IVAS_MAX_NUM_BANDS );

            /* prediction coefficients */
            for ( row = 0; row < num_dmx + num_decorr - 1; row++ )
            {
                if ( hMdDec->spar_md.band_coeffs[coef_idx].pred_re[row] != 0.0f )
                {
                    return 0;
                }
            }

            /* C coefficients */
            for ( row = 0; row < num_decorr; row++ )
            {
                for ( col = 0; col < num_dmx - 1; col++ )
                {
                    if ( hMdDec->spar_md.band_coeffs[coef_idx].C_re[row][col] != 0.0f )
                    {
                        return 0;
                    }
                }
            }

            /* P coefficients */
            for ( row = 0; row < num_decorr; row++ )
            {
                if ( hMdDec->spar_md.band_coeffs[coef_idx].P_re[row] != 0.0f )
                {
                    return 0;
                }
            }
        }
    }

    /* no non-zero coefficient was found */
    return 1;
}

/*-----------------------------------------------------------------------------------------*
 * Function ivas_spar_smooth_md_dtx()
+171 −5
Original line number Diff line number Diff line
@@ -159,6 +159,7 @@ ivas_error ivas_dirac_enc_open(
    }

    hDirAC->index_buffer_intensity = 0;
    hDirAC->mono_frame_count = 0;

    st_ivas->hDirAC = hDirAC;
    st_ivas->hSpar->enc_param_start_band = st_ivas->hDirAC->hConfig->enc_param_start_band;
@@ -299,7 +300,13 @@ void ivas_dirac_enc(
    int16_t i, j, b, i_ts;
    push_wmops( "ivas_dirac_enc" );

    ivas_dirac_param_est_enc( hDirAC, hQMetaData->q_direction, hQMetaData->useLowerRes, data_f, ppIn_FR_real, ppIn_FR_imag, input_frame, ivas_format, hodirac_flag, hodirac_flag ? HOA2_CHANNELS : FOA_CHANNELS );
    ivas_dirac_param_est_enc( hDirAC, hQMetaData->q_direction, hQMetaData->useLowerRes, data_f, ppIn_FR_real, ppIn_FR_imag, input_frame, ivas_format, hodirac_flag, hodirac_flag ? HOA2_CHANNELS : FOA_CHANNELS
#ifdef FIX_527_SBA_MONO_INPUT
                              ,
                              &( hDirAC->mono_frame_count ),
                              &( hQMetaData->dirac_mono_flag )
#endif
    );

    if ( hQMetaData->q_direction->cfg.nbands > 0 )
    {
@@ -310,6 +317,22 @@ void ivas_dirac_enc(
            /* WB 4TC mode bit  : disable for now*/
            push_next_indice( hMetaData, 0, 1 );

#ifdef FIX_527_SBA_MONO_INPUT
            /* Set Energy Ratio to 0.0 if the mono flag is set, before the metadata is encoded */
            if ( hQMetaData->dirac_mono_flag )
            {
                for ( b = hQMetaData->q_direction->cfg.start_band; b < hQMetaData->q_direction->cfg.nbands; b++ )
                {
                    for ( i_ts = 0; i_ts < ( ( dtx_vad == 1 ) ? hQMetaData->q_direction[0].cfg.nblocks : 1 ); i_ts++ )
                    {
                        hQMetaData->q_direction[0].band_data[b].energy_ratio[i_ts] = 0.0f;
                        hQMetaData->q_direction[0].band_data[b].azimuth[i_ts] = 0.0f;
                        hQMetaData->q_direction[0].band_data[b].elevation[i_ts] = 0.0f;
                    }
                }
            }
#endif

            ivas_qmetadata_enc_encode( hMetaData, hQMetaData, hodirac_flag );
        }
        else
@@ -388,6 +411,125 @@ void ivas_dirac_enc(
    return;
}

/*-------------------------------------------------------------------------
 * ivas_dirac_get_mono_flag()
 *
 * Decide whether the analysed input is mono, i.e. whether the signal power
 * sits (almost) entirely in channel 0 (W). The decision uses a hysteresis
 * counter: the flag is only returned as 1 once the counter has reached
 * MONO_FRAME_THRESH, and the counter is reset as soon as any band shows
 * multi-channel content.
 *
 * Returns 1 if the content is declared mono, 0 otherwise.
 *-------------------------------------------------------------------------*/

static int16_t ivas_dirac_get_mono_flag(
    const int16_t *band_grouping,                                       /* i    : Band grouping for estimation    */
    float Cldfb_RealBuffer[DIRAC_MAX_ANA_CHANS][DIRAC_NO_FB_BANDS_MAX], /* i    : Real part of input signal       */
    float Cldfb_ImagBuffer[DIRAC_MAX_ANA_CHANS][DIRAC_NO_FB_BANDS_MAX], /* i    : Imag part of input signal       */
    const int16_t nchan_ana,                                            /* i    : number of analysis channels     */
    int16_t *mono_frame_count )                                         /* i/o  : current number of mono frames count */
{
    static const float THRESH_SILENCE = 3e4f;          /* band powers at or below this value are ignored      */
    static const float NORM_FACTOR = 1e13f;            /* upper clamp applied to the W power                   */
    static const float ONE_ON_NORM_FACTOR = 1 / 1e13f; /* reciprocal of NORM_FACTOR                            */
    static const float MAX_THRESH = 1e6f;              /* power-ratio threshold when W power is at the clamp   */
    static const float MIN_THRESH = 1e2f;              /* lower bound of the power-ratio threshold             */
    /* counter value at which mono is declared; NOTE(review): the original comment said "30ms" - confirm against the call rate */
    static const float MONO_FRAME_THRESH = ( 15 );

    int16_t band, bin, ch;
    int16_t bin_start, bin_end;
    int16_t found_mono_band = 0;
    int16_t found_mc_band = 0;
    float power_w;
    float power_others;
    float power_w_norm;
    float ratio_thresh = 0;

    /* Classify every band as silent, mono or multi-channel */
    for ( band = 0; band < DIRAC_MAX_NBANDS; band++ )
    {
        bin_start = band_grouping[band];
        bin_end = band_grouping[band + 1];

        /* |x|^2 summed over the bins of channel 0 (W) */
        power_w = 0;
        for ( bin = bin_start; bin < bin_end; bin++ )
        {
            power_w += ( Cldfb_RealBuffer[0][bin] * Cldfb_RealBuffer[0][bin] ) + ( Cldfb_ImagBuffer[0][bin] * Cldfb_ImagBuffer[0][bin] );
        }

        /* |x|^2 summed over the bins of all remaining channels */
        power_others = 0;
        for ( ch = 1; ch < nchan_ana; ch++ )
        {
            for ( bin = bin_start; bin < bin_end; bin++ )
            {
                power_others += ( Cldfb_RealBuffer[ch][bin] * Cldfb_RealBuffer[ch][bin] ) + ( Cldfb_ImagBuffer[ch][bin] * Cldfb_ImagBuffer[ch][bin] );
            }
        }

        /* Nothing in the other channels: the band is mono if W is not silent */
        if ( power_others < EPSILON )
        {
            if ( power_w > THRESH_SILENCE )
            {
                found_mono_band = 1;
            }
            continue;
        }

        /* Both W and the other channels are silent: the band carries no information */
        if ( !( power_w > THRESH_SILENCE ) && !( power_others > THRESH_SILENCE ) )
        {
            continue;
        }

        /* The required W-to-others power ratio scales with the (clamped) W power */
        power_w_norm = min( power_w, NORM_FACTOR ) * ONE_ON_NORM_FACTOR;
        ratio_thresh = max( power_w_norm * MAX_THRESH, MIN_THRESH );

        if ( power_w / power_others > ratio_thresh )
        {
            found_mono_band = 1;
        }
        else
        {
            found_mc_band = 1;
        }
    }

    /* Hysteresis on the mono decision */
    if ( found_mc_band )
    {
        /* Multi-channel content in any band resets the counter immediately */
        *mono_frame_count = 0;
    }
    else if ( found_mono_band )
    {
        /* Mono content and no multi-channel band: count up, saturating at the threshold */
        if ( *mono_frame_count < MONO_FRAME_THRESH )
        {
            ( *mono_frame_count )++;
        }
    }
    /* otherwise (all bands silent) the counter is left untouched */

    /* Mono is declared only once the counter has reached the threshold */
    return ( *mono_frame_count == MONO_FRAME_THRESH ) ? 1 : 0;
}

/*-------------------------------------------------------------------------
 * computeReferencePower_enc()
 *
@@ -404,6 +546,11 @@ void computeReferencePower_enc(
    const IVAS_FORMAT ivas_format,                                      /* i  : ivas_format                     */
    int16_t ref_power_w,                                                /* i  : use 0 if hodirac is enabled     */
    const int16_t nchan_ana                                             /* i  : number of analysis channels     */
#ifdef FIX_527_SBA_MONO_INPUT
    ,
    int16_t *mono_frame_count, /* i/o: Mono Frame Count                */
    int16_t *dirac_mono_flag   /* i/o: Mono Flag                       */
#endif
)
{
    int16_t brange[2];
@@ -411,10 +558,18 @@ void computeReferencePower_enc(

    float reference_power_W[DIRAC_MAX_NBANDS];

#ifdef FIX_527_SBA_MONO_INPUT
    if ( dirac_mono_flag != NULL )
    {
        *dirac_mono_flag = ivas_dirac_get_mono_flag( band_grouping, Cldfb_RealBuffer, Cldfb_ImagBuffer, nchan_ana, mono_frame_count );
    }
#endif

    for ( i = 0; i < num_freq_bands; i++ )
    {
        brange[0] = band_grouping[i + enc_param_start_band];
        brange[1] = band_grouping[i + enc_param_start_band + 1];

        reference_power[i] = 0;

        reference_power_W[i] = 0;
@@ -446,7 +601,6 @@ void computeReferencePower_enc(
    return;
}


/*-------------------------------------------------------------------------
 * ivas_dirac_param_est_enc()
 *
@@ -463,7 +617,13 @@ void ivas_dirac_param_est_enc(
    const int16_t input_frame,
    const IVAS_FORMAT ivas_format,
    const int16_t hodirac_flag,
    const int16_t nchan_fb_in )
    const int16_t nchan_fb_in
#ifdef FIX_527_SBA_MONO_INPUT
    ,
    int16_t *mono_frame_count,
    int16_t *dirac_mono_flag
#endif
)
{
    int16_t i, d, ts, index, l_ts, num_freq_bands;
    int16_t band_m_idx, block_m_idx;
@@ -571,7 +731,13 @@ void ivas_dirac_param_est_enc(
                num_freq_bands,
                ivas_format,
                hodirac_flag ? 0 : 1,
                FOA_CHANNELS );
                FOA_CHANNELS
#ifdef FIX_527_SBA_MONO_INPUT
                ,
                mono_frame_count,
                dirac_mono_flag
#endif
            );

            computeIntensityVector_enc(
                hDirAC,
Loading