Commit 0c45df10 authored by Tapani Pihlajakuja's avatar Tapani Pihlajakuja
Browse files

Merge branch 'float-1578-omasa-to-foa-hoa-rendering-has-spikes-in-float' into 'main'

[non-be] [rend-non-be] [split-non-be] Resolve "OMASA to FOA/HOA rendering has spikes in float"

See merge request !2595
parents 7e397bd1 bc8f8c5b
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -1216,7 +1216,12 @@ typedef enum
{
    MASA_STEREO_NOT_DEFINED,
    MASA_STEREO_SPACED_MICS,
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
    MASA_STEREO_DOWNMIX,
    /* Dual mono transport: chosen by ivas_masa_stereotype_detection() when the dual-mono
     * criterion is met. The prototype computation then uses the 0.5-scaled W prototype
     * and copies it to the Y prototype instead of using the channel difference. */
    MASA_DUAL_MONO
#else
    MASA_STEREO_DOWNMIX
#endif
} MASA_TRANSPORT_SIGNAL_TYPE;

typedef enum
+1 −0
Original line number Diff line number Diff line
@@ -182,6 +182,7 @@
#define FIX_1452_DEFAULT_REVERB                         /* Nokia/Philips/FhG: Fix default room presets and their usage in renderer */
#define FIX_FMSW_DEC                                    /* float issue 1542: fix JBM issue in format switching */
#define FIX_FMSW_DEC_2                                  /* float issue 1575: fix crash for format switching when bitstream starts with EVS */
#define FIX_FLOAT_1578_OMASA_REND_SPIKES                /* Nokia: Float issue 1578: Fix spikes and collapsed perception in OMASA/MASA rendering to FOA/HOA */

/* ##################### End NON-BE switches ########################### */

+4 −0
Original line number Diff line number Diff line
@@ -274,7 +274,11 @@ static ivas_error ivas_dirac_rend_config(
                return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC\n" ) );
            }
        }
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
        ivas_masa_init_stereotype_detection( hDirACRend->masa_stereo_type_detect, ivas_total_brate );
#else
        ivas_masa_init_stereotype_detection( hDirACRend->masa_stereo_type_detect );
#endif
    }
    else
    {
+7 −0
Original line number Diff line number Diff line
@@ -1761,6 +1761,13 @@ void ivas_dirac_dec_compute_directional_responses(
                                direct_response[1] = 1.0f;
                            }
                        }
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
                        else if ( transport_signal_type == MASA_DUAL_MONO )
                        {
                            direct_response[0] = 1.0f;
                            /* direct_response[1] is not adjusted for dual mono input */
                        }
#endif
                        else
                        {
                            set_f( direct_response, 1.0f, hDirACRend->num_protos_ambi );
+227 −0
Original line number Diff line number Diff line
@@ -45,6 +45,15 @@
#endif
#include "wmc_auto.h"

#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
/*------------------------------------------------------------------------------------------*
 * Local constants
 *------------------------------------------------------------------------------------------*/

/* Constants for MASA dual mono detection.
 * Both are upper limits for the ratio inst_diff_bb_power / inst_total_bb_power that is
 * evaluated in ivas_masa_stereotype_detection(). TAU1 is applied when the current stereo
 * type is already MASA_DUAL_MONO and TAU2 otherwise, so entering dual mono requires a
 * much smaller ratio than staying in it. */
#define MASA_DUAL_MONO_TAU1 0.01562500000000000f /* 2^-6, which is about -18 dB (as a power ratio) */
#define MASA_DUAL_MONO_TAU2 0.00000762939453125f /* 2^-17, which is about -51 dB (as a power ratio) */
#endif

/*-------------------------------------------------------------------------
 * ivas_dirac_allocate_parameters()
@@ -1328,9 +1337,23 @@ void protoSignalComputation2(

    float interpolatorSpaced = 0.0f;
    float interpolatorDmx = 1.0f;
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
    float interpolatorDualMono = 0.0f;
    int16_t max_band_diff_ene;
#endif

    int16_t dipole_freq_range[2];
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
    float tempSpaced, tempDmx, tempDualMono;

    max_band_diff_ene = IVAS_CLDFB_NO_CHANNELS_MAX;
    if ( stereo_type_detect != NULL )
    {
        max_band_diff_ene = min( stereo_type_detect->max_band_diff_ene, num_freq_bands ); /* Local version for correct maximum */
    }
#else
    float tempSpaced, tempDmx;
#endif

    if ( isloudspeaker )
    {
@@ -1381,6 +1404,11 @@ void protoSignalComputation2(
        right_hi_power = 0.0f;
        total_hi_power = 0.0f;

#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
        stereo_type_detect->inst_diff_bb_power = 0.0f;
        stereo_type_detect->inst_total_bb_power = 0.0f;

#endif
        dipole_freq_range[0] = stereo_type_detect->dipole_freq_range[0];
        dipole_freq_range[1] = stereo_type_detect->dipole_freq_range[1];

@@ -1395,11 +1423,27 @@ void protoSignalComputation2(
            {
                interpolatorSpaced = ( (float) ( stereo_type_detect->interpolator ) ) / ( (float) MASA_STEREO_INTERPOLATION_SLOTS );
                interpolatorDmx = 1.0f - interpolatorSpaced;
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
                interpolatorDualMono = interpolatorDmx;
#endif
            }
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
            else if ( stereo_type_detect->type_change_direction == MASA_DUAL_MONO )
            {
                interpolatorDualMono = ( (float) ( stereo_type_detect->interpolator ) ) / ( (float) MASA_STEREO_INTERPOLATION_SLOTS );
                interpolatorDmx = 1.0f - interpolatorDualMono;
                interpolatorSpaced = interpolatorDmx;
            }
            else /* MASA_STEREO_DOWNMIX */
#else
            else
#endif
            {
                interpolatorDmx = ( (float) ( stereo_type_detect->interpolator ) ) / ( (float) MASA_STEREO_INTERPOLATION_SLOTS );
                interpolatorSpaced = 1.0f - interpolatorDmx;
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
                interpolatorDualMono = interpolatorSpaced;
#endif
            }
        }

@@ -1439,18 +1483,39 @@ void protoSignalComputation2(
                sum_total_ratio[l] = stereo_type_detect->sum_power[l] / ( stereo_type_detect->total_power[l] + EPSILON );
            }

#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
            if ( l < max_band_diff_ene )
            {
                RealSubtract = RealBuffer[0][0][l] - RealBuffer[1][0][l];
                ImagSubtract = ImagBuffer[0][0][l] - ImagBuffer[1][0][l];
                stereo_type_detect->inst_diff_bb_power += RealSubtract * RealSubtract + ImagSubtract * ImagSubtract;

                if ( l == max_band_diff_ene - 1 )
                {
                    /* Stores sum of reference power (i.e., sum of transport channels) up to max_band_diff_ene - 1. */
                    stereo_type_detect->inst_total_bb_power = total_bb_power; /* Total energy before smoothing */
                }

                if ( l == 0 )
                {
                    stereo_type_detect->subtract_power_y += RealSubtract * RealSubtract + ImagSubtract * ImagSubtract;
                }
            }
#else
            if ( l == 0 )
            {
                RealSubtract = RealBuffer[0][0][l] - RealBuffer[1][0][l];
                ImagSubtract = ImagBuffer[0][0][l] - ImagBuffer[1][0][l];
                stereo_type_detect->subtract_power_y += RealSubtract * RealSubtract + ImagSubtract * ImagSubtract;
            }
#endif

            /* Compute protos (and their power) for direct sound rendering */

            /* W prototype */
            if ( stereo_type_detect->interpolator > 0 )
            {
#ifndef FIX_FLOAT_1578_OMASA_REND_SPIKES
                if ( l < ( dipole_freq_range[1] - 1 ) || l >= MASA_SUM_PROTO_START_BIN )
                {
                    Real_aux = interpolatorSpaced * 0.5f * Real_aux + interpolatorDmx * Real_aux;
@@ -1467,6 +1532,59 @@ void protoSignalComputation2(
                    p_proto_buffer[2 * l] = interpolatorSpaced * RealBuffer[0][0][l] + interpolatorDmx * Real_aux;
                    p_proto_buffer[2 * l + 1] = interpolatorSpaced * ImagBuffer[0][0][l] + interpolatorDmx * Imag_aux;
                }
#else
                if ( ( stereo_type_detect->type_change_direction == MASA_STEREO_SPACED_MICS && stereo_type_detect->current_stereo_type == MASA_STEREO_DOWNMIX ) ||
                     ( stereo_type_detect->type_change_direction == MASA_STEREO_DOWNMIX && stereo_type_detect->current_stereo_type == MASA_STEREO_SPACED_MICS ) )
                {
                    if ( l < ( dipole_freq_range[1] - 1 ) || l >= MASA_SUM_PROTO_START_BIN )
                    {
                        Real_aux = interpolatorSpaced * 0.5f * Real_aux + interpolatorDmx * Real_aux;
                        Imag_aux = interpolatorSpaced * 0.5f * Imag_aux + interpolatorDmx * Imag_aux;
                        proto_power_smooth[l] += Real_aux * Real_aux + Imag_aux * Imag_aux;
                        p_proto_buffer[2 * l] = Real_aux;
                        p_proto_buffer[2 * l + 1] = Imag_aux;
                    }
                    else
                    {
                        tempSpaced = RealBuffer[0][0][l] * RealBuffer[0][0][l] + ImagBuffer[0][0][l] * ImagBuffer[0][0][l];
                        tempDmx = Real_aux * Real_aux + Imag_aux * Imag_aux;
                        proto_power_smooth[l] += interpolatorSpaced * tempSpaced + interpolatorDmx * tempDmx;
                        p_proto_buffer[2 * l] = interpolatorSpaced * RealBuffer[0][0][l] + interpolatorDmx * Real_aux;
                        p_proto_buffer[2 * l + 1] = interpolatorSpaced * ImagBuffer[0][0][l] + interpolatorDmx * Imag_aux;
                    }
                }
                else if ( ( stereo_type_detect->type_change_direction == MASA_STEREO_SPACED_MICS && stereo_type_detect->current_stereo_type == MASA_DUAL_MONO ) ||
                          ( stereo_type_detect->type_change_direction == MASA_DUAL_MONO && stereo_type_detect->current_stereo_type == MASA_STEREO_SPACED_MICS ) )
                {
                    if ( l < ( dipole_freq_range[1] - 1 ) || l >= MASA_SUM_PROTO_START_BIN )
                    {
                        Real_aux *= 0.5f;
                        Imag_aux *= 0.5f;
                        proto_power_smooth[l] += Real_aux * Real_aux + Imag_aux * Imag_aux;
                        p_proto_buffer[2 * l] = Real_aux;
                        p_proto_buffer[2 * l + 1] = Imag_aux;
                    }
                    else
                    {
                        tempSpaced = RealBuffer[0][0][l] * RealBuffer[0][0][l] + ImagBuffer[0][0][l] * ImagBuffer[0][0][l];
                        Real_aux *= 0.5f;
                        Imag_aux *= 0.5f;
                        tempDualMono = Real_aux * Real_aux + Imag_aux * Imag_aux;
                        proto_power_smooth[l] += interpolatorSpaced * tempSpaced + interpolatorDualMono * tempDualMono;
                        p_proto_buffer[2 * l] = interpolatorSpaced * RealBuffer[0][0][l] + interpolatorDualMono * Real_aux;
                        p_proto_buffer[2 * l + 1] = interpolatorSpaced * ImagBuffer[0][0][l] + interpolatorDualMono * Imag_aux;
                    }
                }
                else /* MASA_STEREO_DOWNMIX <-> MASA_DUAL_MONO */
                {
                    /* Both use same proto but dual mono has 0.5 scaling */
                    Real_aux = interpolatorDualMono * 0.5f * Real_aux + interpolatorDmx * Real_aux;
                    Imag_aux = interpolatorDualMono * 0.5f * Imag_aux + interpolatorDmx * Imag_aux;
                    proto_power_smooth[l] += Real_aux * Real_aux + Imag_aux * Imag_aux;
                    p_proto_buffer[2 * l] = Real_aux;
                    p_proto_buffer[2 * l + 1] = Imag_aux;
                }
#endif
            }
            else if ( stereo_type_detect->masa_stereo_type == MASA_STEREO_SPACED_MICS )
            {
@@ -1485,7 +1603,19 @@ void protoSignalComputation2(
                    p_proto_buffer[2 * l + 1] = ImagBuffer[0][0][l];
                }
            }
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
            else if ( stereo_type_detect->masa_stereo_type == MASA_DUAL_MONO )
            {
                Real_aux *= 0.5f;
                Imag_aux *= 0.5f;
                proto_power_smooth[l] += Real_aux * Real_aux + Imag_aux * Imag_aux;
                p_proto_buffer[2 * l] = Real_aux;
                p_proto_buffer[2 * l + 1] = Imag_aux;
            }
            else /* MASA_STEREO_DOWNMIX */
#else
            else
#endif
            {
                proto_power_smooth[l] += Real_aux * Real_aux + Imag_aux * Imag_aux;
                p_proto_buffer[2 * l] = Real_aux;
@@ -1495,6 +1625,7 @@ void protoSignalComputation2(
            /* Y prototype */
            if ( stereo_type_detect->interpolator > 0 )
            {
#ifndef FIX_FLOAT_1578_OMASA_REND_SPIKES
                if ( l < ( dipole_freq_range[0] ) )
                {
                    p_proto_buffer[2 * num_freq_bands + 2 * l] = interpolatorSpaced * p_proto_buffer[2 * l] + interpolatorDmx * ( RealBuffer[0][0][l] - RealBuffer[1][0][l] );
@@ -1510,6 +1641,51 @@ void protoSignalComputation2(
                    p_proto_buffer[2 * num_freq_bands + 2 * l] = interpolatorSpaced * p_proto_buffer[2 * l] + interpolatorDmx * ( RealBuffer[0][0][l] - RealBuffer[1][0][l] );
                    p_proto_buffer[2 * num_freq_bands + 2 * l + 1] = interpolatorSpaced * p_proto_buffer[2 * l + 1] + interpolatorDmx * ( ImagBuffer[0][0][l] - ImagBuffer[1][0][l] );
                }
#else
                if ( ( stereo_type_detect->type_change_direction == MASA_STEREO_SPACED_MICS && stereo_type_detect->current_stereo_type == MASA_STEREO_DOWNMIX ) ||
                     ( stereo_type_detect->type_change_direction == MASA_STEREO_DOWNMIX && stereo_type_detect->current_stereo_type == MASA_STEREO_SPACED_MICS ) )
                {
                    if ( l < ( dipole_freq_range[0] ) )
                    {
                        p_proto_buffer[2 * num_freq_bands + 2 * l] = interpolatorSpaced * p_proto_buffer[2 * l] + interpolatorDmx * ( RealBuffer[0][0][l] - RealBuffer[1][0][l] );
                        p_proto_buffer[2 * num_freq_bands + 2 * l + 1] = interpolatorSpaced * p_proto_buffer[2 * l + 1] + interpolatorDmx * ( ImagBuffer[0][0][l] - ImagBuffer[1][0][l] );
                    }
                    else if ( l < ( dipole_freq_range[1] ) )
                    {
                        p_proto_buffer[2 * num_freq_bands + 2 * l] = interpolatorSpaced * ( ImagBuffer[0][0][l] - ImagBuffer[1][0][l] ) + interpolatorDmx * ( RealBuffer[0][0][l] - RealBuffer[1][0][l] );
                        p_proto_buffer[2 * num_freq_bands + 2 * l + 1] = interpolatorSpaced * ( -( RealBuffer[0][0][l] - RealBuffer[1][0][l] ) ) + interpolatorDmx * ( ImagBuffer[0][0][l] - ImagBuffer[1][0][l] );
                    }
                    else
                    {
                        p_proto_buffer[2 * num_freq_bands + 2 * l] = interpolatorSpaced * p_proto_buffer[2 * l] + interpolatorDmx * ( RealBuffer[0][0][l] - RealBuffer[1][0][l] );
                        p_proto_buffer[2 * num_freq_bands + 2 * l + 1] = interpolatorSpaced * p_proto_buffer[2 * l + 1] + interpolatorDmx * ( ImagBuffer[0][0][l] - ImagBuffer[1][0][l] );
                    }
                }
                else if ( ( stereo_type_detect->type_change_direction == MASA_STEREO_SPACED_MICS && stereo_type_detect->current_stereo_type == MASA_DUAL_MONO ) ||
                          ( stereo_type_detect->type_change_direction == MASA_DUAL_MONO && stereo_type_detect->current_stereo_type == MASA_STEREO_SPACED_MICS ) )
                {
                    if ( l < ( dipole_freq_range[0] ) )
                    {
                        p_proto_buffer[2 * num_freq_bands + 2 * l] = p_proto_buffer[2 * l];
                        p_proto_buffer[2 * num_freq_bands + 2 * l + 1] = p_proto_buffer[2 * l + 1];
                    }
                    else if ( l < ( dipole_freq_range[1] ) )
                    {
                        p_proto_buffer[2 * num_freq_bands + 2 * l] = interpolatorSpaced * ( ImagBuffer[0][0][l] - ImagBuffer[1][0][l] ) + interpolatorDualMono * p_proto_buffer[2 * l];
                        p_proto_buffer[2 * num_freq_bands + 2 * l + 1] = interpolatorSpaced * ( -( RealBuffer[0][0][l] - RealBuffer[1][0][l] ) ) + interpolatorDualMono * p_proto_buffer[2 * l + 1];
                    }
                    else
                    {
                        p_proto_buffer[2 * num_freq_bands + 2 * l] = p_proto_buffer[2 * l];
                        p_proto_buffer[2 * num_freq_bands + 2 * l + 1] = p_proto_buffer[2 * l + 1];
                    }
                }
                else /* MASA_STEREO_DOWNMIX <-> MASA_DUAL_MONO */
                {
                    p_proto_buffer[2 * num_freq_bands + 2 * l] = interpolatorDualMono * p_proto_buffer[2 * l] + interpolatorDmx * ( RealBuffer[0][0][l] - RealBuffer[1][0][l] );
                    p_proto_buffer[2 * num_freq_bands + 2 * l + 1] = interpolatorDualMono * p_proto_buffer[2 * l + 1] + interpolatorDmx * ( ImagBuffer[0][0][l] - ImagBuffer[1][0][l] );
                }
#endif
                proto_power_smooth[l + num_freq_bands] += p_proto_buffer[2 * num_freq_bands + 2 * l] * p_proto_buffer[2 * num_freq_bands + 2 * l] + p_proto_buffer[2 * num_freq_bands + 2 * l + 1] * p_proto_buffer[2 * num_freq_bands + 2 * l + 1];
            }
            else if ( stereo_type_detect->masa_stereo_type == MASA_STEREO_SPACED_MICS )
@@ -1533,7 +1709,18 @@ void protoSignalComputation2(
                    proto_power_smooth[l + num_freq_bands] = proto_power_smooth[l];
                }
            }
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
            else if ( stereo_type_detect->masa_stereo_type == MASA_DUAL_MONO )
            {
                /* proto = W */
                p_proto_buffer[2 * num_freq_bands + 2 * l] = p_proto_buffer[2 * l];
                p_proto_buffer[2 * num_freq_bands + 2 * l + 1] = p_proto_buffer[2 * l + 1];
                proto_power_smooth[l + num_freq_bands] = proto_power_smooth[l];
            }
            else /* MASA_STEREO_DOWNMIX */
#else
            else
#endif
            {
                p_proto_buffer[2 * num_freq_bands + 2 * l] = RealBuffer[0][0][l] - RealBuffer[1][0][l];
                p_proto_buffer[2 * num_freq_bands + 2 * l + 1] = ImagBuffer[0][0][l] - ImagBuffer[1][0][l];
@@ -1821,7 +2008,12 @@ void computeDirectionAngles(
 *------------------------------------------------------------------------*/

void ivas_masa_init_stereotype_detection(
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
    MASA_STEREO_TYPE_DETECT *stereo_type_detect,
    int32_t ivas_total_brate )
#else
    MASA_STEREO_TYPE_DETECT *stereo_type_detect )
#endif
{
    stereo_type_detect->masa_stereo_type = MASA_STEREO_DOWNMIX;
    stereo_type_detect->current_stereo_type = MASA_STEREO_DOWNMIX;
@@ -1853,6 +2045,17 @@ void ivas_masa_init_stereotype_detection(
    stereo_type_detect->min_sum_total_ratio_db = 0.0f;
    stereo_type_detect->subtract_target_ratio_db = 0.0f;

#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
    stereo_type_detect->inst_diff_bb_power = 0.0f;
    stereo_type_detect->inst_total_bb_power = 0.0f;

    stereo_type_detect->max_band_diff_ene = IVAS_CLDFB_NO_CHANNELS_MAX;
    if ( ivas_total_brate < IVAS_48k )
    {
        stereo_type_detect->max_band_diff_ene = 18;
    }

#endif
    return;
}

@@ -1879,7 +2082,23 @@ void ivas_masa_stereotype_detection(
    float min_sum_temp;
    float lr_total_bb_temp;
    float lr_total_hi_temp;
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
    int16_t change_to_dual_mono_selection;
    float diffPerSum;

    /* Determine if broadband energy and ratio between difference broadband energy and broadband energy indicate
     * that the signal type is dual mono */
    change_to_dual_mono_selection = 0;
    if ( stereo_type_detect->inst_total_bb_power > 1.0f )
    {
        diffPerSum = stereo_type_detect->inst_diff_bb_power / stereo_type_detect->inst_total_bb_power;
        if ( diffPerSum < ( stereo_type_detect->current_stereo_type == MASA_DUAL_MONO ? MASA_DUAL_MONO_TAU1 : MASA_DUAL_MONO_TAU2 ) )
        {
            change_to_dual_mono_selection = 1;
        }
    }

#endif
    /* Determine if the determined features match the spaced mic type */
    change_to_spaced_selection = 0;
    if ( subtract_target_ratio_db < -3.0f )
@@ -1933,7 +2152,15 @@ void ivas_masa_stereotype_detection(
    }
    else
    {
#ifdef FIX_FLOAT_1578_OMASA_REND_SPIKES
        if ( change_to_dual_mono_selection == 1 )
        {
            stereo_type_detect->masa_stereo_type = MASA_DUAL_MONO;
        }
        else if ( change_to_spaced_selection == 1 )
#else
        if ( change_to_spaced_selection == 1 )
#endif
        {
            stereo_type_detect->masa_stereo_type = MASA_STEREO_SPACED_MICS;
        }
Loading