Commit bb3eca92 authored by Arthur Tritthart's avatar Arthur Tritthart
Browse files

Changes for IVAS-BASOP Ticket 1009: Complexity: High Complexity Overhead for...

Changes for IVAS-BASOP Ticket 1009: Complexity: High Complexity Overhead for ParamISM decoding to binaural

lib_com/basop_util.c:
Tuned instrumentation of 32-Bit division routine, now 24 bit accuracy
instead of 32 bit (cadence version)

lib_com/fft_fx.c:
Tuned instrumentation of small helper functions get_min_scalefactor and
L_norm_arr

lib_com/options.h:
Defined a macro for this fix 1009. It is only used in the binaural renderer
for simplifying divisions (32x32).

lib_com/tools.c:
Tuned instrumentation of small helper functions minimum_s etc.

lib_rend/ivas_dirac_dec_binaural_functions.c:
Defined precalculated values for EPSILON with full precision
Replaced divisions by constants with multiplications
Replaced square root of a division (division + sqrt) with ISqrt32 + Mul
Simplified all matrix multiplication functions

Best regards
Arthur Tritthart, Fraunhofer IIS, 29-NOV-2024
parent 34e96575
Loading
Loading
Loading
Loading
Loading
+69 −0
Original line number Diff line number Diff line
@@ -1038,8 +1038,76 @@ Word32 div_w( Word32 L_num, Word32 L_den )
    }
}

/*
 * BASOP_Util_Divide3232_Scale_FhG()
 *
 * Bit-serial (shift-and-subtract) division of two 32-bit values with a
 * selectable number of quotient bits.
 *
 *   x    : i  : numerator (may be negative; its magnitude is used and the
 *               sign is re-applied at the end)
 *   y    : i  : denominator, must be non-zero (asserted)
 *   s    : o  : exponent belonging to the returned mantissa; set to
 *               1 - norm_l(x) + norm_l(y), or to -31 when x == 0
 *   bits : i  : number of loop iterations, i.e. quotient bits produced
 *
 * Returns the quotient mantissa, left-shifted by (31 - bits) so that it
 * sits in the same position regardless of `bits`. Returns 0 when x == 0.
 *
 * NOTE(review): presumably x / y ~= return_value * 2^(*s - 31) -- confirm
 * against the callers of BASOP_Util_Divide3232_Scale_cadence().
 * NOTE(review): `bits` is not range-checked here; the loop count and the
 * final shift sub(31, bits) suggest the intended range is 1..31 -- confirm.
 */
Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits)
{
    Word32 z;
    Word16 sx;
    Word16 sy;
    Word32 sign;
    Word16 iteration;
    Flag   Carry;
    Word16 s_val;

    /* The carry flag feeds the quotient bits into z inside the loop below,
       so it has to start out cleared. */
    unset_carry(&Carry);

    /* Negative numerators are accepted, hence this assert is disabled. */
    /* assert (x >= (Word32)0); */
    assert( y != (Word32) 0 );

    /* Zero numerator: fixed result, no normalisation or loop needed. */
    IF( x == (Word32) 0 )
    {
        *s = -31;
        move16();
        return ( (Word32) 0 );
    }

    /* Non-zero exactly when x and y have different signs; taken from the
       original operands before they are modified below. */
    sign = L_shr(L_xor(x,y), 31);

    /* Numerator: shift left by norm_l(x), then right by one, and make the
       value non-negative. The exponent starts at 1 - sx. */
    sx = norm_l( x );
    x = L_shl( x, sx );
    x = L_shr( x, 1 );
    s_val = sub( 1, sx );
    if( x < 0 )
    {
        x = L_negate( x );
    }

    /* Denominator: same scaling, but forced to be NEGATIVE so that
       L_add(x, y) in the loop acts as the trial subtraction x - |y|. */
    sy = norm_l( y );
    y = L_shl( y, sy );
    y = L_shr( y, 1 );
    s_val = add(s_val, sy );
    if( y >= 0 )
    {
        y = L_negate( y );
    }

    *s = s_val;
    move16();

    z = L_sub(x, x);   // z = 0

    /* One quotient bit per iteration. The statement order matters: the
       carry produced by the conditional subtraction is consumed by the
       z + z + carry step immediately after it. */
    for ( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ )
    {
        /* Trial subtraction: only commit it when the remainder stays >= 0. */
        if ( L_add(x, y) >= 0 )
        {
            x = DEPR_L_add_c(x, y, &Carry);  // sets always carry=1
        }
        /* Shift z left by one and append the carry as the new quotient bit
           (1 if the subtraction was committed, otherwise the 0 left over
           from the previous iteration / unset_carry). */
        z = DEPR_L_add_c( z, z, &Carry );    // sets always carry=0
        /* Double the remainder for the next bit position. */
        x = L_add(x, x);
    }

    /* Re-apply the sign of the quotient. */
    if ( sign != 0 )
    {
        z = L_negate( z );
    }
    /* Align the `bits`-wide quotient to the top of the 32-bit word. */
    return L_shl(z, sub(31, bits));
}


Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
{
#if 1
    return BASOP_Util_Divide3232_Scale_FhG(x,y,s,24);
#else
    Word32 z;
    Word16 sx;
    Word16 sy;
@@ -1088,6 +1156,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
    }

    return z;
#endif
}

Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s )
+22 −0
Original line number Diff line number Diff line
@@ -7299,15 +7299,26 @@ Word16 L_norm_arr( Word32 *arr, Word16 size )
    Word16 q = 31;
    move16();
    FOR( Word16 i = 0; i < size; i++ )
#if 0
    IF( arr[i] != 0 )
    {
        q = s_min( q, norm_l( arr[i] ) );
    }
#else
    {
        Word16 q_tst;
        q_tst = norm_l(arr[i]);
        if (arr[i] != 0)
           q = s_min(q, q_tst);
    }
    
#endif
    return q;
}

Word16 get_min_scalefactor( Word32 x, Word32 y )
{
#if 0
    Word16 scf = Q31;
    move16();
    test();
@@ -7324,6 +7335,16 @@ Word16 get_min_scalefactor( Word32 x, Word32 y )
        scf = s_min( scf, norm_l( y ) );
    }
    return scf;
#else
    Word16 scf = Q31;
    Word16 scf_y;
    if (x != 0)
        scf = norm_l( x );
    scf_y = norm_l( y );
    if (y != 0)
        scf = s_min(scf_y, scf);
    return scf;
#endif
}

Flag is_zero_arr( Word32 *arr, Word16 size )
@@ -7335,4 +7356,5 @@ Flag is_zero_arr( Word32 *arr, Word16 size )
    }

    return 1;

}
+5 −1
Original line number Diff line number Diff line
@@ -54,7 +54,7 @@

#define SUPPORT_JBM_TRACEFILE                   /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */

/*#define WMOPS*/                                   /* Activate complexity and memory counters */
#define WMOPS                                   /* Activate complexity and memory counters */
#ifdef WMOPS
/*#define WMOPS_PER_FRAME*/                     /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */
/*#define MEM_COUNT_DETAILS*/                   /* Output detailed memory analysis for the worst-case frame (writes to the file "mem_analysis.csv") */
@@ -196,6 +196,10 @@
#define FIX_953_WRONG_ENERGY_RATIO_MASA_EXT     /* Nok: Fix 953 wrong energy ratio value after shift and cast to Word8 */
#define FIX_982_WRONG_DECODED_ENERGY_RATIO      /* Nokia: Fix 982 wrong energy in EXT mode and in second direction when present */
#define FIX_999_WRONG_ISM_EXTENDED_METADATA     /* VA: fix 999: fix ISM extended metadata decoding */

#define FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC   /* FhG: Reduce workload of binaural rendering: replace 1./tmp & sqrt by Isqrt32 */
                                                /*      Replace computations with constants by setting of constants */
                                                /*      Simplify matrix multiplications and some external helper routines */
/* ################## End DEVELOPMENT switches ######################### */

/* clang-format on */
+4 −11
Original line number Diff line number Diff line
@@ -917,30 +917,23 @@ Word16 minimum_s(
    Word16 *min_val    /* o  : minimum value in the input vector */
)
{
    Word16 i, ind, tmp;

    Word16 i, ind;
    ind = 0;
    move16();
    tmp = vec[0];
    move16();

    FOR( i = 1; i < lvec; i++ )
    {
        IF( LT_16( vec[i], tmp ) )
        if( LT_16( vec[i], vec[ind] ) )
        {
            ind = i;
            move16();
            tmp = vec[i];
            move16();
            ind = add(i, 0);
        }
    }

    if ( min_val != NULL )
    {
        *min_val = tmp;
        *min_val = vec[ind];
        move16();
    }

    return ind;
}
#else
+229 −277

File changed.

Preview size limit exceeded, changes collapsed.