Commit 73a85a89, authored by vasilache, committed by emerit
Browse files

bring DCT_FX to FP

parent e79e78e0
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -2819,7 +2819,7 @@ const Word32 dct12_fx[12 * 12] = { // Q31
    619978560, -335436960, 0, 335436960, -619978560, 810030848, -876602816, 810030848, -619978560, 335436960, 619978560, -869301376, 846752832,
    -810030848, 759350208, -695569984, 619978560, -533649696, 438301408, -335436960, 226989024, -114460880
};
#endif
#else
const float dct4[4*4] = 
{
    0.5000f,  0.6533f,  0.5000f,  0.2706f,
@@ -2864,7 +2864,7 @@ const float dct12[12*12]=
    0.2887f, -0.3772f, 0.2887f, -0.1562f, -0.0000f, 0.1562f, -0.2887f, 0.3772f, -0.4082f, 0.3772f, -0.2887f, 0.1562f, 
    0.2887f, -0.4048f, 0.3943f, -0.3772f, 0.3536f, -0.3239f, 0.2887f, -0.2485f, 0.2041f, -0.1562f, 0.1057f, -0.0533f
};
#endif
/*----------------------------------------------------------------------------------*
 * ISM ROM tables
 *----------------------------------------------------------------------------------*/
+2 −2
Original line number Diff line number Diff line
@@ -330,12 +330,12 @@ extern const Word32 dct4_fx[];
extern const Word32 dct5_fx[];
extern const Word32 dct8_fx[];
extern const Word32 dct12_fx[];
#endif
#else
extern const float dct4[];
extern const float dct5[];
extern const float dct8[];
extern const float dct12[];

#endif
/*----------------------------------------------------------------------------------*
 * ISM ROM tables
 *----------------------------------------------------------------------------------*/
+42 −36
Original line number Diff line number Diff line
@@ -1312,7 +1312,7 @@ Word16 matrix_product_fx(
                {
                    x_idx = k + i * rowsX;                               /*Q0*/
                    y_idx = k + j * rowsY;                               /*Q0*/
                    ( *Zp_fx ) = *Zp_fx + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/
                    ( *Zp_fx ) = *Zp_fx + ( X_fx[x_idx] * Y_fx[y_idx] ); /*Qx + Qy - 31*/
                }
                Zp_fx++;
            }
@@ -1333,7 +1333,7 @@ Word16 matrix_product_fx(
                {
                    x_idx = i + k * rowsX;                                   /*Q0*/
                    y_idx = j + k * rowsY;                                   /*Q0*/
                    ( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/
                    ( *Zp_fx ) = ( *Zp_fx ) + ( X_fx[x_idx] * Y_fx[y_idx] ); /*Qx + Qy - 31*/
                }
                Zp_fx++;
            }
@@ -1350,12 +1350,13 @@ Word16 matrix_product_fx(
            for ( i = 0; i < colsX; ++i )
            {
                ( *Zp_fx ) = 0;

                move32();
                for ( k = 0; k < colsX; ++k )
                {
                    x_idx = k + i * rowsX;                                           /*Q0*/
                    y_idx = j + k * rowsY;                                           /*Q0*/
                    ( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/
                    x_idx = add( k, imult1616( i, rowsX ) );                             /*Q0*/
                    y_idx = add( j, imult1616( k, rowsY ) );                             /*Q0*/
                    ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); /*Qx + Qy - 31*/
                    move32();
                }

                Zp_fx++;
@@ -1364,7 +1365,7 @@ Word16 matrix_product_fx(
    }
    else /* Regular case */
    {
        if ( colsX != rowsY )
        if ( NE_16( colsX, rowsY ) )
        {
            return EXIT_FAILURE;
        }
@@ -1377,10 +1378,10 @@ Word16 matrix_product_fx(

                for ( k = 0; k < colsX; ++k )
                {
                    x_idx = i + k * rowsX;                                           /*Q0*/
                    y_idx = k + j * rowsY;                                           /*Q0*/
                    ( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31  L_sat_add() */
                    /* TODO: overflow of Z_fx to be checked */
                    x_idx = add( i, imult1616( k, rowsX ) );                                 /*Q0*/
                    y_idx = add( k, imult1616( j, rowsY ) );                                 /*Q0*/
                    ( *Zp_fx ) = L_add_sat( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); /*Qx + Qy - 31*/
                    // TODO: overflow of Z_fx to be checked
                    move32();
                }
                Zp_fx++;
@@ -1406,7 +1407,7 @@ Word16 matrix_product_q30_fx(
    Word16 i, j, k;
    Word16 x_idx, y_idx;
    Word32 *Zp_fx = Z_fx;
    int64_t W_tmp;
    long long int W_tmp;

    /* Processing */
    test();
@@ -1422,15 +1423,16 @@ Word16 matrix_product_q30_fx(
        {
            for ( i = 0; i < colsX; ++i )
            {
                //( *Zp_fx ) = 0;
                W_tmp = 0;
                for ( k = 0; k < rowsX; ++k )
                {
                    /*( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) ); */
                    //( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) );
                    x_idx = k + i * rowsX;              /* Q0  */
                    y_idx = k + j * rowsY;              /* Q0  */
                    W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56 */
                    W_tmp += X_fx[x_idx] * Y_fx[y_idx]; /* Q56 */
                }
                W_tmp = W_tmp * 64; /*  W_shl( W_tmp, 6 ); */                           /*Q62*/
                W_tmp = W_tmp << 6; /*  W_shl( W_tmp, 6 ); */                           /*Q62*/
                ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*  W_round64_L( W_tmp ); */ /*Q30*/
                Zp_fx++;
            }
@@ -1446,15 +1448,17 @@ Word16 matrix_product_q30_fx(
        {
            for ( i = 0; i < rowsX; ++i )
            {
                //( *Zp_fx ) = 0;
                W_tmp = 0;

                for ( k = 0; k < colsX; ++k )
                {
                    /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) ); */
                    //( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) );
                    x_idx = i + k * rowsX;              /*Q0*/
                    y_idx = j + k * rowsY;              /*Q0*/
                    W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56 */
                    W_tmp += X_fx[x_idx] * Y_fx[y_idx]; /* Q56 */
                }
                W_tmp = W_tmp * 64;                        /*Q62*/
                W_tmp = W_tmp << 6;                        /*Q62*/
                ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/
                Zp_fx++;
            }
@@ -1470,15 +1474,16 @@ Word16 matrix_product_q30_fx(
        {
            for ( i = 0; i < colsX; ++i )
            {
                //( *Zp_fx ) = 0;
                W_tmp = 0;
                for ( k = 0; k < colsX; ++k )
                {
                    /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) ); */
                    //( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) );
                    x_idx = k + i * rowsX;              /*Q0*/
                    y_idx = j + k * rowsY;              /*Q0*/
                    W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56*/
                    W_tmp += X_fx[x_idx] * Y_fx[y_idx]; // Q56
                }
                W_tmp = W_tmp * 64;                        /*Q62*/
                W_tmp = W_tmp << 6;                        /*Q62*/
                ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/

                Zp_fx++;
@@ -1496,16 +1501,17 @@ Word16 matrix_product_q30_fx(
        {
            for ( i = 0; i < rowsX; ++i )
            {
                //( *Zp_fx ) = 0;
                W_tmp = 0;

                for ( k = 0; k < colsX; ++k )
                {
                    /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) ); */
                    //( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) );
                    x_idx = i + k * rowsX;              /*Q0*/
                    y_idx = k + j * rowsY;              /*Q0*/
                    W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56*/
                    W_tmp += X_fx[x_idx] * Y_fx[y_idx]; // Q56
                }
                W_tmp = W_tmp * 64;                        /*Q62*/
                W_tmp = W_tmp << 6;                        /*Q62*/
                ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/

                Zp_fx++;
+27 −40
Original line number Diff line number Diff line
@@ -4348,16 +4348,16 @@ static void read_stream_dct_coeffs_omasa(

    /* deindex */
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
    q_dct_data_fx[0] = (Word32) ( ( step_fx * q_idx[0] ) >> 6 ); /* Q25 */
    for ( i = 1; i < len_stream; i++ )
    q_dct_data_fx[0] = L_shl( Mpy_32_16_1( step, shl( q_idx[0], 7 ) ), 2 ); // q = 25
    FOR( i = 1; i < len_stream; i++ )
    {
        if ( ( q_idx[i] & 1 ) == 0 )
        IF( s_and( q_idx[i], 1 ) == 0 )
        {
            q_dct_data_fx[i] = (Word32) ( ( step_fx * ( -q_idx[i] ) ) >> 7 ); /* Q25 */
            q_dct_data_fx[i] = L_shl( Mpy_32_16_1( step, negate( shl( q_idx[i], 6 ) ) ), 2 ); /*Q25*/
        }
        ELSE
        {
            q_dct_data_fx[i] = (Word32) ( ( step_fx * ( q_idx[i] + 1 ) ) >> 7 ); /* Q25 */
            q_dct_data_fx[i] = L_shl( Mpy_32_16_1( step, shl( q_idx[i] + 1, 6 ) ), 2 ); /*Q25*/
        }
    }
#else
@@ -4430,19 +4430,19 @@ void ivas_omasa_decode_masa_to_total(
    {
        case 4:
            matrix_product_q30_fx( dct4_fx, nblocks, nblocks, 1, q_dct_data_fx, nblocks, 1, 0, dct_data_tmp_fx );
            mvl2l( dct_data_tmp_fx, q_dct_data_fx, nblocks ); /*Q30*/
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nblocks ); /*Q30*/
            BREAK;
        case 5:
            matrix_product_q30_fx( dct5_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            mvl2l( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
            BREAK;
        case 8:
            matrix_product_q30_fx( dct8_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            mvl2l( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
            BREAK;
        case 12:
            matrix_product_q30_fx( dct12_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            mvl2l( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
            BREAK;
        case 20:
            matrix_product_fx( dct5_fx, nbands, nbands, 1, q_dct_data_fx, nbands, nblocks, 0, dct_data_tmp_fx );
@@ -4456,49 +4456,40 @@ void ivas_omasa_decode_masa_to_total(
            printf( "Incorrect number of coefficients for OMASA.\n" );
            BREAK;
    }

    /* this is to make sure the comparison to the threshold 0.98 will go the same way in */
    /* fixed point and floating point without having to drag the fixed point values to the */
    /* comparison place in the code; 1052266987 is 0.98 in Q30 it is not needed in the fixed point*/

    for ( i = 0; i < nblocks * nbands; i++ )
    {
        if ( q_dct_data_fx[i] >= 1052266987 && q_dct_data_fx[i] < 1052400000 )
        {
            q_dct_data_fx[i] = 1052400000;
        }
    }

    k = 0;
    for ( i = 0; i < nblocks; i++ )
    move16();
    FOR( i = 0; i < nblocks; i++ )
    {
        for ( j = 0; j < nbands; j++ )
        FOR( j = 0; j < nbands; j++ )
        {
            masa_to_total_energy_ratio[i][j] = q_dct_data_fx[k] / (float) ( 1 << 30 );
            masa_to_total_energy_ratio[i][j] = max( 0.0f, masa_to_total_energy_ratio[i][j] );
            masa_to_total_energy_ratio[i][j] = min( 1.0f, masa_to_total_energy_ratio[i][j] );
            k++;
            masa_to_total_energy_ratio_fx[i][j] = L_max( 0, q_dct_data_fx[k] ); // Q30
            move32();
            masa_to_total_energy_ratio_fx[i][j] = L_min( ONE_IN_Q30, masa_to_total_energy_ratio_fx[i][j] ); /*Q30*/
            move32();
            k = add( k, 1 );
        }
    }

    if ( nblocks == 1 )
    IF( EQ_16( nblocks, 1 ) )
    {
        for ( i = 1; i < MAX_PARAM_SPATIAL_SUBFRAMES; i++ )
        FOR( i = 1; i < MAX_PARAM_SPATIAL_SUBFRAMES; i++ )
        {
            for ( j = 0; j < nbands; j++ )
            FOR( j = 0; j < nbands; j++ )
            {
                masa_to_total_energy_ratio[i][j] = masa_to_total_energy_ratio[0][j];
                masa_to_total_energy_ratio_fx[i][j] = masa_to_total_energy_ratio_fx[0][j]; /*Q30*/
                move32();
            }
        }
    }

    if ( nbands == 1 )
    IF( EQ_16( nbands, 1 ) )
    {
        for ( j = 1; j < 5; j++ )
        FOR( j = 1; j < 5; j++ )
        {
            for ( i = 0; i < nblocks; i++ )
            FOR( i = 0; i < nblocks; i++ )
            {
                masa_to_total_energy_ratio[i][j] = masa_to_total_energy_ratio[i][0];
                masa_to_total_energy_ratio_fx[i][j] = masa_to_total_energy_ratio_fx[i][0]; /*Q30*/
                move32();
            }
        }
    }
@@ -4572,10 +4563,6 @@ void ivas_omasa_decode_masa_to_total(
            for ( i = 0; i < nblocks; i++ )
            {
                masa_to_total_energy_ratio[i][j] = masa_to_total_energy_ratio[i][0];
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
                int_tmp = (int32_t) ( MASA_SUR_COH_PRECISION * masa_to_total_energy_ratio[i][j] );
                masa_to_total_energy_ratio[i][j] = (float) ( int_tmp * MASA_SUR_COH_THRESHOLD );
#endif
            }
        }
    }