Commit 5ff54f09 authored by vasilache's avatar vasilache
Browse files

bring DCT_FX to FP

parent 9016d88e
Loading
Loading
Loading
Loading
+43 −1
Original line number Diff line number Diff line
@@ -2778,6 +2778,48 @@ const int32_t sep_object_brate[][MAX_NUM_OBJECTS] =
};
/* column-wise DCT matrices for 4, 5, 8 and 12 dim */
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL 
/* 4x4 DCT matrix in Q31 fixed point (16 entries).
 * The entries are the float dct4[] values scaled by 2^31,
 * e.g. 0.5000f -> 1073741824 and 0.2706f -> 581109056. */
const Word32 dct4_fx[4 * 4] = { // Q31
    1073741824, 1402951040, 1073741824, 581109056,
    1073741824, 581109056, -1073741824, -1402951040,
    1073741824, -581109056, -1073741824, 1402951040,
    1073741824, -1402951040, 1073741824, -581109056
};
/* 5x5 DCT matrix in Q31 fixed point (25 entries).
 * The first entry of every row is 960354688, i.e. approx. 0.4472 * 2^31. */
const Word32 dct5_fx[5 * 5] = { // Q31
    960354688, 1291711360, 1098867328, 798219648, 419618304,
    960354688, 798219648, -419618304, -1291711360, -1098867328,
    960354688, 0, -1358283392, 0, 1358283392,
    960354688, -798219648, -419618304, 1291711360, -1098867328,
    960354688, -1291711360, 1098867328, -798219648, 419618304
};
/* 8x8 DCT matrix in Q31 fixed point (64 entries).
 * The first entry of every row is 759350208, i.e. approx. 0.3536 * 2^31. */
const Word32 dct8_fx[8 * 8] = { // Q31
    759350208, 1053125952, 991922688, 892708928, 759350208, 596570944, 410813632, 209379648,
    759350208, 892708928, 410813632, -209379648, -759350208, -1053125952, -991922688, -596570944,
    759350208, 596570944, -410813632, -1053125952, -759350208, 209379648, 991922688, 892708928,
    759350208, 209379648, -991922688, -596570944, 759350208, 892708928, -410813632, -1053125952,
    759350208, -209379648, -991922688, 596570944, 759350208, -892708928, -410813632, 1053125952,
    759350208, -596570944, -410813632, 1053125952, -759350208, -209379648, 991922688, -892708928,
    759350208, -892708928, 410813632, 209379648, -759350208, 1053125952, -991922688, 596570944,
    759350208, -1053125952, 991922688, -892708928, 759350208, -596570944, 410813632, -209379648
};
/* 12x12 DCT matrix in Q31 fixed point (144 entries), laid out as 12 rows of
 * 12 values. The last two rows are the float dct12[] rows scaled by 2^31
 * (e.g. 0.2887f -> 619978560). */
const Word32 dct12_fx[12 * 12] = { // Q31
    619978560, 869301376, 846752832, 810030848, 759350208, 695569984, 619978560, 533649696, 438301408, 335436960, 226989024, 114460880,
    619978560, 810030848, 619978560, 335436960, 0, -335436960, -619978560, -810030848, -876602816, -810030848, -619978560, -335436960,
    619978560, 695569984, 226989024, -335436960, -759350208, -869301376, -619978560, -114460880, 438301408, 810030848, 846752832, 533649696,
    619978560, 533649696, -226989024, -810030848, -759350208, -114460880, 619978560, 869301376, 438301408, -335436960, -846752832, -695569984,
    619978560, 335436960, -619978560, -810030848, 0, 810030848, 619978560, -335436960, -876602816, -335436960, 619978560, 810030848,
    619978560, 114460880, -846752832, -335436960, 759350208, 533649696, -619978560, -695569984, 438301408, 810030848, -226989024, -869301376,
    619978560, -114460880, -846752832, 335436960, 759350208, -533649696, -619978560, 695569984, 438301408, -810030848, -226989024, 869301376,
    619978560, -335436960, -619978560, 810030848, 0, -810030848, 619978560, 335436960, -876602816, 335436960, 619978560, -810030848,
    619978560, -533649696, -226989024, 810030848, -759350208, 114460880, 619978560, -869301376, 438301408, 335436960, -846752832, 695569984,
    619978560, -695569984, 226989024, 335436960, -759350208, 869301376, -619978560, 114460880, 438301408, -810030848, 846752832, -533649696,
    619978560, -810030848, 619978560, -335436960, 0, 335436960, -619978560, 810030848, -876602816, 810030848, -619978560, 335436960,
    619978560, -869301376, 846752832, -810030848, 759350208, -695569984, 619978560, -533649696, 438301408, -335436960, 226989024, -114460880
};
#else
const float dct4[4*4] = 
{
    0.5000f,  0.6533f,  0.5000f,  0.2706f,
@@ -2822,7 +2864,7 @@ const float dct12[12*12]=
    0.2887f, -0.3772f, 0.2887f, -0.1562f, -0.0000f, 0.1562f, -0.2887f, 0.3772f, -0.4082f, 0.3772f, -0.2887f, 0.1562f, 
    0.2887f, -0.4048f, 0.3943f, -0.3772f, 0.3536f, -0.3239f, 0.2887f, -0.2485f, 0.2041f, -0.1562f, 0.1057f, -0.0533f
};
#endif
/*----------------------------------------------------------------------------------*
 * ISM ROM tables
 *----------------------------------------------------------------------------------*/
+7 −1
Original line number Diff line number Diff line
@@ -325,11 +325,17 @@ extern const float McMASA_LFEGain_vectors[64];
 *----------------------------------------------------------------------------------*/

extern const int32_t sep_object_brate[][MAX_NUM_OBJECTS];
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
/* Fixed-point (Q31) DCT matrices; sizes are fixed by the definitions */
extern const Word32 dct4_fx[];  /* Q31, 4 * 4 entries   */
extern const Word32 dct5_fx[];  /* Q31, 5 * 5 entries   */
extern const Word32 dct8_fx[];  /* Q31, 8 * 8 entries   */
extern const Word32 dct12_fx[]; /* Q31, 12 * 12 entries */
#else
extern const float dct4[];
extern const float dct5[];
extern const float dct8[];
extern const float dct12[];

#endif
/*----------------------------------------------------------------------------------*
 * ISM ROM tables
 *----------------------------------------------------------------------------------*/
+245 −0
Original line number Diff line number Diff line
@@ -1274,3 +1274,248 @@ float rand_triangular_signed(
        return 0.5f - 0.5f * sqrtf( 1.0f - rand_val );
    }
}
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
/*-------------------------------------------------------------------*
 * matrix_product_fx()
 *
 * Fixed-point matrix product Z = op(X) * op(Y), where op() is either the
 * identity or the transpose, selected by transpX / transpY (value 1 means
 * "transpose"; any other combination falls through to the regular case).
 *
 * All matrices are addressed column-wise: element (r, c) of a matrix with
 * R rows is stored at index r + c * R. Z is written sequentially, one
 * output column after the other.
 *
 * Every partial product is formed with Mpy_32_32(), so the result is in
 * Q(Qx + Qy - 31). The regular case accumulates with L_add_sat(), the
 * transposed cases with L_add().
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the inner dimensions of the
 * two (possibly transposed) operands do not match; Z is untouched then.
 *-------------------------------------------------------------------*/
Word16 matrix_product_fx(
    const Word32 *X_fx,   /* i  : left hand matrix                                                                       Qx*/
    const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
    const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
    const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
    const Word32 *Y_fx,   /* i  : right hand matrix                                                                      Qy*/
    const Word16 rowsY,   /* i  : number of rows of the right hand matrix                                                Q0*/
    const Word16 colsY,   /* i  : number of columns of the right hand matrix                                             Q0*/
    const Word16 transpY, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
    Word32 *Z_fx          /* o  : resulting matrix after the matrix multiplication                                       Qx + Qy - 31*/
)
{
    Word16 i, j, k;
    Word16 x_idx, y_idx;
    Word32 *Zp_fx = Z_fx;

    /* Processing */

    if ( transpX == 1 && transpY == 0 ) /* We use X transpose */
    {
        /* X^T is colsX x rowsX, so the shared dimension is rowsX == rowsY */
        if ( rowsX != rowsY )
        {
            return EXIT_FAILURE;
        }
        for ( j = 0; j < colsY; ++j )
        {
            for ( i = 0; i < colsX; ++i )
            {
                ( *Zp_fx ) = 0;
                move32();
                for ( k = 0; k < rowsX; ++k )
                {
                    x_idx = add( k, imult1616( i, rowsX ) ); /*Q0*/
                    y_idx = add( k, imult1616( j, rowsY ) ); /*Q0*/
                    /* Mpy_32_32 keeps the product inside 32 bits and in Qx + Qy - 31;
                       a plain 32x32 '*' would overflow and be in Qx + Qy */
                    ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); /*Qx + Qy - 31*/
                    move32();
                }
                Zp_fx++;
            }
        }
    }
    else if ( transpX == 0 && transpY == 1 ) /* We use Y transpose */
    {
        /* Y^T is colsY x rowsY, so the shared dimension is colsX == colsY */
        if ( colsX != colsY )
        {
            return EXIT_FAILURE;
        }
        for ( j = 0; j < rowsY; ++j )
        {
            for ( i = 0; i < rowsX; ++i )
            {
                ( *Zp_fx ) = 0;
                move32();
                for ( k = 0; k < colsX; ++k )
                {
                    x_idx = add( i, imult1616( k, rowsX ) );                             /*Q0*/
                    y_idx = add( j, imult1616( k, rowsY ) );                             /*Q0*/
                    ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); /*Qx + Qy - 31*/
                    move32();
                }
                Zp_fx++;
            }
        }
    }
    else if ( transpX == 1 && transpY == 1 ) /* We use both transpose */
    {
        /* X^T (colsX x rowsX) times Y^T (colsY x rowsY): shared dimension is rowsX == colsY */
        if ( rowsX != colsY )
        {
            return EXIT_FAILURE;
        }
        for ( j = 0; j < rowsY; ++j )
        {
            for ( i = 0; i < colsX; ++i )
            {
                ( *Zp_fx ) = 0;
                move32();
                /* sum over the shared dimension rowsX (not colsX): identical for
                   square X, and avoids reading past X / Y for non-square operands */
                for ( k = 0; k < rowsX; ++k )
                {
                    x_idx = add( k, imult1616( i, rowsX ) );                             /*Q0*/
                    y_idx = add( j, imult1616( k, rowsY ) );                             /*Q0*/
                    ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); /*Qx + Qy - 31*/
                    move32();
                }

                Zp_fx++;
            }
        }
    }
    else /* Regular case */
    {
        if ( NE_16( colsX, rowsY ) )
        {
            return EXIT_FAILURE;
        }

        for ( j = 0; j < colsY; ++j )
        {
            for ( i = 0; i < rowsX; ++i )
            {
                ( *Zp_fx ) = 0;
                move32();
                for ( k = 0; k < colsX; ++k )
                {
                    x_idx = add( i, imult1616( k, rowsX ) );                                 /*Q0*/
                    y_idx = add( k, imult1616( j, rowsY ) );                                 /*Q0*/
                    ( *Zp_fx ) = L_add_sat( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); /*Qx + Qy - 31*/
                    // TODO: overflow of Z_fx to be checked
                    move32();
                }
                Zp_fx++;
            }
        }
    }

    return EXIT_SUCCESS;
}

/*-------------------------------------------------------------------*
 * matrix_product_q30_fx()
 *
 * Fixed-point matrix product Z = op(X) * op(Y) with a 64-bit accumulator
 * and a rounded Q30 result. op() is the identity or the transpose,
 * selected by transpX / transpY (value 1 means "transpose"; any other
 * combination falls through to the regular case).
 *
 * All matrices are addressed column-wise: element (r, c) of a matrix with
 * R rows is stored at index r + c * R. Z is written sequentially, one
 * output column after the other.
 *
 * Q handling: the products are accumulated in Q(Qx + Qy), scaled by 2^6,
 * then rounded and reduced by 32 bits. The output is therefore Q30 when
 * Qx + Qy = 56 (e.g. Q31 and Q25 as given in the parameter comments).
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the inner dimensions of the
 * two (possibly transposed) operands do not match; Z is untouched then.
 *-------------------------------------------------------------------*/
Word16 matrix_product_q30_fx(
    const Word32 *X_fx,   /* i  : left hand matrix                                                                       Q31*/
    const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
    const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
    const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
    const Word32 *Y_fx,   /* i  : right hand matrix                                                                      Q25*/
    const Word16 rowsY,   /* i  : number of rows of the right hand matrix                                                Q0*/
    const Word16 colsY,   /* i  : number of columns of the right hand matrix                                             Q0*/
    const Word16 transpY, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
    Word32 *Z_fx          /* o  : resulting matrix after the matrix multiplication                                       Q30*/
)
{
    Word16 i, j, k;
    Word16 x_idx, y_idx;
    Word32 *Zp_fx = Z_fx;
    long long int W_tmp;

    /* Processing */
    test();
    test();
    test();
    if ( transpX == 1 && transpY == 0 ) /* We use X transpose */
    {
        /* X^T is colsX x rowsX, so the shared dimension is rowsX == rowsY */
        if ( rowsX != rowsY )
        {
            return EXIT_FAILURE;
        }
        for ( j = 0; j < colsY; ++j )
        {
            for ( i = 0; i < colsX; ++i )
            {
                W_tmp = 0;
                for ( k = 0; k < rowsX; ++k )
                {
                    x_idx = k + i * rowsX; /* Q0  */
                    y_idx = k + j * rowsY; /* Q0  */
                    /* widen before multiplying: a 32x32 product does not fit in 32 bits */
                    W_tmp += (long long int) X_fx[x_idx] * Y_fx[y_idx]; /* Q56 */
                }
                /* scale by 2^6 with a multiplication: left-shifting a negative value is undefined */
                W_tmp = W_tmp * 64; /*  W_shl( W_tmp, 6 ); */ /*Q62*/
                /* round to nearest and keep the upper 32 bits (assumes arithmetic right shift for negative values) */
                ( *Zp_fx ) = (Word32) ( ( W_tmp + 0x80000000LL ) >> 32 ); /*  W_round64_L( W_tmp ); */ /*Q30*/
                Zp_fx++;
            }
        }
    }
    else if ( transpX == 0 && transpY == 1 ) /* We use Y transpose */
    {
        /* Y^T is colsY x rowsY, so the shared dimension is colsX == colsY */
        if ( colsX != colsY )
        {
            return EXIT_FAILURE;
        }
        for ( j = 0; j < rowsY; ++j )
        {
            for ( i = 0; i < rowsX; ++i )
            {
                W_tmp = 0;

                for ( k = 0; k < colsX; ++k )
                {
                    x_idx = i + k * rowsX;                              /*Q0*/
                    y_idx = j + k * rowsY;                              /*Q0*/
                    W_tmp += (long long int) X_fx[x_idx] * Y_fx[y_idx]; /* Q56 */
                }
                W_tmp = W_tmp * 64;                                       /*Q62*/
                ( *Zp_fx ) = (Word32) ( ( W_tmp + 0x80000000LL ) >> 32 ); /*Q30*/
                Zp_fx++;
            }
        }
    }
    else if ( transpX == 1 && transpY == 1 ) /* We use both transpose */
    {
        /* X^T (colsX x rowsX) times Y^T (colsY x rowsY): shared dimension is rowsX == colsY */
        if ( rowsX != colsY )
        {
            return EXIT_FAILURE;
        }
        for ( j = 0; j < rowsY; ++j )
        {
            for ( i = 0; i < colsX; ++i )
            {
                W_tmp = 0;
                /* sum over the shared dimension rowsX (not colsX): identical for
                   square X, and avoids reading past X / Y for non-square operands */
                for ( k = 0; k < rowsX; ++k )
                {
                    x_idx = k + i * rowsX;                              /*Q0*/
                    y_idx = j + k * rowsY;                              /*Q0*/
                    W_tmp += (long long int) X_fx[x_idx] * Y_fx[y_idx]; /* Q56 */
                }
                W_tmp = W_tmp * 64;                                       /*Q62*/
                ( *Zp_fx ) = (Word32) ( ( W_tmp + 0x80000000LL ) >> 32 ); /*Q30*/

                Zp_fx++;
            }
        }
    }
    else /* Regular case */
    {
        if ( colsX != rowsY )
        {
            return EXIT_FAILURE;
        }

        for ( j = 0; j < colsY; ++j )
        {
            for ( i = 0; i < rowsX; ++i )
            {
                W_tmp = 0;

                for ( k = 0; k < colsX; ++k )
                {
                    x_idx = i + k * rowsX;                              /*Q0*/
                    y_idx = k + j * rowsY;                              /*Q0*/
                    W_tmp += (long long int) X_fx[x_idx] * Y_fx[y_idx]; /* Q56 */
                }
                W_tmp = W_tmp * 64;                                       /*Q62*/
                ( *Zp_fx ) = (Word32) ( ( W_tmp + 0x80000000LL ) >> 32 ); /*Q30*/

                Zp_fx++;
            }
        }
    }

    return EXIT_SUCCESS;
}
#endif
 No newline at end of file
+109 −8
Original line number Diff line number Diff line
@@ -97,7 +97,13 @@ static int16_t read_surround_coherence_hr( uint16_t *bitstream, int16_t *p_bit_p

static int16_t read_coherence_data_hr_512( uint16_t *bitstream, int16_t *p_bit_pos, IVAS_QMETADATA *hQMetaData, const int16_t idx_dir, const int16_t nbits_coh );

static void read_stream_dct_coeffs_omasa( int16_t *q_idx, float *q_dct_data, const int16_t len_stream, uint16_t *bit_stream, int16_t *index, const int16_t first_line );
static void read_stream_dct_coeffs_omasa( int16_t *q_idx, 
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL 
    Word32 * q_dct_data_fx,
#else    
    float *q_dct_data, 
#endif    
    const int16_t len_stream, uint16_t *bit_stream, int16_t *index, const int16_t first_line );


/*-----------------------------------------------------------------------*
@@ -4248,7 +4254,11 @@ static void decode_combined_index(

static void read_stream_dct_coeffs_omasa(
    int16_t *q_idx,
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL 
    Word32 * q_dct_data_fx,
#else
    float *q_dct_data,
#endif
    const int16_t len_stream,
    uint16_t *bit_stream,
    int16_t *index,
@@ -4325,6 +4335,20 @@ static void read_stream_dct_coeffs_omasa(
    }

    /* deindex */
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL 
    q_dct_data_fx[0] = L_shl( Mpy_32_16_1( step, shl( q_idx[0], 7 ) ), 2 ); // q = 25
    FOR( i = 1; i < len_stream; i++ )
    {
        IF( s_and( q_idx[i], 1 ) == 0 )
        {
            q_dct_data_fx[i] = L_shl( Mpy_32_16_1( step, negate( shl( q_idx[i], 6 ) ) ), 2 ); /*Q25*/
        }
        ELSE
        {
            q_dct_data_fx[i] = L_shl( Mpy_32_16_1( step, shl( q_idx[i] + 1, 6 ) ), 2 ); /*Q25*/
        }
    }
#else
    q_dct_data[0] = q_idx[0] * step;
    for ( i = 1; i < len_stream; i++ )
    {
@@ -4337,7 +4361,7 @@ static void read_stream_dct_coeffs_omasa(
            q_dct_data[i] = ( ( q_idx[i] + 1 ) >> 1 ) * step;
        }
    }

#endif
    return;
}

@@ -4356,8 +4380,13 @@ void ivas_omasa_decode_masa_to_total(
{
    int16_t i, j, k;
    int16_t q_idx[MAX_PARAM_SPATIAL_SUBFRAMES * MASA_FREQUENCY_BANDS];
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
    Word32 q_dct_data_fx[MAX_PARAM_SPATIAL_SUBFRAMES * MASA_FREQUENCY_BANDS],
        dct_data_tmp_fx[MAX_PARAM_SPATIAL_SUBFRAMES * MASA_FREQUENCY_BANDS];
#else
    float q_dct_data[MAX_PARAM_SPATIAL_SUBFRAMES * MASA_FREQUENCY_BANDS],
        dct_data_tmp[MAX_PARAM_SPATIAL_SUBFRAMES * MASA_FREQUENCY_BANDS];
#endif
    int16_t n_streams, len_stream;
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
    int32_t int_tmp;
@@ -4374,10 +4403,86 @@ void ivas_omasa_decode_masa_to_total(
    set_s( q_idx, 0, nbands * nblocks );
    for ( i = 0; i < n_streams; i++ )
    {
        read_stream_dct_coeffs_omasa( &q_idx[i * len_stream], &q_dct_data[i * len_stream], len_stream, bit_stream, index, i == 0 );
        read_stream_dct_coeffs_omasa( &q_idx[i * len_stream],
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL 
   &q_dct_data_fx[i * len_stream],
#else           
            &q_dct_data[i * len_stream],
#endif    
            len_stream, bit_stream, index, i == 0 );
    }

    /* inverse DCT2 transform */
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL 
     SWITCH( len_stream )
    {
        case 4:
            matrix_product_q30_fx( dct4_fx, nblocks, nblocks, 1, q_dct_data_fx, nblocks, 1, 0, dct_data_tmp_fx );
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nblocks ); /*Q30*/
            BREAK;
        case 5:
            matrix_product_q30_fx( dct5_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
            BREAK;
        case 8:
            matrix_product_q30_fx( dct8_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
            BREAK;
        case 12:
            matrix_product_q30_fx( dct12_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
            BREAK;
        case 20:
            matrix_product_fx( dct5_fx, nbands, nbands, 1, q_dct_data_fx, nbands, nblocks, 0, dct_data_tmp_fx );
            matrix_product_q30_fx( dct_data_tmp_fx, nbands, nblocks, 0, dct4_fx, nblocks, nblocks, 0, q_dct_data_fx ); /* reuse of variable*/
            BREAK;
        case 32:
            matrix_product_fx( dct8_fx, nbands, nbands, 1, q_dct_data_fx, nbands, nblocks, 0, dct_data_tmp_fx );
            matrix_product_q30_fx( dct_data_tmp_fx, nbands, nblocks, 0, dct4_fx, nblocks, nblocks, 0, q_dct_data_fx );
            BREAK;
        default:
            printf( "Incorrect number of coefficients for OMASA.\n" );
            BREAK;
    }
    k = 0;
    move16();
    FOR( i = 0; i < nblocks; i++ )
    {
        FOR( j = 0; j < nbands; j++ )
        {
            masa_to_total_energy_ratio_fx[i][j] = L_max( 0, q_dct_data_fx[k] ); // Q30
            move32();
            masa_to_total_energy_ratio_fx[i][j] = L_min( ONE_IN_Q30, masa_to_total_energy_ratio_fx[i][j] ); /*Q30*/
            move32();
            k = add( k, 1 );
        }
    }

    IF( EQ_16( nblocks, 1 ) )
    {
        FOR( i = 1; i < MAX_PARAM_SPATIAL_SUBFRAMES; i++ )
        {
            FOR( j = 0; j < nbands; j++ )
            {
                masa_to_total_energy_ratio_fx[i][j] = masa_to_total_energy_ratio_fx[0][j]; /*Q30*/
                move32();
            }
        }
    }

    IF( EQ_16( nbands, 1 ) )
    {
        FOR( j = 1; j < 5; j++ )
        {
            FOR( i = 0; i < nblocks; i++ )
            {
                masa_to_total_energy_ratio_fx[i][j] = masa_to_total_energy_ratio_fx[i][0]; /*Q30*/
                move32();
            }
        }
    }

#else
    switch ( len_stream )
    {
        case 4:
@@ -4446,13 +4551,9 @@ void ivas_omasa_decode_masa_to_total(
            for ( i = 0; i < nblocks; i++ )
            {
                masa_to_total_energy_ratio[i][j] = masa_to_total_energy_ratio[i][0];
#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
                int_tmp = (int32_t) ( MASA_SUR_COH_PRECISION * masa_to_total_energy_ratio[i][j] );
                masa_to_total_energy_ratio[i][j] = (float) ( int_tmp * MASA_SUR_COH_THRESHOLD );
#endif
            }
        }
    }

#endif
    return;
}