Commit 56307a6f authored by Nishant S Kulgod's avatar Nishant S Kulgod
Browse files

omasa fixes

parent 821f314a
Loading
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -4763,6 +4763,18 @@ Word16 matrix_product_fx(
    Word32 *Z_fx                                                    /* o  : resulting matrix after the matrix multiplication                                       */
);

/* Fixed-point matrix product Z = op(X) * op(Y), where op() optionally transposes its operand.
 * Matrices are addressed column-major (element (r, c) of X is X_fx[r + c * rowsX]) and Z_fx is
 * written sequentially, column by column.
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the inner dimensions of op(X) and op(Y) do not match.
 * NOTE(review): the name and the in-code "Q56" remarks suggest the Q formats of X and Y are
 * expected to sum to 56 and that the result is Q30 - confirm against the callers. */
Word16 matrix_product_q30_fx(
    const Word32 *X_fx,   /* i  : left hand matrix                                                                       */
    const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 */
    const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              */
    const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  */
    const Word32 *Y_fx,   /* i  : right hand matrix                                                                      */
    const Word16 rowsY,   /* i  : number of rows of the right hand matrix                                                */
    const Word16 colsY,   /* i  : number of columns of the right hand matrix                                             */
    const Word16 transpY, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication */
    Word32 *Z_fx          /* o  : resulting matrix after the matrix multiplication                                       */
);

Word16 matrix_product_mant_exp(
  const Word32 *X_fx,    /* i  : left hand matrix                                                                       */
  const Word16 *X_e,     /* i  : left hand matrix                                                                       */
+121 −0
Original line number Diff line number Diff line
@@ -1354,6 +1354,127 @@ Word16 matrix_product_fx(
    return EXIT_SUCCESS;
}

/*-------------------------------------------------------------------*
 * matrix_product_q30_fx()
 *
 * Fixed-point matrix product Z = op(X) * op(Y), where op() is either
 * the identity or the transpose, selected by transpX / transpY.
 *
 * Storage is column-major: element (r, c) of X is X_fx[r + c * rowsX],
 * likewise for Y; Z_fx is filled sequentially, column by column.
 *
 * Every output element is accumulated in a 64-bit accumulator from
 * un-doubled 32x32 products, shifted left by 6 and rounded back to
 * 32 bits, after which a constant 64 is subtracted.
 * NOTE(review): per the original remarks the products are Q56 (i.e. the
 * Q formats of X and Y sum to 56), which makes the output Q30 provided
 * W_round64_L() keeps the upper 32 bits - confirm against the basop
 * definition. The "- 64" offset is kept unchanged from the original
 * ("adjusting for precision"); its derivation is not visible here.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE (without touching Z_fx) when
 * the inner dimensions of op(X) and op(Y) do not agree.
 *-------------------------------------------------------------------*/
Word16 matrix_product_q30_fx(
    const Word32 *X_fx,   /* i  : left hand matrix                                                                       */
    const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 */
    const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              */
    const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  */
    const Word32 *Y_fx,   /* i  : right hand matrix                                                                      */
    const Word16 rowsY,   /* i  : number of rows of the right hand matrix                                                */
    const Word16 colsY,   /* i  : number of columns of the right hand matrix                                             */
    const Word16 transpY, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication */
    Word32 *Z_fx          /* o  : resulting matrix after the matrix multiplication                                       */
)
{
    Word16 i, j, k;
    Word32 *Zp_fx = Z_fx;
    Word64 W_tmp;

    /* Processing */
    test();
    test();
    test();
    IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 0 ) ) /* We use X transpose */
    {
        /* X^T is colsX x rowsX, Y is rowsY x colsY: inner dimension is rowsX == rowsY */
        IF( NE_16( rowsX, rowsY ) )
        {
            return EXIT_FAILURE;
        }
        FOR( j = 0; j < colsY; ++j )
        {
            FOR( i = 0; i < colsX; ++i )
            {
                W_tmp = 0;
                move64();
                FOR( k = 0; k < rowsX; ++k )
                {
                    W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) ); // Q56
                }
                W_tmp = W_shl( W_tmp, 6 );
                ( *Zp_fx ) = L_sub( W_round64_L( W_tmp ), 64 ); // adjusting for precision
                Zp_fx++;
            }
        }
    }
    ELSE IF( EQ_16( transpX, 0 ) && EQ_16( transpY, 1 ) ) /* We use Y transpose */
    {
        /* X is rowsX x colsX, Y^T is colsY x rowsY: inner dimension is colsX == colsY */
        IF( NE_16( colsX, colsY ) )
        {
            return EXIT_FAILURE;
        }
        FOR( j = 0; j < rowsY; ++j )
        {
            FOR( i = 0; i < rowsX; ++i )
            {
                W_tmp = 0;
                move64();
                FOR( k = 0; k < colsX; ++k )
                {
                    W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) ); // Q56
                }
                W_tmp = W_shl( W_tmp, 6 );
                ( *Zp_fx ) = L_sub( W_round64_L( W_tmp ), 64 ); // adjusting for precision
                Zp_fx++;
            }
        }
    }
    ELSE IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 1 ) ) /* We use both transpose */
    {
        /* X^T is colsX x rowsX, Y^T is colsY x rowsY: inner dimension is rowsX == colsY */
        IF( NE_16( rowsX, colsY ) )
        {
            return EXIT_FAILURE;
        }
        FOR( j = 0; j < rowsY; ++j )
        {
            FOR( i = 0; i < colsX; ++i )
            {
                W_tmp = 0;
                move64();
                /* Sum over the inner dimension (rowsX), not colsX: with a non-square X the
                 * colsX bound sums the wrong number of terms and can index past the end of Y. */
                FOR( k = 0; k < rowsX; ++k )
                {
                    W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) ); // Q56
                }
                W_tmp = W_shl( W_tmp, 6 );
                ( *Zp_fx ) = L_sub( W_round64_L( W_tmp ), 64 ); // adjusting for precision
                Zp_fx++;
            }
        }
    }
    ELSE /* Regular case */
    {
        /* X is rowsX x colsX, Y is rowsY x colsY: inner dimension is colsX == rowsY */
        IF( NE_16( colsX, rowsY ) )
        {
            return EXIT_FAILURE;
        }

        FOR( j = 0; j < colsY; ++j )
        {
            FOR( i = 0; i < rowsX; ++i )
            {
                W_tmp = 0;
                move64();
                FOR( k = 0; k < colsX; ++k )
                {
                    W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) ); // Q56
                }
                W_tmp = W_shl( W_tmp, 6 );
                ( *Zp_fx ) = L_sub( W_round64_L( W_tmp ), 64 ); // adjusting for precision
                Zp_fx++;
            }
        }
    }

    return EXIT_SUCCESS;
}
/*takes input matrices in mantissa and exponent forms*/
Word16 matrix_product_mant_exp(
    const Word32 *X_fx,   /* i  : left hand matrix                                                                       */
+9 −7
Original line number Diff line number Diff line
@@ -8562,28 +8562,28 @@ void ivas_omasa_decode_masa_to_total_fx(
    SWITCH( len_stream )
    {
        case 4:
            matrix_product_fx( dct4_fx, nblocks, nblocks, 1, q_dct_data_fx, nblocks, 1, 0, dct_data_tmp_fx );
            matrix_product_q30_fx( dct4_fx, nblocks, nblocks, 1, q_dct_data_fx, nblocks, 1, 0, dct_data_tmp_fx );
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nblocks );
            BREAK;
        case 5:
            matrix_product_fx( dct5_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            matrix_product_q30_fx( dct5_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands );
            BREAK;
        case 8:
            matrix_product_fx( dct8_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            matrix_product_q30_fx( dct8_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands );
            BREAK;
        case 12:
            matrix_product_fx( dct12_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            matrix_product_q30_fx( dct12_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
            Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands );
            BREAK;
        case 20:
            matrix_product_fx( dct5_fx, nbands, nbands, 1, q_dct_data_fx, nbands, nblocks, 0, dct_data_tmp_fx );
            matrix_product_fx( dct_data_tmp_fx, nbands, nblocks, 0, dct4_fx, nblocks, nblocks, 0, q_dct_data_fx ); /* reuse of variable*/
            matrix_product_q30_fx( dct_data_tmp_fx, nbands, nblocks, 0, dct4_fx, nblocks, nblocks, 0, q_dct_data_fx ); /* reuse of variable*/
            BREAK;
        case 32:
            matrix_product_fx( dct8_fx, nbands, nbands, 1, q_dct_data_fx, nbands, nblocks, 0, dct_data_tmp_fx );
            matrix_product_fx( dct_data_tmp_fx, nbands, nblocks, 0, dct4_fx, nblocks, nblocks, 0, q_dct_data_fx );
            matrix_product_q30_fx( dct_data_tmp_fx, nbands, nblocks, 0, dct4_fx, nblocks, nblocks, 0, q_dct_data_fx );
            BREAK;
        default:
            printf( "Incorrect number of coefficients for OMASA.\n" );
@@ -8595,7 +8595,9 @@ void ivas_omasa_decode_masa_to_total_fx(
    {
        FOR( j = 0; j < nbands; j++ )
        {
            masa_to_total_energy_ratio_fx[i][j] = L_max( 0, L_shr( L_shl_sat( q_dct_data_fx[k], 31 - 25 ), 1 ) ); // Q30
            masa_to_total_energy_ratio_fx[i][j] = L_max( 0, q_dct_data_fx[k] ); // Q30
            move32();
            masa_to_total_energy_ratio_fx[i][j] = L_min( ONE_IN_Q30, masa_to_total_energy_ratio_fx[i][j] );
            move32();
            k++;
        }