Commit 63b1f65b authored by Fabian Bauer
Browse files

Merge branch '1072-complexity-high-complexity-overhead-for-osba-decoding' of...

Merge branch '1072-complexity-high-complexity-overhead-for-osba-decoding' of ssh://forge.3gpp.org:29419/sa4/audio/ivas-basop into 1072-complexity-high-complexity-overhead-for-osba-decoding
parents 74f66bf8 43633d49
Loading
Loading
Loading
Loading
+21 −4
Original line number Diff line number Diff line
@@ -506,7 +506,7 @@ stages:
.ivas-pytest-sanitizers-anchor: &ivas-pytest-sanitizers-anchor
  stage: test
  needs: ["build-codec-linux-make"]
  timeout: "300 minutes"
  timeout: "420 minutes"
  rules:
    - if: $CI_PIPELINE_SOURCE == 'push'
      when: never
@@ -522,7 +522,7 @@ stages:
    - *build-reference-and-dut-binaries
    - make clean
    - make -j CLANG=$CLANG_NUM
    - if [[ $CLANG_NUM == 3 ]]; then export UBSAN_OPTIONS="suppressions=scripts/ubsan.supp,report_error_type=1"; fi
    - if [[ $CLANG_NUM == 3 ]]; then export UBSAN_OPTIONS="suppressions=scripts/ubsan_basop.supp,report_error_type=1"; fi
    - testcase_timeout=$TESTCASE_TIMEOUT_LTV_SANITIZERS
    - python3 -m pytest $TEST_SUITE -v --tb=no --update_ref 1 --html=report.html --self-contained-html --junit-xml=report-junit.xml --testcase_timeout $testcase_timeout --ref_encoder_path $DUT_ENCODER_PATH --ref_decoder_path $DUT_DECODER_PATH
  artifacts:
@@ -1253,8 +1253,25 @@ voip-be-on-merge-request:
  - unzip artifacts.zip || true # this may fail on first run, when there are no artifacts there and the zip file is actually just "404"-html
  - ls
  - public_dir="$CI_JOB_NAME-public"

  # the "if" is needed to catch the case when no artifact is there (first run), similarly to the unzip above
  - if [[ -d $public_dir ]]; then mv $public_dir/* wmops/;  fi
  - if [[ -d $public_dir ]]; then
  -   mv $public_dir/* wmops/
  # check here if we have the split-by-levels files present - if not, fake them up with the existing global one
  # this is needed for the first run with split graphs on a branch where the global version did run previously
  # NOTE: checking only for level_1 file here as this should already be sufficient
  # NOTE2: also not checking for RAM, for the same reason
  -   wmops_all_global="wmops/log_wmops_all.txt"
  -   ram_all_global="wmops/log_ram_all.txt"
  -   if [ -f "${wmops_all_global}" ] && [ ! -f "wmops/log_wmops_all_level_1.txt" ]; then
  -     declare -a suffixes=("level_1" "level_2" "level_3" "rate_sw")
  -     for suffix in "${suffixes[@]}"; do
  -       cp ${wmops_all_global} wmops/log_wmops_all_${suffix}.txt
  -       cp ${ram_all_global} wmops/log_ram_all_${suffix}.txt
  -     done
  -   fi
  - fi

  - ls wmops
  - rm artifacts.zip
  - rm -rf $public_dir
@@ -1271,7 +1288,7 @@ voip-be-on-merge-request:
  &complexity-measurements-prepare-artifacts # prepare artifacts -> move to public directory
  - public_dir="$CI_JOB_NAME-public"
  - mkdir $public_dir
  - mv -f wmops/log_*_all.txt ./*.js ${public_dir}/
  - mv -f wmops/log_*_all*.txt ./*.js ${public_dir}/
  # move logfiles for links
  - mkdir $public_dir/logs
  # first move logs
+1 −1
Original line number Diff line number Diff line
@@ -1213,7 +1213,7 @@ void cldfbSynthesis_ivas_fx(
            /*cplxMult(&iBuffer[2*i], &iBuffer[2*i+1],-imagBuffer[k][2*i], imagBuffer[k][M1-1-2*i], rot_vctr_re[i], rot_vctr_im[i]);*/
            iBuffer_fx[2 * i] = Msub_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_re_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[i] ); // Qx
            move32();
            iBuffer_fx[2 * i + 1] = Madd_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_im_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ); // Qx
            iBuffer_fx[2 * i + 1] = Msub_32_32( Mpy_32_32( imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ), imagBuffer_fx[k][2 * i], rot_vctr_im_fx[i] ); // Qx
            move32();
        }

+9 −11
Original line number Diff line number Diff line
@@ -960,7 +960,6 @@ void computeDiffuseness_fixed(
    q_intensity = add( q_factor_intensity[0], min_q_shift2 );
    move16();

    push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-|" );
    FOR( i = 0; i < DIRAC_NO_COL_AVG_DIFF; ++i )
    {
        /* Energy slow */
@@ -976,13 +975,13 @@ void computeDiffuseness_fixed(
        if( shift_q < 0 )
        {
            shiftEquiv = L_lshl( 0x80000000, shift_q );
            shift_qtotal = sub( min_q_shift1, 0 );
        }
        if( shift_q >= 0 )
        {
            shiftEquiv = L_add( 0x7FFFFFFF, 0 );
            shift_qtotal = sub( min_q_shift1, shift_q );
        }
        shift_qtotal = sub( min_q_shift1, s_max( shift_q, 0 ) );

        FOR( k = 0; k < num_freq_bands; k++ )
        {
            tmp = L_shl( p_tmp_c[k], shift_qtotal );
@@ -1019,17 +1018,17 @@ void computeDiffuseness_fixed(
        q_tmp = add( q_factor_intensity[i], min_q_shift2 );

        shift_q = sub( q_tmp, q_intensity );
#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB
        if( shift_q >= 0 )
        {
            shiftEquiv = L_lshl( 0x7FFFFFFF, 0 );
            shift_qtotal = sub( min_q_shift2, shift_q );
        }
#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS
        if ( shift_q < 0 )
        {
            shiftEquiv = L_lshl( 0x80000000, shift_q );
            shift_qtotal = sub( min_q_shift2, 0 );
        }
        if ( shift_q >= 0 )
        {
            shiftEquiv = L_lshl( 0x7FFFFFFF, 0 );
        }
        shift_qtotal = sub( min_q_shift2, s_max( shift_q, 0 ) );

        FOR( j = 0; j < DIRAC_NUM_DIMS; ++j )
        {
            p_tmp = buffer_intensity[j][i];
@@ -1071,7 +1070,6 @@ void computeDiffuseness_fixed(

        q_intensity = s_min( q_intensity, q_tmp );
    }
    pop_wmops(); /*push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-|" );/*/

    min_q_shift1 = getScaleFactor32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ) );
    min_q_shift1 = sub( min_q_shift1, idiv1616( add( find_guarded_bits_fx( DIRAC_NUM_DIMS ), 1 ), 2 ) );
+24 −38
Original line number Diff line number Diff line
@@ -131,24 +131,23 @@ static void interpTargetChannel_fx(
    Word32 spread_factor2_fx;
    Word64 tempD1_fx, tempD2_fx;

    d = negate( sub( currShift, prevShift ) );
    IF( d >= 0 )
    d = sub( prevShift, currShift );

    IF( d == 0 )
    {
        /* this can happen in DFT->TD switching */
        return;
    }

    signShift = 1;
    move16();
    }
    ELSE

    if ( d < 0 )
    {
        signShift = -1;
        move16();
    }

    IF( d == 0 )
    {
        /* this can happen in DFT->TD switching */
        return;
    }

    N = L_shift_adapt;
    move16();
    Word32 *table_pointer = NULL;
@@ -207,7 +206,7 @@ static void interpTargetChannel_fx(

            FOR( j = lim1; j <= lim2; j++ )
            {
                ptr2_fx[i] = L_add( Mpy_32_32( win_fx[j * INTERP_FACTOR1 - i], ptr1_fx[j] ), ptr2_fx[i] ); // qsynth
                ptr2_fx[i] = Madd_32_32( ptr2_fx[i], win_fx[j * INTERP_FACTOR1 - i], ptr1_fx[j] ); // qsynth
                move32();
            }
        }
@@ -225,44 +224,31 @@ static void interpTargetChannel_fx(
    tempD1_fx = W_deposit32_l( table_D1_pointer[abs( d )] );    // Q35
    tempD2_fx = W_mult0_32_32( 3, table_D1_pointer[abs( d )] ); // Q35

    IF( EQ_16( signShift, 1 ) )
    {
        tempF1_fx = ONE_IN_Q12; // Q12
        move32();
    }
    ELSE
    {
    tempF1_fx = -ONE_IN_Q12; // Q12
    move32();
    }
    tempF1_fx = L_sub( imult3216( factor_fx, d ), tempF1_fx ); // Q12
    FOR( k = 0; k < sub( N, 1 ); k++ )
    {
        Word32 local = L_sub( W_extract_l( W_shr( W_mult0_32_32( tempF1_fx, spread_factor2_fx ), 31 ) ), ONE_IN_Q12 ); // Q12
        Word32 sign_local;
        IF( local > 0 )

    if ( EQ_16( signShift, 1 ) )
    {
            sign_local = 1;
        tempF1_fx = ONE_IN_Q12; // Q12
        move32();
    }
        ELSE

    tempF1_fx = L_sub( imult3216( factor_fx, d ), tempF1_fx ); // Q12
    FOR( k = 0; k < N - 1; k++ )
    {
            sign_local = -1;
            move32();
        }
        Word32 local_int = W_extract_l( W_shr( W_abs( local ), 12 ) ); // Q0
        Word32 local = Madd_32_32( -ONE_IN_Q12, tempF1_fx, spread_factor2_fx ); // Q12
        Word32 local_int = L_shr( local, 12 );                                  // Q0
        Word32 res_a1, res_a2, res_a3;
        Word32 res_b1, res_b2, res_b3;
        Word32 res_c1, res_c2, res_c3;
        Word32 res_d1, res_d2, res_d3;
        Word64 local_int_scaled;
        Word32 local_int_scaled;
        Word64 res_a, res_b, res_c, res_d;
        Word64 tempa, tempb;
        Word64 mult_a_D1, mult_b_D2;
        local_int = W_extract_l( W_mult0_32_32( sign_local, local_int ) ); // Q0
        local_int_scaled = W_deposit32_l( L_shl( local_int, 12 ) );        // Q12
        local_int_scaled = L_shl( local_int, 12 );  // Q12
        lim1 = extract_l( local_int );              // Q0
        IF( W_sub( local_int_scaled, local ) > 0 )                         // Q21
        if ( L_sub( local_int_scaled, local ) > 0 ) // Q12
        {
            lim1 = sub( lim1, 1 ); // Q0
        }
@@ -387,7 +373,7 @@ static void targetCh_AlignStereoDFT_fx(
        }
        FOR( i = 0; i < L_shift_adapt; i++ )
        {
            target_fx[i] = L_add( Mpy_32_32( alpha_fx, fadeInBuff_fx[i] ), Mpy_32_32( L_sub( ONE_IN_Q31, alpha_fx ), fadeOutBuff_fx[i] ) ); // qsynth
            target_fx[i] = Madd_32_32( Mpy_32_32( alpha_fx, fadeInBuff_fx[i] ), L_sub( ONE_IN_Q31, alpha_fx ), fadeOutBuff_fx[i] ); // qsynth
            move32();

            alpha_fx = L_add_sat( alpha_fx, winSlope_fx ); // Q31
+81 −43
Original line number Diff line number Diff line
@@ -942,6 +942,12 @@ Word16 matrix_product_mant_exp_fx(
    Word16 *Zp_fx_e = out_e;
    Word16 row, col;
    Word16 x_idx, y_idx;
    Word64 temp;
    Word16 temp_e;
    Word16 prod_e = add( X_fx_e, Y_fx_e );

    Word16 max_exp = -31;
    move16();

    /* Processing */
    test();
@@ -957,17 +963,28 @@ Word16 matrix_product_mant_exp_fx(
        {
            FOR( i = 0; i < colsX; ++i )
            {
                ( *Zp_fx ) = 0;
                move32();
                ( *Zp_fx_e ) = 0;
                move16();
                temp = 0;
                move64();

                FOR( k = 0; k < rowsX; ++k )
                {
                    x_idx = add( k, imult1616( i, rowsX ) );
                    y_idx = add( k, imult1616( j, rowsY ) );
                    ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
                    move32();
                    x_idx = k + i * rowsX;
                    y_idx = k + j * rowsY;
                    temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
                }
                /* Maximize accumulated value to 32-bit */
                temp_e = W_norm( temp );
                temp = W_shl( temp, temp_e );
                if ( 0 == temp )
                {
                    temp_e = prod_e;
                    move16();
                }
                *Zp_fx_e = sub( prod_e, temp_e );
                move16();
                ( *Zp_fx ) = W_extract_h( temp );
                move32();
                max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
                Zp_fx++;
                Zp_fx_e++;
            }
@@ -987,17 +1004,27 @@ Word16 matrix_product_mant_exp_fx(
        {
            FOR( i = 0; i < rowsX; ++i )
            {
                ( *Zp_fx ) = 0;
                move32();
                ( *Zp_fx_e ) = 0;
                move16();
                temp = 0;
                move64();
                FOR( k = 0; k < colsX; ++k )
                {
                    x_idx = add( i, imult1616( k, rowsX ) );
                    y_idx = add( j, imult1616( k, rowsY ) );
                    ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
                    move32();
                    x_idx = i + k * rowsX;
                    y_idx = j + k * rowsY;
                    temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
                }
                /* Maximize accumulated value to 32-bit */
                temp_e = W_norm( temp );
                temp = W_shl( temp, temp_e );
                if ( 0 == temp )
                {
                    temp_e = prod_e;
                    move16();
                }
                *Zp_fx_e = sub( prod_e, temp_e );
                move16();
                ( *Zp_fx ) = W_extract_h( temp );
                move32();
                max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
                Zp_fx++;
                Zp_fx_e++;
            }
@@ -1017,18 +1044,27 @@ Word16 matrix_product_mant_exp_fx(
        {
            FOR( i = 0; i < colsX; ++i )
            {
                ( *Zp_fx ) = 0;
                move32();
                ( *Zp_fx_e ) = 0;
                move16();
                temp = 0;
                move64();
                FOR( k = 0; k < colsX; ++k )
                {
                    x_idx = add( k, imult1616( i, rowsX ) );
                    y_idx = add( j, imult1616( k, rowsY ) );
                    ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
                    move32();
                    x_idx = k + i * rowsX;
                    y_idx = j + k * rowsY;
                    temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
                }

                /* Maximize accumulated value to 32-bit */
                temp_e = W_norm( temp );
                temp = W_shl( temp, temp_e );
                if ( 0 == temp )
                {
                    temp_e = prod_e;
                    move16();
                }
                *Zp_fx_e = sub( prod_e, temp_e );
                move16();
                ( *Zp_fx ) = W_extract_h( temp );
                move32();
                max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
                Zp_fx++;
                Zp_fx_e++;
            }
@@ -1049,17 +1085,26 @@ Word16 matrix_product_mant_exp_fx(
        {
            FOR( i = 0; i < rowsX; ++i )
            {
                ( *Zp_fx ) = 0;
                move32();
                ( *Zp_fx_e ) = 0;
                move16();
                temp = 0;
                move64();
                FOR( k = 0; k < colsX; ++k )
                {
                    x_idx = add( i, imult1616( k, rowsX ) );
                    y_idx = add( k, imult1616( j, rowsY ) );
                    ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
                    move32();
                    x_idx = i + k * rowsX;
                    y_idx = k + j * rowsY;
                    temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
                }
                /* Maximize accumulated value to 32-bit */
                temp_e = W_norm( temp );
                temp = W_shl( temp, temp_e );
                if ( 0 == temp )
                {
                    temp_e = prod_e;
                }
                *Zp_fx_e = sub( prod_e, temp_e );
                move16();
                ( *Zp_fx ) = W_extract_h( temp );
                move32();
                max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
                Zp_fx++;
                Zp_fx_e++;
            }
@@ -1070,18 +1115,11 @@ Word16 matrix_product_mant_exp_fx(
        move16();
    }
    Zp_fx = Z_fx; /*Q31 - Zp_fx_e*/

    Zp_fx_e = out_e;
    Word16 max_exp = -31;
    move16();
    FOR( j = 0; j < row; ++j )
    {
        FOR( i = 0; i < col; ++i )
        {
            max_exp = s_max( max_exp, *Zp_fx_e );
            Zp_fx_e++;
        }
    }
    Zp_fx_e = out_e;


    *Z_fx_e = max_exp;
    move16();
    FOR( j = 0; j < row; ++j )
Loading