diff --git a/lib_enc/ivas_sns_enc_fx.c b/lib_enc/ivas_sns_enc_fx.c index dfd207c3e4d30c8841684d661401349efb8553a1..22cd98cad50bd82142bd6e1ff364be63b330d256 100644 --- a/lib_enc/ivas_sns_enc_fx.c +++ b/lib_enc/ivas_sns_enc_fx.c @@ -59,115 +59,215 @@ static Word16 sns_1st_cod_fx( Word32 *snsq_fx /* o : quantized sns Q16 */ ) { - Word16 index, i; - const Word16 split_len = M / 2; - move16(); - const Word16 *means; - const Word16 means_fix = 2; // Q15 - move16(); - /* remove means */ - means = NULL; - SWITCH( L_frame ) - { - case L_FRAME16k: - means = &sns_1st_means_16k[core - 1][0]; - break; - case L_FRAME25_6k: - means = &sns_1st_means_25k6[core - 1][0]; - break; - case L_FRAME32k: - means = &sns_1st_means_32k[core - 1][0]; - break; - default: - assert( !"illegal frame length in sns_1st_cod" ); - } - Word16 exp_snsq_buffer[M] = { 0 }, exp_snsq = 0; - move16(); - move16(); - FOR( i = 0; i < M; ++i ) + IF( exp_sns == Q15 ) { - Word32 tmp = L_mult( means[i], means_fix ); // Q16 - exp_snsq_buffer[i] = 0; + Word16 index; + const Word16 split_len = M / 2; move16(); - snsq_fx[i] = BASOP_Util_Add_Mant32Exp( sns_fx[i], exp_sns, L_negate( tmp ), 15, &exp_snsq_buffer[i] ); - move32(); - } - FOR( i = 0; i < M; i++ ) - { - exp_snsq = s_max( exp_snsq_buffer[i], exp_snsq ); - } - FOR( i = 0; i < M; i++ ) - { - snsq_fx[i] = L_shr( snsq_fx[i], exp_snsq - exp_snsq_buffer[i] ); - move32(); - } + const Word16 *means; + const Word16 means_fix = 2; // Q15 + move16(); + /* remove means */ + means = NULL; + SWITCH( L_frame ) + { + case L_FRAME16k: + means = &sns_1st_means_16k[core - 1][0]; // Q14 + break; + case L_FRAME25_6k: + means = &sns_1st_means_25k6[core - 1][0]; // Q14 + break; + case L_FRAME32k: + means = &sns_1st_means_32k[core - 1][0]; // Q14 + break; + default: + assert( !"illegal frame length in sns_1st_cod" ); + } + FOR( Word16 i = 0; i < M; ++i ) + { + Word32 tmp = L_mult( means[i], means_fix ); // Q14->Q16 + snsq_fx[i] = L_sub( sns_fx[i], tmp ); // Q16 + move32(); + } - index = 0; - move16(); - FOR( Word16 split = 0; split < 2; ++split ) - { - const Word16 *cdbk_ptr; - Word16 j0, j1, index_split; - Word32 dist_min_fx; - const Word16 cdbk_fix = 8; // 1.f / powf( 2, SNS_CDBKS_BITS_4_FRAC ) in Q15 + index = 0; move16(); - const Word16 *const cdbk = &sns_1st_cdbk[split][core - 1][0]; + FOR( Word16 split = 0; split < 2; ++split ) + { + const Word16 *cdbk_ptr; + Word16 j0, j1; + Word16 index_split; + Word32 dist_min_fx; + const Word16 cdbk_fix = 8; // 1.f / powf( 2, SNS_CDBKS_BITS_4_FRAC ) in Q15 + move16(); + const Word16 *const cdbk = &sns_1st_cdbk[split][core - 1][0]; // Q12 - j0 = imult1616( split, split_len ); - j1 = add( j0, split_len ); + j0 = imult1616( split, split_len ); + j1 = add( j0, split_len ); - cdbk_ptr = cdbk; - dist_min_fx = MAXVAL_WORD32; - Word16 exp_dist_min = 31; - index_split = 0; - FOR( i = 0; i < 32; ++i ) - { - Word32 dist_fx = 0; + cdbk_ptr = cdbk; + dist_min_fx = MAXVAL_WORD32; + index_split = 0; move32(); - Word16 exp_dist = 0; move16(); - FOR( Word16 j = j0; j < j1; ++j ) + FOR( Word16 i = 0; i < 32; ++i ) { - Word32 tmp_fx; - Word16 exp_tmp = 0; - move16(); - Word32 tmp_1 = L_mult( ( *cdbk_ptr++ ), cdbk_fix ); // Q16 - tmp_fx = BASOP_Util_Add_Mant32Exp( snsq_fx[j], exp_snsq, L_negate( tmp_1 ), 15, &exp_tmp ); - Word32 tmp_2 = Mpy_32_32( tmp_fx, tmp_fx ); // exp_tmp*2 - dist_fx = BASOP_Util_Add_Mant32Exp( dist_fx, exp_dist, tmp_2, exp_tmp * 2, &exp_dist ); // exp_tmp*2 + Word32 dist_fx = 0; + move32(); + FOR( Word16 j = j0; j < j1; ++j ) // j1-j0=split_len. split_len=M/2. M=16 + { + Word32 tmp; + Word32 dist; + + tmp = L_mult( *cdbk_ptr++, cdbk_fix ); // Q12->Q16 + dist = L_sub( snsq_fx[j], tmp ); // Q16 + dist = L_shl( dist, 11 ); // cdbk_ptr is a 16 bit LUT with 3.12 values, used as 3.16. assumption: snsq_fx has the same representation. thus, the subtraction results are in 4.16, which leaves 11 bit headroom. + dist = Mpy_32_32( dist, dist ); + dist = L_shr( dist, 3 ); // make sure that the sum of 8 values does not overflow + dist_fx = L_add( dist_fx, dist ); + } + + IF( LT_32( dist_fx, dist_min_fx ) ) + { + dist_min_fx = dist_fx; + move32(); + index_split = i; + move16(); + } } - IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( dist_fx, exp_dist, dist_min_fx, exp_dist_min ), -1 ) ) + /* set quantized vector */ + cdbk_ptr = &cdbk[imult1616( index_split, split_len )]; + FOR( Word16 j = j0; j < j1; ++j ) { - dist_min_fx = dist_fx; + Word32 tmp_3 = L_mult( means[j], means_fix ); // Q16 + Word32 tmp_4 = L_mult( *cdbk_ptr++, cdbk_fix ); // Q16 + snsq_fx[j] = L_add( tmp_4, tmp_3 ); // Q16 move32(); - exp_dist_min = exp_dist; - move16(); - index_split = i; - move16(); } - } - /* set quantized vector */ - cdbk_ptr = &cdbk[imult1616( index_split, split_len )]; - FOR( Word16 j = j0; j < j1; ++j ) + /* for second split shift by five bits to store both indices as one 10 bit value */ + if ( EQ_16( split, 1 ) ) + { + index_split = shl( index_split, 5 ); + } + + index = add( index, index_split ); + } + return index; + } + ELSE + { + Word16 index, i; + const Word16 split_len = M / 2; + move16(); + const Word16 *means; + const Word16 means_fix = 2; // Q15 + move16(); + /* remove means */ + means = NULL; + SWITCH( L_frame ) + { + case L_FRAME16k: + means = &sns_1st_means_16k[core - 1][0]; + break; + case L_FRAME25_6k: + means = &sns_1st_means_25k6[core - 1][0]; + break; + case L_FRAME32k: + means = &sns_1st_means_32k[core - 1][0]; + break; + default: + assert( !"illegal frame length in sns_1st_cod" ); + } + Word16 exp_snsq_buffer[M] = { 0 }, exp_snsq = 0; + move16(); + move16(); + FOR( i = 0; i < M; ++i ) { - Word32 tmp_3 = L_mult( means[j], means_fix ); // Q16 - Word32 tmp_4 = L_mult( ( *cdbk_ptr++ ), cdbk_fix ); // Q16 - snsq_fx[j] = L_add( tmp_4, tmp_3 ); // Q16 + Word32 tmp = L_mult( means[i], means_fix ); // Q16 + exp_snsq_buffer[i] = 0; + move16(); + snsq_fx[i] = BASOP_Util_Add_Mant32Exp( sns_fx[i], exp_sns, L_negate( tmp ), 15, &exp_snsq_buffer[i] ); + move32(); + } + FOR( i = 0; i < M; i++ ) + { + exp_snsq = s_max( exp_snsq_buffer[i], exp_snsq ); + } + FOR( i = 0; i < M; i++ ) + { + snsq_fx[i] = L_shr( snsq_fx[i], exp_snsq - exp_snsq_buffer[i] ); move32(); } - /* for second split shift by five bits to store both indices as one 10 bit value */ - IF( EQ_16( split, 1 ) ) + index = 0; + move16(); + FOR( Word16 split = 0; split < 2; ++split ) { - index_split = shl( index_split, 5 ); + const Word16 *cdbk_ptr; + Word16 j0, j1, index_split; + Word32 dist_min_fx; + const Word16 cdbk_fix = 8; // 1.f / powf( 2, SNS_CDBKS_BITS_4_FRAC ) in Q15 + move16(); + const Word16 *const cdbk = &sns_1st_cdbk[split][core - 1][0]; + + j0 = imult1616( split, split_len ); + j1 = add( j0, split_len ); + + cdbk_ptr = cdbk; + dist_min_fx = MAXVAL_WORD32; + Word16 exp_dist_min = 31; + index_split = 0; + FOR( i = 0; i < 32; ++i ) + { + Word32 dist_fx = 0; + move32(); + Word16 exp_dist = 0; + move16(); + FOR( Word16 j = j0; j < j1; ++j ) + { + Word32 tmp_fx; + Word16 exp_tmp = 0; + move16(); + Word32 tmp_1 = L_mult( ( *cdbk_ptr++ ), cdbk_fix ); // Q16 + tmp_fx = BASOP_Util_Add_Mant32Exp( snsq_fx[j], exp_snsq, L_negate( tmp_1 ), 15, &exp_tmp ); + Word32 tmp_2 = Mpy_32_32( tmp_fx, tmp_fx ); // exp_tmp*2 + dist_fx = BASOP_Util_Add_Mant32Exp( dist_fx, exp_dist, tmp_2, exp_tmp * 2, &exp_dist ); // exp_tmp*2 + } + + IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( dist_fx, exp_dist, dist_min_fx, exp_dist_min ), -1 ) ) + { + dist_min_fx = dist_fx; + move32(); + exp_dist_min = exp_dist; + move16(); + index_split = i; + move16(); + } + } + + /* set quantized vector */ + cdbk_ptr = &cdbk[imult1616( index_split, split_len )]; + FOR( Word16 j = j0; j < j1; ++j ) + { + Word32 tmp_3 = L_mult( means[j], means_fix ); // Q16 + Word32 tmp_4 = L_mult( ( *cdbk_ptr++ ), cdbk_fix ); // Q16 + snsq_fx[j] = L_add( tmp_4, tmp_3 ); // Q16 + move32(); + } + + /* for second split shift by five bits to store both indices as one 10 bit value */ + IF( EQ_16( split, 1 ) ) + { + index_split = shl( index_split, 5 ); + } + + index = add( index, index_split ); } - index = add( index, index_split ); + return index; } - - return index; } /*-------------------------------------------------------------------