Merge remote-tracking branch 'remotes/origin/main' into 1319-stack-size-in-sba-decoder (6dfa00be) · Commits · IVAS Codec Public Collaboration / IVAS Codec

apps/decoder.c

+4 −0

Original line number	Diff line number	Diff line
		@@ -298,7 +298,11 @@ int main(
		if ( arg.hrtfReaderEnabled )
		{
		/* sanity check */
		#ifdef NONBE_1293_SR_HRTF
		if ( arg.outputConfig != IVAS_AUDIO_CONFIG_BINAURAL && arg.outputConfig != IVAS_AUDIO_CONFIG_BINAURAL_ROOM_IR && arg.outputConfig != IVAS_AUDIO_CONFIG_BINAURAL_ROOM_REVERB && arg.outputConfig != IVAS_AUDIO_CONFIG_BINAURAL_SPLIT_CODED && arg.outputConfig != IVAS_AUDIO_CONFIG_BINAURAL_SPLIT_PCM )
		#else
		if ( arg.outputConfig != IVAS_AUDIO_CONFIG_BINAURAL && arg.outputConfig != IVAS_AUDIO_CONFIG_BINAURAL_ROOM_IR && arg.outputConfig != IVAS_AUDIO_CONFIG_BINAURAL_ROOM_REVERB )
		#endif
		{
		arg.hrtfReaderEnabled = false;
		fprintf( stderr, "\nError: HRTF binary file cannot be used in this output configuration.\n\n" );

lib_com/options.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -174,6 +174,7 @@
		#define NONBE_1244_FIX_SWB_BWE_MEMORY /* VA: issue 1244: fix to SWB BWE memory in case of switching from FB coding - pending a review by Huawei */
		#define NONBE_1122_KEEP_EVS_MODE_UNCHANGED /* FhG: Disables fix for issue 1122 in EVS mode to keep BE tests green. This switch should be removed once the 1122 fix is added to EVS via a CR. */
		#define NONBE_1300_TDREND_LARGE_ITD /* Eri: issue 1300: There was a bug feeding 1.25 ms frames to the TD renderer, causing out-of-buffer access. This was resolved. However, it is still possible that modeled HRTF with large ITDs could trigger out-of-buffer access. This adds a check to prevent this.*/
		#define NONBE_1293_SR_HRTF /* VA: issue 1293: add support of external HRTFs in split rendering */

		/* ##################### End NON-BE switches ########################### */

scripts/pyaudio3dtools/audioarray.py

+134 −127

Original line number	Diff line number	Diff line
		@@ -306,38 +306,7 @@ def compare(

		framesize = fs // 50

		lengths_differ = ref.shape[0] != test.shape[0]

		test_orig = test.copy()
		ref_orig = ref.copy()

		if lengths_differ:
		if handle_differing_lengths == "fail":
		raise RuntimeError(
		f"Input signals have different lengths: ref - {ref.shape[0]}, test - {test.shape[0]}"
		)
		elif handle_differing_lengths == "cut":
		min_len = min(ref.shape[0], test.shape[0])
		ref = ref[:min_len, :]
		test = test[:min_len, :]
		elif handle_differing_lengths == "pad":
		max_len = max(ref.shape[0], test.shape[0])
		ref = np.pad(
		ref,
		((0, max_len - ref.shape[0]), (0, 0)),
		mode="constant",
		constant_values=0,
		)
		test = np.pad(
		test,
		((0, max_len - test.shape[0]), (0, 0)),
		mode="constant",
		constant_values=0,
		)

		diff = abs(test - ref)

		max_diff = int(diff.max())
		# Init result dict
		result = {
		"bitexact": True,
		"max_abs_diff": 0,
		@@ -360,52 +329,7 @@ def compare(
		result["nframes_diff"] = 0
		result["nframes_diff_percentage"] = 0.0

		if max_diff != 0:
		if diff.ndim == 1:
		nsamples_total = diff.shape
		nchannels = 1
		else:
		nsamples_total, nchannels = diff.shape
		max_diff_pos = np.nonzero(diff == max_diff)
		max_diff_pos = [
		max_diff_pos[0][0],
		max_diff_pos[0][0] // framesize,
		max_diff_pos[1][0],
		]

		first_diff_pos = np.nonzero(diff)
		first_diff_pos = [
		first_diff_pos[0][0],
		first_diff_pos[0][0] // framesize,
		first_diff_pos[1][0],
		]

		nsamples_diff = np.nonzero(diff)[0].size
		nsamples_diff_percentage = nsamples_diff / (nsamples_total * nchannels) * 100.0
		nframes = nsamples_total // framesize
		nframes_diff = 0

		result = {
		"bitexact": False,
		"max_abs_diff": max_diff,
		"max_abs_diff_pos_sample": max_diff_pos[0],
		"max_abs_diff_pos_channel": max_diff_pos[2],
		"nsamples_diff": nsamples_diff,
		"nsamples_diff_percentage": nsamples_diff_percentage,
		"first_diff_pos_sample": first_diff_pos[0],
		"first_diff_pos_channel": first_diff_pos[2],
		"first_diff_pos_frame": first_diff_pos[1],
		}

		if per_frame:
		for fr in range(nframes):
		diff_fr = diff[fr * framesize : ((fr + 1) * framesize), :]
		nframes_diff += 1 if diff_fr.nonzero()[0].size > 0 else 0
		nframes_diff_percentage = nframes_diff / nframes * 100.0
		result["max_abs_diff_pos_frame"] = max_diff_pos[1]
		result["nframes_diff"] = nframes_diff
		result["nframes_diff_percentage"] = nframes_diff_percentage

		# MLD (wav-diff) tool is run first, since it uses the input signals without length difference check for JBM test cases.
		if get_mld:

		def parse_wav_diff(proc: subprocess.CompletedProcess) -> float:
		@@ -428,7 +352,9 @@ def compare(
		)

		search_path = toolsdir.joinpath(curr_platform.replace("Windows", "Win32"))
		wdiff = search_path.joinpath("wav-diff").with_suffix(".exe" if curr_platform == "Windows" else "")
		wdiff = search_path.joinpath("wav-diff").with_suffix(
		".exe" if curr_platform == "Windows" else ""
		)

		if not wdiff.exists():
		wdiff = shutil.which("wav-diff")
		@@ -442,19 +368,21 @@ def compare(
		tmpfile_test = Path(tmpdir).joinpath("test.wav")

		### need to resample to 48kHz for MLD computation to be correct
		### write out and delete tmp variables to reduce memory usage
		if fs != 48000:
		ref_tmp = np.clip(
		resample(ref_orig.astype(float), fs, 48000), -32768, 32767
		)
		resample(ref.astype(float), fs, 48000), -32768, 32767
		).astype(np.int16)
		wavfile.write(str(tmpfile_ref), 48000, ref_tmp)
		del ref_tmp
		test_tmp = np.clip(
		resample(test_orig.astype(float), fs, 48000), -32768, 32767
		)
		resample(test.astype(float), fs, 48000), -32768, 32767
		).astype(np.int16)
		wavfile.write(str(tmpfile_test), 48000, test_tmp)
		del test_tmp
		else:
		ref_tmp = ref_orig.copy()
		test_tmp = test_orig.copy()

		wavfile.write(str(tmpfile_ref), 48000, ref_tmp.astype(np.int16))
		wavfile.write(str(tmpfile_test), 48000, test_tmp.astype(np.int16))
		wavfile.write(str(tmpfile_ref), 48000, ref.astype(np.int16))
		wavfile.write(str(tmpfile_test), 48000, test.astype(np.int16))

		cmd = [
		str(wdiff),
		@@ -479,6 +407,82 @@ def compare(

		result["MLD"] = mld_max

		# Run remaining tests after checking if the length differs

		lengths_differ = ref.shape[0] != test.shape[0]

		if lengths_differ:
		if handle_differing_lengths == "fail":
		raise RuntimeError(
		f"Input signals have different lengths: ref - {ref.shape[0]}, test - {test.shape[0]}"
		)
		elif handle_differing_lengths == "cut":
		min_len = min(ref.shape[0], test.shape[0])
		ref = ref[:min_len, :]
		test = test[:min_len, :]
		elif handle_differing_lengths == "pad":
		max_len = max(ref.shape[0], test.shape[0])
		ref = np.pad(
		ref,
		((0, max_len - ref.shape[0]), (0, 0)),
		mode="constant",
		constant_values=0,
		)
		test = np.pad(
		test,
		((0, max_len - test.shape[0]), (0, 0)),
		mode="constant",
		constant_values=0,
		)

		diff = abs(test - ref)

		max_diff = int(diff.max())

		if max_diff != 0:
		if diff.ndim == 1:
		nsamples_total = diff.shape
		nchannels = 1
		else:
		nsamples_total, nchannels = diff.shape
		max_diff_pos = np.nonzero(diff == max_diff)
		max_diff_pos = [
		max_diff_pos[0][0],
		max_diff_pos[0][0] // framesize,
		max_diff_pos[1][0],
		]

		first_diff_pos = np.nonzero(diff)
		first_diff_pos = [
		first_diff_pos[0][0],
		first_diff_pos[0][0] // framesize,
		first_diff_pos[1][0],
		]

		nsamples_diff = np.nonzero(diff)[0].size
		nsamples_diff_percentage = nsamples_diff / (nsamples_total * nchannels) * 100.0
		nframes = nsamples_total // framesize
		nframes_diff = 0

		result["bitexact"] = False
		result["max_abs_diff"] = max_diff
		result["max_abs_diff_pos_sample"] = max_diff_pos[0]
		result["max_abs_diff_pos_channel"] = max_diff_pos[2]
		result["nsamples_diff"] = nsamples_diff
		result["nsamples_diff_percentage"] = nsamples_diff_percentage
		result["first_diff_pos_sample"] = first_diff_pos[0]
		result["first_diff_pos_channel"] = first_diff_pos[2]
		result["first_diff_pos_frame"] = first_diff_pos[1]

		if per_frame:
		for fr in range(nframes):
		diff_fr = diff[fr * framesize : ((fr + 1) * framesize), :]
		nframes_diff += 1 if diff_fr.nonzero()[0].size > 0 else 0
		nframes_diff_percentage = nframes_diff / nframes * 100.0
		result["max_abs_diff_pos_frame"] = max_diff_pos[1]
		result["nframes_diff"] = nframes_diff
		result["nframes_diff_percentage"] = nframes_diff_percentage

		if get_ssnr:
		# length of segment is always 20ms
		len_seg = int(0.02 * fs)
		@@ -619,7 +623,9 @@ def limiter(x: np.ndarray, fs: int):
		fr_sig[idx_min] = -32768


		def get_framewise(x: np.ndarray, chunk_size: int, zero_pad=False) -> np.ndarray:
		def get_framewise(
		x: np.ndarray, chunk_size: int, zero_pad=False, scale_fac=1.0
		) -> np.ndarray:
		"""Generator to yield a signal frame by frame
		If array size is not a multiple of chunk_size, last frame contains the remainder

		@@ -631,6 +637,8 @@ def get_framewise(x: np.ndarray, chunk_size: int, zero_pad=False) -> np.ndarray:
		Size of frames to yield
		zero_pad: bool
		Whether to zero pad the last chunk if there are not enough samples
		scale_fac: float
		scale returned chunks with this factor

		Yields
		-------
		@@ -639,9 +647,9 @@ def get_framewise(x: np.ndarray, chunk_size: int, zero_pad=False) -> np.ndarray:
		"""
		n_frames = x.shape[0] // chunk_size
		for i in range(n_frames):
		yield x[i * chunk_size : (i + 1) * chunk_size, :]
		yield x[i * chunk_size : (i + 1) * chunk_size, :] * scale_fac
		if x.shape[0] % chunk_size:
		last_chunk = x[n_frames * chunk_size :, :]
		last_chunk = x[n_frames * chunk_size :, :] * scale_fac
		if zero_pad:
		yield np.pad(
		last_chunk, [[0, chunk_size - (x.shape[0] % chunk_size)], [0, 0]]
		@@ -675,29 +683,28 @@ def ssnr(
		"""
		Calculate Segmental SNR for test_sig to ref_sig as defined in ISO/IEC 14496-4
		"""
		ss = list()

		ref_sig_norm = ref_sig / -np.iinfo(np.int16).min
		test_sig_norm = test_sig / -np.iinfo(np.int16).min

		# check if diff of signal is zero already, then SNR is infinite, since no noise
		diff_sig_norm = ref_sig_norm - test_sig_norm
		if np.all(diff_sig_norm == 0):
		return np.asarray([np.inf] * ref_sig_norm.shape[1])
		signals_equal = (ref_sig == test_sig).all()
		if signals_equal:
		return np.asarray([np.inf] * ref_sig.shape[1])

		channels_identical_idx = np.sum(np.abs(diff_sig_norm), axis=0) == 0
		n_channels = ref_sig.shape[1]
		channels_identical_idx = np.asarray(
		[(ref_sig[:, c] == test_sig[:, c]).all() for c in range(n_channels)]
		)

		# iterate over test signal too to allow power comparison to threshold
		ss = list()
		denom_add = 10**-13 * len_seg
		segment_counter = np.zeros(ref_sig.shape[1])

		# iterate over test signal too to allow power comparison to threshold
		for ref_seg, diff_seg, test_seg in zip(
		get_framewise(ref_sig_norm, len_seg, zero_pad=True),
		get_framewise(diff_sig_norm, len_seg, zero_pad=True),
		get_framewise(test_sig_norm, len_seg, zero_pad=True),
		# apply normalization factor on the chunks to avoid big reallocation of the whole signal
		norm_fac = 1 / -np.iinfo(np.int16).min
		for ref_seg, test_seg in zip(
		get_framewise(ref_sig, len_seg, zero_pad=True, scale_fac=norm_fac),
		get_framewise(test_sig, len_seg, zero_pad=True, scale_fac=norm_fac),
		):
		nrg_ref = np.sum(ref_seg**2, axis=0)
		nrg_diff = np.sum(diff_seg**2, axis=0)
		nrg_diff = np.sum((test_seg - ref_seg) ** 2, axis=0)

		ss_seg = np.log10(1 + nrg_ref / (denom_add + nrg_diff))

tests/constants.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -22,7 +22,7 @@ DMX_MLD = "DMX_MLD"
		DMX_SSNR = "DMX_SSNR"

		# regex patterns for parsing the output from comparisons -> mainly for BASOP ci
		MLD_PATTERN = r"MLD: ([\d\.]*)"
		MLD_PATTERN = r"MLD: (\d+\.\d[eE][-+]\d*)"
		MAX_DIFF_PATTERN = r"MAX_ABS_DIFF: (\d*)"
		ODG_PATTERN_PQEVALAUDIO = r"Objective Difference Grade: (-\d\.\d*)"
		ODG_PATTERN = r"(?<!Delta-)ODG: (-\d\.\d*)"