Commit 6379031b authored by Jan Kiene's avatar Jan Kiene
Browse files

Merge branch 'ci/refactor-histogram-creation' into kiene/tmp-branch-for-ltv-split-testing

parents d5b5b8ec e96ede2c
Loading
Loading
Loading
Loading
+0 −184
Original line number Diff line number Diff line
#!/usr/bin/env python3

import argparse
import math
import numpy as np

# These next three lines are added as a precaution in case the gitlab runner
# needs DISPLAY to render the plots, even if they are written to file.
import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt
import csv
import os
from parse_xml_report import IVAS_FORMATS, EVS_FORMATS, IVAS_CATEGORIES, EVS_CATEGORIES

"""
Parses a CSV report and creates a summary report.
"""


# Main routine: parse CLI arguments, bin the requested measure per
# format/category, write a CSV summary and (optionally) one stacked
# histogram image per format.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parses a CSV report and creates a summary report."
    )
    parser.add_argument(
        "csv_report",
        type=str,
        help="CSV report file of test cases, e.g. report.csv",
    )
    parser.add_argument(
        "csv_summary", type=str, help="Output CSV file, e.g. summary.csv"
    )
    parser.add_argument(
        "csv_image",
        type=str,
        nargs="?",
        help="Summary image file, e.g. summary.png",
        default=None,
    )
    parser.add_argument(
        "--measure",
        type=str,
        nargs=1,
        help="Measure, any of: MLD, DIFF, SSNR, ODG, default: MLD",
        default=["MLD"],
    )
    parser.add_argument(
        "--evs",
        action="store_true",
        help="Parse using EVS 26.444 formats",
        default=False,
    )
    parser.add_argument(
        "--diff",
        action="store_true",
        help="Use limits for diff scores",
        default=False,
    )
    args = parser.parse_args()
    csv_report = args.csv_report
    csv_summary = args.csv_summary
    csv_image = args.csv_image
    measure = args.measure[0]

    # Select the format/category regex tables for the requested test suite.
    if args.evs:
        FORMATS = EVS_FORMATS
        CATEGORIES = EVS_CATEGORIES
    else:
        FORMATS = IVAS_FORMATS
        CATEGORIES = IVAS_CATEGORIES

    # Map measure name -> (CSV column label, histogram bin edges).
    # In --diff mode the edges are derived from the data further below (None).
    if args.diff:
        limits_per_measure = {
            "MLD": ("MLD", None),
            "DIFF": ("MAXIMUM ABS DIFF", None),
            "SSNR": ("MIN_SSNR", None),
            "ODG": ("MIN_ODG", None),
            "DELTA_ODG": ("DELTA_ODG", None),
        }
    else:
        limits_per_measure = {
            "MLD": ("MLD", [0, 1, 2, 3, 4, 5, 10, 20, math.inf]),
            "DIFF": (
                "MAXIMUM ABS DIFF",
                [0, 16, 256, 1024, 2048, 4096, 8192, 16384, 32769],
            ),
            # BUGFIX: the edge list contained 40 twice, which produced an
            # always-empty zero-width histogram bin; the duplicate is removed.
            "SSNR": ("MIN_SSNR", [-math.inf, 0, 10, 20, 30, 40, 50, 60, 100]),
            "ODG": (
                "MIN_ODG",
                [-5, -4, -3, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5],
            ),
            "DELTA_ODG": (
                "DELTA_ODG",
                [-5, -4, -3, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5],
            ),
        }
    (measure_label, limits) = limits_per_measure[measure]

    # Load CSV report into {testcase: {column header: value}}.
    results_sorted = {}
    with open(csv_report, "r") as fp:
        reader = csv.reader(fp, delimiter=";")
        header = next(reader)
        keys = header[1:]
        for row in reader:
            testcase = row[0]
            results_sorted[testcase] = {}
            for k, val in zip(keys, row[1:]):
                results_sorted[testcase][k] = val

    # --diff mode: derive bin edges from the data.  Round the start down and
    # the step up to two significant digits, then take 10 equidistant edges.
    if limits is None:
        vals = [
            float(x)
            for x in [
                m[measure_label]
                for m in results_sorted.values()
                if m[measure_label] != "None" and m[measure_label] != ""
            ]
        ]
        start = min(vals)
        f = 10 ** (2 - int(np.floor(np.log10(abs(start)))) - 1)
        start = np.floor(start * f) / f
        step = (max(vals) - start) / 10
        f = 10 ** (2 - int(np.floor(np.log10(abs(step)))) - 1)
        step = np.ceil(step * f) / f
        # BUGFIX: the stop value must be relative to start; the original
        # np.arange(start, 10 * step, step) produced a wrong (possibly empty)
        # edge list whenever start was not close to zero.
        limits = np.arange(start, start + 10 * step, step)

    # Output CSV file and per-format stacked histograms.
    with open(csv_summary, "w") as fp:
        limits_labels = [f"{a:g}" for a in limits] + [
            "",
            "None",
        ]  # Put None cases in separate bin
        headerline = "Format;Category;" + ";".join(limits_labels) + "\n"
        fp.write(headerline)

        for fmt in FORMATS:
            fig, ax = plt.subplots()
            bottom = np.zeros(len(limits_labels))
            for cat in CATEGORIES:
                values = [
                    x
                    for x in [
                        m[measure_label]
                        for m in results_sorted.values()
                        if m["Format"] == fmt and m["Category"] == cat
                    ]
                ]
                # Create separate bin for None (errors)
                val = [float(x) for x in values if x != "None" and x != ""]
                none = [sum([1 for x in values if x == "None" or x == ""])]
                hist, _ = np.histogram(val, limits)
                data = np.array(list(hist) + [0] + none + [0])

                # CSV output.  BUGFIX: join with ";" like the header line;
                # the original used "; " which put stray spaces in the cells.
                line = f"{fmt};{cat};{';'.join(map(str, data))}\n"
                fp.write(line)

                # Matplotlib histogram: stack categories on top of each other.
                ax.bar(
                    limits_labels,
                    data,
                    1,
                    align="edge",
                    edgecolor="black",
                    linewidth=0.5,
                    label=cat,
                    bottom=bottom,
                )
                bottom += data

            # Histogram layout
            ax.set_title(fmt)
            ax.legend(loc="best")
            ax.set_xlabel(measure_label)
            if "DIFF" in measure_label:
                ax.set_xticks(range(len(limits_labels)), limits_labels, rotation=35)
            ax.set_ylabel("Number of test cases")

            fig.set_figheight(4)
            fig.set_figwidth(6)
            if csv_image:
                base, ext = os.path.splitext(csv_image)
                plt.savefig(f"{base}_{fmt}{ext}")
            # Release the figure so iterating many formats does not
            # accumulate open matplotlib figures.
            plt.close(fig)
+141 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

import argparse
import math
import pathlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from typing import List


# Fixed histogram bin edges per quality measure.
# BUGFIX: MIN_SSNR contained the edge 40 twice, which created a zero-width,
# always-empty bin in np.histogram; the duplicate is removed.
BINS_FOR_MEASURES = {
    "MLD": [0, 1, 2, 3, 4, 5, 10, 20, math.inf],
    "MAX_ABS_DIFF": [0, 16, 256, 1024, 2048, 4096, 8192, 16384, 32769],
    "MIN_SSNR": [-math.inf, 0, 10, 20, 30, 40, 50, 60, 100],
    "MIN_ODG": [-5, -4, -3, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5],
    "DELTA_ODG": [-5, -4, -3, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5],
}

# Measures plotted when --measures is not given on the command line.
DEFAULT_MEASURES = ["MAX_ABS_DIFF", "MLD", "MIN_SSNR", "MIN_ODG"]


def get_bins_for_diff(data: pd.Series):
    """Return 10 equally spaced bin edges spanning the value range of *data*."""
    lowest = data.min()
    highest = data.max()
    return np.linspace(lowest, highest, num=10)


def create_histograms(
    df: pd.DataFrame,
    measures: List[str],
    output_folder: pathlib.Path,
    display_only: bool,
    bins_for_measures=BINS_FOR_MEASURES,
    prefix="",
):
    """Create one stacked histogram per (measure, format) from the report.

    For every measure one figure per format is drawn, with a stacked bar
    series per category.  Test cases whose ``result`` column is "ERROR" are
    counted into a dedicated "ERROR" bin instead of the value histogram.

    Parameters
    ----------
    df : pd.DataFrame
        Report table; must contain the columns "format", "category",
        "result" and one column per measure (named ``prefix + measure``).
    measures : List[str]
        Measure names to plot.
    output_folder : pathlib.Path
        Folder the PNG files are written to (created if missing).
    display_only : bool
        If True, show the figures interactively instead of writing files.
    bins_for_measures : dict
        Measure name -> bin edges; measures without an entry fall back to
        data-driven bins via get_bins_for_diff().
    prefix : str
        Common column prefix used when collecting measures from *df*.
    """
    formats = df["format"].unique()
    categories = df["category"].unique()

    if not display_only:
        output_folder.mkdir(exist_ok=True, parents=True)

    for measure in measures:
        measure_in_df = prefix + measure
        # BUGFIX: dict.get() evaluated the data-driven fallback eagerly for
        # every measure (wasted work, and a KeyError if the column is absent
        # even though fixed bins exist).  Only derive bins when needed.
        if measure in bins_for_measures:
            bins = bins_for_measures[measure]
        else:
            bins = get_bins_for_diff(df[measure_in_df])
        # One x-axis label per bin edge, plus an empty spacer and "ERROR".
        bin_labels = [f"{edge}" for edge in bins] + ["", "ERROR"]
        for fmt in formats:
            fig, ax = plt.subplots()
            ax.xaxis.set_major_formatter("{x:.1f}")
            bottom = np.zeros(len(bin_labels))
            for cat in categories:
                data_mask = np.logical_and(df["format"] == fmt, df["category"] == cat)
                df_slice = df[data_mask]
                error_mask = df_slice["result"] == "ERROR"
                n_errors = np.sum(error_mask)
                df_hist = df_slice[np.logical_not(error_mask)]

                counts, _ = np.histogram(df_hist[measure_in_df], bins)

                # BUGFIX: np.concat only exists as an alias in NumPy >= 2.0;
                # np.concatenate is the portable spelling.
                data = np.concatenate([counts, [0], [n_errors], [0]])
                ax.bar(
                    bin_labels,
                    data,
                    1,
                    align="edge",
                    edgecolor="black",
                    linewidth=0.5,
                    label=cat,
                    bottom=bottom,
                )
                bottom += data

            # Histogram layout
            ax.set_title(fmt)
            ax.legend(loc="best")
            ax.set_xlabel(measure)
            if "DIFF" in measure:
                ax.set_xticks(range(len(bin_labels)), bin_labels, rotation=35)
            ax.set_ylabel("Number of test cases")

            fig.set_figheight(4)
            fig.set_figwidth(6)
            plt.tight_layout()

            if not display_only:
                image_file = f"histogram_{measure}_{fmt}.png"
                image_path = output_folder.joinpath(image_file)
                plt.savefig(image_path)
                # Release the saved figure so long runs do not accumulate
                # open matplotlib figures.
                plt.close(fig)

    if display_only:
        plt.show()


# CLI entry point: read the CSV report and delegate to create_histograms().
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parses a csv file generated by parse_xml_report and creates histograms for the given measures."
    )
    parser.add_argument(
        "csv_report",
        type=str,
        help="CSV report file as generated by parse_xml_report.py",
    )
    parser.add_argument(
        "output_folder",
        type=pathlib.Path,
        # BUGFIX: the help text was cut off mid-sentence.
        help="Output folder for writing the histogram images",
    )
    parser.add_argument(
        "--display-only",
        action="store_true",
        help="Do not write the output files, but display the graphs instead.",
    )
    parser.add_argument(
        "--no-bins",
        action="store_true",
        # BUGFIX: this script creates histograms, not spectrograms.
        help="""Do not use the hardcoded bins for creating the histograms.
Use this for visualising diff scores.""",
    )
    allowed_measures = " ".join(BINS_FOR_MEASURES.keys())
    parser.add_argument(
        "--measures",
        nargs="+",
        default=DEFAULT_MEASURES,
        help=f"Measures to plot from the csv file. One of {allowed_measures}",
    )
    parser.add_argument(
        "--prefix",
        default="",
        # BUGFIX: the option is a prefix; the help text said "suffix".
        help="Common prefix to use when collecting measures from the input csv file",
    )
    args = parser.parse_args()
    df = pd.read_csv(args.csv_report)

    # With --no-bins an empty table forces data-driven bins for all measures.
    bins_for_measures = BINS_FOR_MEASURES
    if args.no_bins:
        bins_for_measures = {}

    create_histograms(
        df,
        args.measures,
        args.output_folder,
        args.display_only,
        bins_for_measures,
        args.prefix,
    )
+121 −185
Original line number Diff line number Diff line
#!/usr/bin/env python3

import argparse
import re
import pandas as pd
from xml.etree import ElementTree
from collections import Counter


SPLIT_STRING = "_split"
WHOLE_STRING = "_whole"


class TestcaseParser(dict):
    def __init__(self, testcases: list):
        super().__init__()

        for tc in testcases:
            self.parse_testcase(tc)

    def parse_testcase(self, testcase):
        """
Parse a junit report and create a summary report.
        Get all properties + name for a testcase
        """

PROPERTIES = ["MLD", "MAXIMUM ABS DIFF", "MIN_SSNR", "MIN_ODG"]

IVAS_FORMATS = {
    "Stereo": r"stereo",
    "ISM": r"ISM",
    "Multichannel": r"Multi-channel|MC",
    "MASA": r"(?<!O)MASA",
    "SBA": r"(?<!O)SBA",
    "OSBA": r"OSBA",
    "OMASA": r"OMASA",
    "Renderer": r"renderer",
}
        filename = testcase.get(
            "file", testcase.get("classname").replace(".", "/") + ".py"
        )
        fulltestname = filename + "::" + testcase.get("name")

EVS_FORMATS = {
    "AMRWBIO_dec": r"Readme_AMRWB_IO_dec",
    "AMRWBIO_enc": r"Readme_AMRWB_IO_enc",
    "EVS_dec": r"Readme_EVS_dec",
    "EVS_enc": r"Readme_EVS_enc",
    "EVS_JBM_dec": r"Readme_JBM_dec",
        result = get_result_from_testcase(testcase)
        # for ERRORS, two testcases are recorded, one with FAIL and one with ERROR
        # if we already have this testcase, do a sanity check and set result to ERROR
        if fulltestname in self:
            results = [self[fulltestname]["result"], result]
            assert any(r == "ERROR" for r in results)
            self[fulltestname]["result"] = "ERROR"
            return

        ret = {}
        ret["testcase"] = fulltestname
        ret["result"] = result
        properties = {
            p.get("name"): p.get("value") for p in testcase.findall(".//property")
        }

NO_FORMATS = {"Default": r".*"}
        ### handle split comparison results
        split_props = {k: v for k, v in properties.items() if SPLIT_STRING in k}
        whole_props = {k: v for k, v in properties.items() if WHOLE_STRING in k}

IVAS_CATEGORIES = {
    "Normal operation": r".*",
    "DTX": r"DTX",
    "PLC": r"%",
    "Bitrate switching": r"br sw|bitrate switching",
    "JBM": r"JBM",
}
        if len(split_props) > 0 and len(whole_props) > 0:
            measures_from_split = set(
                [m.split(SPLIT_STRING)[0] for m in split_props.keys()]
            )
            measures_from_whole = set(
                [m.split(WHOLE_STRING)[0] for m in whole_props.keys()]
            )
            assert measures_from_split == measures_from_whole
            measures = measures_from_whole

            # collect existing split suffixes by evaluating one of the measures only
            m_tmp = measures.pop()
            splits = sorted(
                [
                    k.split(SPLIT_STRING)[-1]
                    for k in split_props.keys()
                    if k.startswith(m_tmp)
                ]
            )

EVS_CATEGORIES = {
    "Normal operation": r".*",
    "DTX": r"DTX",
    "PLC": r"b10|f06|EPF",
    "Bitrate switching": r"sw",
    "JBM": r"JBM",
            # record each split under a separate key
            # the dict per key has the same fulltestname and an additional key "split"
            # this way, the resulting DataFrame in the end can be split by testnames
            for s in splits:
                split_key = f"{fulltestname} - {s}"
                ret_split = {"testcase": fulltestname, "split": s}
                for m in measures:
                    ret_split.update({m: split_props[m + SPLIT_STRING + f"{s}"]})
                self[split_key] = ret_split

        # it can be the case that there are no splits defined in the pytest suite, e.g. for the renderer
        # then, there are only "_whole" values recorded where we only need to remove the suffix
        # this if also handles the split case - if there are splits, there was also a "_whole" comparison done
        if len(whole_props) > 0:
            properties = {
                k.replace(WHOLE_STRING, ""): v for k, v in whole_props.items()
            }
            properties["split"] = "whole"

NO_CATEGORIES = {"N/A": r".*"}
        ret.update(properties)
        self[fulltestname] = ret

    def to_df(self) -> pd.DataFrame:
        testcases = list(self.values())
        df = pd.DataFrame(testcases)
        return df

def get_format_from_fulltestname(fulltestname: str) -> str:
    # For the format, favor the earliest match in the test case name
    fmt = min(
        [
            (f, re.search(FORMATS[f], fulltestname, re.IGNORECASE).end())
            for f in FORMATS
            if re.search(FORMATS[f], fulltestname, re.IGNORECASE) is not None
        ],
        key=lambda x: x[1],
    )[0]
    return fmt

def xml_to_dataframe(xml_report: str) -> pd.DataFrame:
    tree = ElementTree.parse(xml_report)
    root = tree.getroot()

    testcases = root[0].findall("testcase")
    testcases = [tc for tc in testcases if tc.find("skipped") is None]

    testcase_parser = TestcaseParser(testcases)
    testcase_df = testcase_parser.to_df()

def get_category_from_fulltestname(fulltestname: str) -> str:
    cat = [
        c for c in CATEGORIES if re.search(CATEGORIES[c], fulltestname, re.IGNORECASE)
    ][-1]
    return cat
    return testcase_df


def get_testresult(testcase: ElementTree.Element) -> str:
def get_result_from_testcase(testcase: ElementTree.Element) -> str:
    if testcase.find("failure") is not None:
        testresult = "FAIL"
    elif testcase.find("error") is not None:
@@ -81,10 +119,21 @@ def get_testresult(testcase: ElementTree.Element) -> str:
    return testresult


# Main routine
def main(xml_report, csv_file):
    df = xml_to_dataframe(xml_report)
    df.to_csv(csv_file, index=False)

    n_testcases = len(df)
    count = Counter(df["result"])

    print(
        f"Parsed testsuite with {n_testcases} tests: {count['PASS']} passes, {count['FAIL']} failures and {count['ERROR']} errors."
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parse a junit report and create an MLD summary report."
        description="Parse junit report from IVAS pytest suite and convert to csv file"
    )
    parser.add_argument(
        "xml_report",
@@ -92,119 +141,6 @@ if __name__ == "__main__":
        help="XML junit report input file, e.g. report-junit.xml",
    )
    parser.add_argument("csv_file", type=str, help="Output CSV file, e.g. report.csv")
    parser.add_argument(
        "--evs",
        action="store_true",
        help="Parse using EVS 26.444 formats",
    )
    parser.add_argument(
        "--clipping",
        action="store_true",
        help="Extract clipping information. Available if encoder has been run with DEBUGGING active.",
    )
    parser.add_argument(
        "--delta_odg",
        action="store_true",
        help="Extract Delta ODG information.",
    )
    parser.add_argument(
        "--skip_formats",
        action="store_true",
        help="Parse without formats and categories. Suitable for general tests which do not match the IVAS categories.",
    )
    args = parser.parse_args()
    xml_report = args.xml_report
    csv_file = args.csv_file
    FORMATS = IVAS_FORMATS
    CATEGORIES = IVAS_CATEGORIES
    if args.evs:
        FORMATS = EVS_FORMATS
        CATEGORIES = EVS_CATEGORIES
    else:
        FORMATS = IVAS_FORMATS
        CATEGORIES = IVAS_CATEGORIES
    if args.clipping:
        PROPERTIES += ["ENC_CORE_OVL", "MAX_OVL", "MIN_OVL"]
    if args.delta_odg:
        PROPERTIES += ["DELTA_ODG"]
    if args.skip_formats:
        FORMATS = NO_FORMATS
        CATEGORIES = NO_CATEGORIES

    tree = ElementTree.parse(xml_report)

    testsuite = tree.find(".//testsuite")
    testcases = tree.findall(".//testcase")

    # Prepare result structure
    results = {}
    for fmt in FORMATS:
        results[fmt] = {}
        for cat in CATEGORIES:
            results[fmt][cat] = {}
    count = {"PASS": 0, "FAIL": 0, "ERROR": 0}

    # filter out skipped testcases
    testcases = [tc for tc in testcases if tc.find(".//skipped") is None]

    for testcase in testcases:
        filename = testcase.get(
            "file", testcase.get("classname").replace(".", "/") + ".py"
        )
        fulltestname = filename + "::" + testcase.get("name")

        # only include the properties listed above
        # we need to find all occurences with any suffixes to also handle the split-comparison
        # runs correctly
        properties_found = {
            p.get("name"): p.get("value")
            for p in testcase.findall(".//property")
            if "CHANNEL" not in p.get("name")
            and any(p_listed in p.get("name") for p_listed in PROPERTIES)
        }

        # Identify format and category (mode of operation)
        # For the format, favor the earliest match in the test case name
        fmt = get_format_from_fulltestname(fulltestname)
        # Note that only one category is selected, even though several may match, e.g. bitrate switching + JBM. Here the last match is picked.
        cat = get_category_from_fulltestname(fulltestname)

        testresult = get_testresult(testcase)

        # get all present suffixes
        pattern = re.compile("|".join(PROPERTIES))
        suffixes = set(pattern.sub("", p) for p in properties_found)

        # record the result for all suffixes
        # For ERROR cases, both a FAIL and an ERROR result is generated.
        # Here, a FAIL would be overwritten with an ERROR result since it has the same name.
        for s in suffixes:
            fulltestname_suffix = f"{fulltestname}{s}"
            results[fmt][cat][fulltestname_suffix] = {"Result": testresult}
            for propertyname in PROPERTIES:
                results[fmt][cat][fulltestname_suffix][propertyname] = properties_found[
                    f"{propertyname}{s}"
                ]
        count[testresult] += 1

    header = ["testcase", "Format", "Category", "Result"] + PROPERTIES

    # Write CSV file
    with open(csv_file, "w") as outfile:
        headerline = ";".join(header) + "\n"
        outfile.write(headerline)
        for fmt in FORMATS:
            for cat in CATEGORIES:
                results[fmt][cat] = dict(sorted(results[fmt][cat].items()))
                for test in results[fmt][cat]:
                    line = (
                        ";".join(
                            [test, fmt, cat] + list(results[fmt][cat][test].values())
                        )
                        + "\n"
                    )
                    outfile.write(line)

    print(
        f"Parsed testsuite with {count['PASS']+count['FAIL']+count['ERROR']} tests: {count['PASS']} passes, {count['FAIL']} failures and {count['ERROR']} errors."
    )
    args = parser.parse_args()
    main(args.xml_report, args.csv_file)
+28 −5
Original line number Diff line number Diff line
@@ -54,6 +54,7 @@ from tests.conftest import (
    compare_dmx_signals,
    log_dbg_msg,
    get_split_idx,
    get_format_from_enc_opts,
)
from tests.testconfig import PARAM_FILE
from tests.constants import (
@@ -61,6 +62,11 @@ from tests.constants import (
    MAX_ENC_STATS_DIFF,
    SCRIPTS_DIR,
    MAX_ENC_DIFF,
    CAT_NORMAL,
    CAT_DTX,
    CAT_BITRATE_SWITCHING,
    CAT_JBM,
    CAT_PLC,
)
from tests.renderer.utils import check_and_makedir, binauralize_input_and_output

@@ -298,6 +304,10 @@ def run_test(
                "All non-passthrough modes are skipped when --compare-to-input is set"
            )

    testcase_props = {}
    testcase_props["format"] = get_format_from_enc_opts(enc_opts)
    testcase_props["category"] = CAT_NORMAL

    tag_str = convert_test_string_to_tag(test_tag)

    # evaluate encoder options
@@ -317,6 +327,9 @@ def run_test(
    bitrate = enc_split.pop()
    in_sr = sampling_rate

    if "-dtx" in enc_opts:
        testcase_props["category"] = CAT_DTX

    # bitrate can be a filename: change it to an absolute path
    if not bitrate.isdigit():
        if compare_enc_dmx:
@@ -324,11 +337,20 @@ def run_test(
                "Rate switching + --compare_enc_dmx currently skipped due to DEBUGGING code limitations with varying number of transport channels"
            )
        bitrate = Path(bitrate[3:]).absolute()
        testcase_props["category"] = CAT_BITRATE_SWITCHING

    testv_base = testv_file.split("/")[-1]
    if testv_base.endswith(".pcm"):
        testv_base = testv_base[:-4]

    if sim_opts != "":
        testcase_props["category"] = CAT_JBM
    if eid_opts != "":
        testcase_props["category"] = CAT_PLC

    for k, v in testcase_props.items():
        dut_encoder_frontend.record_property(k, v)

    assert bitstream_file == "bit"
    # in the parameter file, only "bit" is used as bitstream file name
    # -> construct bitstream filename
@@ -375,8 +397,8 @@ def run_test(

            # avoid double recording of the encoder diff
            if encoder_only:
                props = parse_properties(cmp_result_msg, False, [MAX_ENC_DIFF])
                for k, v in props.items():
                result_props = parse_properties(cmp_result_msg, False, [MAX_ENC_DIFF])
                for k, v in result_props.items():
                    dut_encoder_frontend.record_property(k, v)

    if encoder_only:
@@ -417,7 +439,6 @@ def run_test(
        )

    # check for eid-xor command line

    if eid_opts != "":
        eid_split = eid_opts.split()
        assert len(eid_split) >= 3, "eid-xor expects at least 3 parameters"
@@ -654,8 +675,10 @@ def run_test(
        for output_differs, reason, suffix in zip(
            output_differs_parts, reason_parts, prop_suffix
        ):
            props = parse_properties(reason, output_differs, props_to_record, suffix)
            for k, v in props.items():
            result_props = parse_properties(
                reason, output_differs, props_to_record, suffix
            )
            for k, v in result_props.items():
                dut_decoder_frontend.record_property(k, v)

        metadata_differs = False
+54 −1

File changed.

Preview size limit exceeded, changes collapsed.

Loading