Merge branch '20250131-update-diff-complexity' into 'main' (e0a9007c) · Commits · IVAS Codec Public Collaboration / IVAS Codec

scripts/diff_complexity.py

+245 −137

Original line number	Diff line number	Diff line
		@@ -28,6 +28,13 @@ submitted to and settled by the final, binding jurisdiction of the courts of Mun
		accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
		the United Nations Convention on Contracts on the International Sales of Goods.
		"""
		import argparse
		import re
		from io import StringIO
		from pathlib import Path
		from shutil import get_terminal_size

		import pandas as pd

		"""
		Script to diff IVAS logs produced by WMC tool instrumented binaries
		@@ -38,47 +45,65 @@ the United Nations Convention on Contracts on the International Sales of Goods.
		This allows:
		cdiff <BSL> <CUT>
		"""
		import argparse
		import re
		from io import StringIO
		from shutil import get_terminal_size

		import pandas as pd

		REGEX_WMOPS_TABLE = r"\s?\w+(\s+\w+\.\w+)(\s+\w+\.\w+){3,6}"
		REGEX_ROM = (
		r"((\w+\s+\w+)\|(\w+\s+\w+\s+)\(.+\))\s?size\s+\(.+\/(\w+)\/.+\)\:\s(\d+)\s+\w+"
		)
		# without WMC_AUTO, can collide with manual instrumentation
		# REGEX_WMOPS_TABLE = r"(\w+)(?:\[WMC_AUTO\])?((\s+\d+\.\d+){4,})"
		REGEX_WMOPS_TABLE = r"(\w+(\[WMC_AUTO\])?)((\s+\d+\.\d+){4,})"
		REGEX_ROM = r"(\w+\s+ROM.+)size.+\/([\w\_]+)\/.+:\s(\d+)"
		REGEX_MAX_MEM = r"(Maximum\s+.+)\s+size\:\s+(\d+)"

		SUMMARY_COL_NAMES = ["BSL", "CUT", "CUT - BSL"]

		SORT_DICT = {
		"min": "WMOPs min",
		"avg": "WMOPs avg",
		"max": "WMOPs max",
		"cmin": "WMOPs(cum) min",
		"cavg": "WMOPs(cum) avg",
		"cmax": "WMOPs(cum) max",
		"calls": "Calls",
		}

		PD_STRING_KWARGS = {
		"index": False,
		"index": True,
		"justify": "center",
		"max_colwidth": 30,
		}

		NOCOLOUR = "\x1b[0m"
		RED = "\x1b[31m"
		GREEN = "\x1b[32m"
		BLUE = "\x1b[34m"

		ROUTINE_NAME_MAP = {"_ivas_fx": "", "_fx": "", "_w32_x": ""}


		def log2df(log_file):
		def sanitize_routine_names(df, name_map=None):
		    """
		    Remove or rewrite substrings of the routine names in a WMOPs dataframe

		    Every key of the mapping that occurs in the "Routine" column is replaced
		    by its value. Keys are matched literally (not as regular expressions) and
		    are applied in the iteration order of the mapping.

		    df       : dataframe with a string column named "Routine"; the column is
		               overwritten in place
		    name_map : dict of {substring: replacement}, defaults to ROUTINE_NAME_MAP

		    Returns the same dataframe object that was passed in.
		    """
		    if name_map is None:
		        name_map = ROUTINE_NAME_MAP

		    for old, new in name_map.items():
		        # regex=False is passed explicitly: the keys are plain substrings and
		        # the default value of this flag is not the same in all pandas versions
		        df["Routine"] = df["Routine"].str.replace(old, new, regex=False)

		    return df


		def log2df(log_file, rename=False):
		"""
		Parse a WMC tool logfile to a pandas dataframe
		"""
		with open(log_file, "r") as log:
		logfile = "".join(line for line in log)

		# apply regexes
		wmops = [
		re.sub(r"\s+", ",", w.group().strip())
		re.sub(r"\s+", ",", w.expand(r"\1\3"))
		for w in re.finditer(REGEX_WMOPS_TABLE, logfile)
		]
		memory = [m.expand(r"\1 (\4), \5") for m in re.finditer(REGEX_ROM, logfile)]
		memory = [m.expand(r"\1(\2),\3") for m in re.finditer(REGEX_ROM, logfile)]
		memory.extend([m.expand(r"\1,\2") for m in re.finditer(REGEX_MAX_MEM, logfile)])

		if not wmops or not memory:
		raise ValueError(f"Error parsing {log_file}!")

		# convert to dataframe
		wmops = pd.read_csv(
		StringIO("\n".join(wmops)),
		header=None,
		@@ -94,26 +119,47 @@ def log2df(log_file):
		],
		)
		memory = pd.read_csv(
		StringIO("\n".join(memory)), header=None, names=["Type", "Words"]
		)
		StringIO("\n".join(memory)), header=None, names=["Type", "Bytes"]
		).set_index("Type")
		memory["Bytes"] = memory["Bytes"].astype("int")

		# sanitize names
		if rename:
		wmops = sanitize_routine_names(wmops)

		return wmops, memory


		def main(bsl, cut, out_file, quiet=False, verbose=False):
		if not quiet:
		print(GREEN + f"Baseline conditon: {bsl}" + NOCOLOUR)
		print(RED + f"Condition under test: {cut}" + NOCOLOUR)
		def diff_wmops(bsl, cut):
		# get total values
		def get_tot(df):
		return df[df["Routine"] == "total"].set_index("Routine").iloc[:, 1:4]

		bsl_wmops, bsl_mem = log2df(bsl)
		cut_wmops, cut_mem = log2df(cut)
		bsl_wmops_tot = get_tot(bsl)
		cut_wmops_tot = get_tot(cut)

		if verbose:
		PD_STRING_KWARGS["line_width"] = get_terminal_size()[0]
		# build the wmops and memory tables
		table_wmops = pd.concat(
		[bsl_wmops_tot, cut_wmops_tot, cut_wmops_tot - bsl_wmops_tot]
		).T
		table_wmops.columns = SUMMARY_COL_NAMES

		return table_wmops


		def diff_mem(bsl, cut):
		    """
		    Build the memory summary table from the baseline and the condition under test

		    The result holds bsl, cut and (cut - bsl) side by side, with the columns
		    labelled by SUMMARY_COL_NAMES.
		    """
		    delta = cut - bsl
		    summary = pd.concat([bsl, cut, delta], axis=1)
		    summary.columns = SUMMARY_COL_NAMES
		    return summary


		def diff_routines(bsl, cut):
		# outer merge on routines, only identical rows are tagged "BOTH"
		merge = (
		pd.merge(
		cut_wmops.set_index("Routine").drop("total").reset_index(),
		bsl_wmops.set_index("Routine").drop("total").reset_index(),
		cut.set_index("Routine").drop("total").reset_index(),
		bsl.set_index("Routine").drop("total").reset_index(),
		how="outer",
		indicator="Source",
		)
		@@ -122,95 +168,142 @@ def main(bsl, cut, out_file, quiet=False, verbose=False):
		)
		merge.index = merge.index.rename_categories(
		{
		"left_only": RED + "CUT",
		"right_only": GREEN + "BSL",
		"both": BLUE + "BOTH",
		"left_only": "CUT",
		"right_only": "BSL",
		"both": "BOTH",
		}
		)

		unique = (
		merge.drop(BLUE + "BOTH", errors="ignore")
		# split into differing and identical routines
		diff = (
		merge.drop("BOTH", errors="ignore")
		.reset_index()
		.sort_values(["Routine", "Source"], ascending=[True, False])
		)
		common = (
		merge.drop(GREEN + "BSL", errors="ignore")
		.drop(RED + "CUT", errors="ignore")
		same = (
		merge.drop("BSL", errors="ignore")
		.drop("CUT", errors="ignore")
		.reset_index()
		.sort_values("Routine", ascending=False)
		)

		if not unique.empty:
		print(
		"Complexity difference of routines".center(
		PD_STRING_KWARGS["line_width"], "-"
		)
		)
		print(unique.to_string(**PD_STRING_KWARGS) + NOCOLOUR)
		# get the intersection of the routines so we can calculate the diff
		bsl = diff[diff["Source"] == "BSL"].drop(columns="Source").set_index("Routine")
		cut = diff[diff["Source"] == "CUT"].drop(columns="Source").set_index("Routine")
		overlaps = bsl.index.intersection(cut.index)

		if not common.empty:
		print(
		"Routines with no differences".center(
		PD_STRING_KWARGS["line_width"], "-"
		)
		)
		print(common.to_string(**PD_STRING_KWARGS) + NOCOLOUR)
		else:
		print(
		"No differences in complexity of routines".center(
		PD_STRING_KWARGS["line_width"], "-"
		)
		)
		print(merge.to_string(**PD_STRING_KWARGS))
		SEPARATOR = "_" * PD_STRING_KWARGS["line_width"]
		print(NOCOLOUR + SEPARATOR)
		# find the diff for intersecting routines
		routines_diff = cut.loc[overlaps] - bsl.loc[overlaps]

		table_wmops = pd.concat(
		[
		bsl_wmops.iloc[-1][2:5],
		cut_wmops.iloc[-1][2:5],
		cut_wmops.iloc[-1][2:5] - bsl_wmops.iloc[-1][2:5],
		],
		axis=1,
		)
		table_wmops.columns = ["BSL", "CUT", "CUT - BSL"]
		# retrieve the unique routines for each side
		bsl_unique = bsl[~bsl.index.isin(overlaps)]
		cut_unique = cut[~cut.index.isin(overlaps)]

		table_mem = pd.concat(
		[
		bsl_mem.iloc[:, 1],
		cut_mem.iloc[:, 1],
		cut_mem.iloc[:, 1] - bsl_mem.iloc[:, 1],
		],
		axis=1,
		return bsl_unique, cut_unique, same, diff, routines_diff


		def main(
		bsl,
		cut,
		out_file,
		detailed=False,
		sort_key=None,
		quiet=False,
		dump_bsl=None,
		dump_cut=None,
		):

		if not quiet:
		print(GREEN + f"Baseline conditon: {bsl}" + NOCOLOUR)
		print(RED + f"Condition under test: {cut}" + NOCOLOUR)

		# parse log files to dataframe
		bsl_wmops, bsl_mem = log2df(bsl, True)
		cut_wmops, cut_mem = log2df(cut, True)

		# get wmops and memory diff and concatenate into the summary table
		table_wmops = diff_wmops(bsl_wmops, cut_wmops)
		table_mem = diff_mem(bsl_mem, cut_mem)
		summary_table = pd.concat([table_wmops, table_mem])

		if detailed:
		bsl_unique, cut_unique, same, diff, routines_diff = diff_routines(
		bsl_wmops, cut_wmops
		)
		table_mem.set_index(bsl_mem.iloc[:, 0], inplace=True)
		table_mem.columns = ["BSL", "CUT", "CUT - BSL"]
		if sort_key:
		for df in [bsl_unique, cut_unique, same, diff, routines_diff]:
		df.sort_values([SORT_DICT[sort_key]], inplace=True)

		# write output files
		if out_file:
		summary_table.to_csv(out_file)
		if detailed:
		detailed_output = out_file.with_stem(f"{out_file.stem}_diff_detailed")
		routines_diff.to_csv(detailed_output)

		if dump_bsl:
		w, m = log2df(bsl)
		pd.concat([w, m.T]).set_index("Routine").to_csv(args.dump_bsl)
		if not quiet:
		print(GREEN + f"Wrote BSL data to {args.dump_bsl}" + NOCOLOUR)
		if dump_cut:
		w, m = log2df(cut)
		pd.concat([w, m.T]).set_index("Routine").to_csv(args.dump_cut)
		if not quiet:
		print(RED + f"Wrote CUT data to {args.dump_cut}" + NOCOLOUR)

		table = pd.concat([table_wmops, table_mem])
		# print to CLI
		if not quiet:
		PD_STRING_KWARGS["line_width"] = get_terminal_size()[0]

		def fmt_diff(x):
		if isinstance(x, int):
		fmt = "{}"
		else:
		def fmt_df(x, has_int=False, diff=False):
		x = float(x)
		fmt = "{:.3f}"

		if has_int and x % 1 == 0:
		fmt = "{:.0f}"
		if diff:
		if x > 0:
		return RED + fmt.format(x) + NOCOLOUR
		if x < 0:
		elif x < 0:
		return GREEN + fmt.format(x) + NOCOLOUR
		else:
		return BLUE + fmt.format(x) + NOCOLOUR

		table["CUT - BSL"] = table["CUT - BSL"].apply(fmt_diff)

		if not quiet:
		print()
		print(table.to_string(justify="left"))

		if out_file:
		table.to_csv(out_file)
		elif not quiet:
		print("\nNo output file specified - console output only!")
		else:
		return fmt.format(x)

		def print_df(df, title, has_int=False, diff=False):
		df = df.map(fmt_df, has_int=has_int, diff=diff)
		print(title.center(PD_STRING_KWARGS["line_width"], "-"))
		print(df.to_string(**PD_STRING_KWARGS) + NOCOLOUR)

		if detailed:
		if not same.empty:
		print(BLUE)
		same = same.drop(columns="Source").set_index("Routine")
		print_df(same, "Routines with no differences")
		if not bsl_unique.empty:
		print(GREEN)
		print_df(bsl_unique, "Routines only in BSL")
		if not cut_unique.empty:
		print(RED)
		print_df(cut_unique, "Routines only in CUT")
		if not routines_diff.empty:
		print_df(routines_diff, "Diff of routines", diff=True)

		# summary table
		summary_table["BSL"] = summary_table["BSL"].apply(
		fmt_df,
		has_int=True,
		)
		summary_table["CUT"] = summary_table["CUT"].apply(
		fmt_df,
		has_int=True,
		)
		summary_table["CUT - BSL"] = summary_table["CUT - BSL"].apply(
		fmt_df, has_int=True, diff=True
		)
		print("WMOPs and Memory Summary".center(PD_STRING_KWARGS["line_width"], "-"))
		print(summary_table.to_string(justify="left"))


		if __name__ == "__main__":
		@@ -220,46 +313,61 @@ if __name__ == "__main__":

		parser.add_argument(
		"bsl",
		type=str,
		type=Path,
		help="input logfile for baseline condition",
		)

		parser.add_argument(
		"cut",
		type=str,
		type=Path,
		help="input logfile for condition under test",
		)

		parser.add_argument(
		"-o",
		"--outfile",
		required=False,
		type=str,
		"--output",
		type=Path,
		help="output csv table",
		)

		parser.add_argument(
		"-q",
		"--quiet",
		required=False,
		"-db",
		"--dump_bsl",
		type=Path,
		help="Dump BSL data to specified .csv file",
		)
		parser.add_argument(
		"-dc",
		"--dump_cut",
		type=Path,
		help="Dump CUT data to specified .csv file",
		)
		parser.add_argument(
		"-d",
		"--detailed",
		action="store_true",
		help="no console output",
		default=False,
		help="print detailed info about routines, if used with -o/--output, writes an addtional _detailed.csv file",
		)
		parser.add_argument(
		"-s",
		"--sort",
		choices=SORT_DICT.keys(),
		default=None,
		help="Sort WMOPs data by this column, only affects detailed output (default = %(default)s)",
		)

		parser.add_argument(
		"-v",
		"--verbose",
		required=False,
		"-q",
		"--quiet",
		action="store_true",
		help="print detailed info about routines",
		default=False,
		help="no console output",
		)

		args = parser.parse_args()

		if args.verbose and args.quiet:
		print("Both verbose and quiet options specified, defaulting to verbose")
		args.quiet = False

		main(args.bsl, args.cut, args.outfile, args.quiet, args.verbose)
		main(
		args.bsl,
		args.cut,
		args.output,
		args.detailed,
		args.sort,
		args.quiet,
		args.dump_bsl,
		args.dump_cut,
		)

scripts/parse_complexity_run_logs.py

0 → 100644

+97 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3
		"""
		(C) 2022-2024 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
		Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		contributors to this repository. All Rights Reserved.

		This software is protected by copyright law and by international treaties.
		The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
		Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		contributors to this repository retain full ownership rights in their respective contributions in
		the software. This notice grants no license of any kind, including but not limited to patent
		license, nor is any license granted by implication, estoppel or otherwise.

		Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
		contributions.

		This software is provided "AS IS", without any express or implied warranties. The software is in the
		development stage. It is intended exclusively for experts who have experience with such software and
		solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
		and fitness for a particular purpose are hereby disclaimed and excluded.

		Any dispute, controversy or claim arising under or in relation to providing this software shall be
		submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
		accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
		the United Nations Convention on Contracts on the International Sales of Goods.
		"""

		# place this script along with diff_complexity.py in the root folder of the logs
		from diff_complexity import (
		log2df,
		diff_wmops,
		diff_mem,
		)
		from pathlib import Path
		import pandas as pd
		from tqdm import tqdm
		import numpy as np

		REF_LOGDIR = Path("float_detail_run_21_1_2025/output/logs")
		CUT_LOGDIR = Path("basop_detail_run_21_1_2025/output/logs")

		ref_logfiles = sorted(
		f
		for f in REF_LOGDIR.glob("*.txt")
		if not f.name.endswith("pcm.txt") and "dec" in f.stem
		# and "ltv48_STEREO" in f.stem
		)
		cut_logfiles = sorted(
		f
		for f in CUT_LOGDIR.glob("*.txt")
		if not f.name.endswith("pcm.txt") and "dec" in f.stem
		# and "ltv48_STEREO" in f.stem
		)

		unique = set(f.name for f in ref_logfiles).difference(f.name for f in cut_logfiles)
		if unique:
		raise FileNotFoundError(
		f"One or more files were not found in either directory {unique}"
		)

		records = []
		crashes = []
		for ref, cut in tqdm(zip(ref_logfiles, cut_logfiles), total=len(ref_logfiles)):
		# parse logfiles
		try:
		ref_wmops, ref_mem = log2df(ref)
		except ValueError:
		crashes.append(str(ref))
		continue
		try:
		cut_wmops, cut_mem = log2df(cut)
		except ValueError:
		crashes.append(str(cut))
		continue

		# get the diff for wmops and memory
		wmops = diff_wmops(ref_wmops, cut_wmops)
		mem = diff_mem(ref_mem, cut_mem)

		# only extract the difference column
		diff = pd.DataFrame(pd.concat([wmops, mem])["CUT - BSL"]).T
		diff.rename({"CUT - BSL": "Values"}, inplace=True, axis=1)

		diff.insert(0, "Name", ref.stem)
		records.append(diff)

		df = pd.DataFrame(np.squeeze(records), columns=diff.columns)
		df.set_index("Name", inplace=True)
		df.sort_values("WMOPs max", inplace=True, ascending=False)
		df.to_csv("all_diff.csv", float_format="%.3f")

		with open("crashes.log", "w") as f:
		[print(c, file=f) for c in crashes]