From 9ba5d516c5bbfe8dcba2c43e43908dc36a11de39 Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Tue, 27 Jan 2026 11:29:40 +0100 Subject: [PATCH 01/10] Fixes for the long term logs --- ci/process_long_term_logs.py | 90 +++++++++++++++--------------------- 1 file changed, 37 insertions(+), 53 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index baabc7d93a..1c7d1a668f 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -39,7 +39,7 @@ def parse_csv_data(csv_data): return concat_df -def plot_data(df, output_filename): +def plot_data(df, output_filename, days): """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save to html file.""" # Convert 'date' to datetime @@ -47,30 +47,25 @@ def plot_data(df, output_filename): df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce") df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce") + # Filter out rows older than "days" + cutoff = df["date"].max() - pd.Timedelta(days=days) + df = df[df["date"] > cutoff].reset_index(drop=True) + # Drop rows with NaT and NaN - clean_df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) + df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) # Group by 'format' and 'date' to get rows with max 'MLD' per group - max_mld = ( - clean_df.groupby(["format", "date"]) - .apply(lambda x: x.loc[x["MLD"].idxmax()]) - .reset_index(drop=True) - ) - - # Group by 'format' and 'date' to get rows with max 'MAX_ABS_DIFF' per - # group - max_abs_diff = ( - clean_df.groupby(["format", "date"]) - .apply(lambda x: x.loc[x["MAX_ABS_DIFF"].idxmax()]) - .reset_index(drop=True) - ) + idx = df.groupby(['format', 'date'])['MLD'].idxmax() + max = df.loc[idx].reset_index(drop=True) + idx = df.groupby(['format', 'date'])['MLD'].idxmin() + min = df.loc[idx].reset_index(drop=True) + mean = df.groupby(['format', 'date'])['MLD'].mean().to_frame('mean').reset_index() - formats = sorted(clean_df["format"].unique()) + formats = 
sorted(df["format"].unique()) fig = make_subplots( rows=5, cols=2, - specs=[[{"secondary_y": True}] * 2] * 5, subplot_titles=[f"{i}" for i in formats], shared_xaxes="columns", ) @@ -79,9 +74,7 @@ def plot_data(df, output_filename): row = i // 2 + 1 col = i % 2 + 1 - data_mld = max_mld[max_mld["format"] == fmt].sort_values("date") - data_diff = max_abs_diff[max_abs_diff["format"] - == fmt].sort_values("date") + data_mld = max[max["format"] == fmt].sort_values("date") # Add max 'MLD' to primary y-axis fig.add_trace( @@ -89,54 +82,51 @@ def plot_data(df, output_filename): x=data_mld["date"], y=data_mld["MLD"], mode="lines+markers", - name=f" {fmt} - Max MLD", + name=f"Max MLD", hovertext=[ - f"Testcase: {tc}
MLD: {mld:.4f}
MAX_ABS_DIFF:" - f"{abs_diff}
Format:" + f"Testcase: {tc}
MLD: {mld:.4f}" f" {format}
Date: {date.date()}" - for tc, mld, abs_diff, format, date in zip( + for tc, mld, format, date in zip( data_mld["testcase"], data_mld["MLD"], - data_mld["MAX_ABS_DIFF"], data_mld["format"], data_mld["date"], ) + ], hoverinfo="text", ), row=row, col=col, - secondary_y=False, ) - # Add max 'MAX_ABS_DIFF' to secondary y-axis + data_mld = mean[mean["format"] == fmt].sort_values("date") + + # Add mean 'MLD' to primary y-axis fig.add_trace( go.Scatter( - x=data_diff["date"], - y=data_diff["MAX_ABS_DIFF"], + x=data_mld["date"], + y=data_mld["mean"], mode="lines+markers", - name=f"{fmt} - Max MAX_ABS_DIFF", + name=f"Mean MLD", hovertext=[ - f"Testcase: {tc}
MLD: {mld:.4f}
MAX_ABS_DIFF:" - f" {abs_diff:.4f}
Format:" + f"Mean MLD: {mld:.4f}" f" {format}
Date: {date.date()}" - for tc, mld, abs_diff, format, date in zip( - data_diff["testcase"], - data_diff["MLD"], - data_diff["MAX_ABS_DIFF"], - data_diff["format"], - data_diff["date"], + for mld, format, date in zip( + data_mld["mean"], + data_mld["format"], + data_mld["date"], ) + ], hoverinfo="text", ), row=row, col=col, - secondary_y=True, ) fig.update_layout( - title_text="Long-term regression: max MLD and max MAX_ABS_DIFF", + title_text="History: MLD", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) @@ -144,18 +134,6 @@ def plot_data(df, output_filename): fig.update_xaxes(automargin=True) fig.update_yaxes(automargin=True) - # Update y-axes titles per subplot - for i in range(10): - yaxis_num = i * 2 + 1 - yaxis2_num = yaxis_num + 1 - fig["layout"][f"yaxis{yaxis_num}"].update( - title="Max MLD", titlefont=dict(color="blue"), tickfont=dict(color="blue") - ) - fig["layout"][f"yaxis{yaxis2_num}"].update( - title="Max MAX_ABS_DIFF", - titlefont=dict(color="green"), - tickfont=dict(color="green"), - ) # Save to html fig.write_html(output_filename) @@ -173,8 +151,14 @@ if __name__ == "__main__": type=str, help="Filename of the generated plot. e.g" ". long_term_regression.html", ) + parser.add_argument( + "--days", + type=int, + help="Number of days in history. 
Default: 30", + default=30, + ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) data = parse_csv_data(csv_data) - plot_data(data, args.output_filename) + plot_data(data, args.output_filename, args.days) -- GitLab From b52261bc264381e71b2b3a952caab6658d2f48f2 Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Tue, 27 Jan 2026 13:45:39 +0100 Subject: [PATCH 02/10] Make measure a command line parameter, to allow creating separate html filer for MLD and MAX_ABS_DIFF --- ci/process_long_term_logs.py | 45 +++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index 1c7d1a668f..3fcd733dc7 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -39,27 +39,26 @@ def parse_csv_data(csv_data): return concat_df -def plot_data(df, output_filename, days): - """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save +def plot_data(df, output_filename, days, measure): + """plot max values for measure and data and save to html file.""" # Convert 'date' to datetime df["date"] = pd.to_datetime(df["date"], errors="coerce") - df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce") - df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce") + df["MLD"] = pd.to_numeric(df[measure], errors="coerce") # Filter out rows older than "days" cutoff = df["date"].max() - pd.Timedelta(days=days) df = df[df["date"] > cutoff].reset_index(drop=True) # Drop rows with NaT and NaN - df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) + df = df.dropna(subset=["date", measure]) # Group by 'format' and 'date' to get rows with max 'MLD' per group - idx = df.groupby(['format', 'date'])['MLD'].idxmax() + idx = df.groupby(['format', 'date'])[measure].idxmax() max = df.loc[idx].reset_index(drop=True) - idx = df.groupby(['format', 'date'])['MLD'].idxmin() + idx = df.groupby(['format', 'date'])[measure].idxmin() min = 
df.loc[idx].reset_index(drop=True) - mean = df.groupby(['format', 'date'])['MLD'].mean().to_frame('mean').reset_index() + mean = df.groupby(['format', 'date'])[measure].mean().to_frame('mean').reset_index() formats = sorted(df["format"].unique()) @@ -76,25 +75,27 @@ def plot_data(df, output_filename, days): data_mld = max[max["format"] == fmt].sort_values("date") - # Add max 'MLD' to primary y-axis + # Add max measure to plots fig.add_trace( go.Scatter( x=data_mld["date"], - y=data_mld["MLD"], + y=data_mld[measure], mode="lines+markers", - name=f"Max MLD", + name=f"Max {measure}", hovertext=[ - f"Testcase: {tc}
MLD: {mld:.4f}" + f"Testcase: {tc}
{measure}: {mld:.4f}" f" {format}
Date: {date.date()}" for tc, mld, format, date in zip( data_mld["testcase"], - data_mld["MLD"], + data_mld[measure], data_mld["format"], data_mld["date"], ) ], hoverinfo="text", + marker_color="red", + showlegend=(i==0), ), row=row, col=col, @@ -108,9 +109,9 @@ def plot_data(df, output_filename, days): x=data_mld["date"], y=data_mld["mean"], mode="lines+markers", - name=f"Mean MLD", + name=f"Mean {measure}", hovertext=[ - f"Mean MLD: {mld:.4f}" + f"Mean {measure}: {mld:.4f}" f" {format}
Date: {date.date()}" for mld, format, date in zip( data_mld["mean"], @@ -120,13 +121,15 @@ def plot_data(df, output_filename, days): ], hoverinfo="text", + marker_color="blue", + showlegend=(i==0), ), row=row, col=col, ) fig.update_layout( - title_text="History: MLD", + title_text=f"History: {measure}", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) @@ -156,9 +159,15 @@ if __name__ == "__main__": type=int, help="Number of days in history. Default: 30", default=30, - ) + ) + parser.add_argument( + "--measure", + type=str, + help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, default: MLD", + default="MLD", + ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) data = parse_csv_data(csv_data) - plot_data(data, args.output_filename, args.days) + plot_data(data, args.output_filename, args.days, args.measure) -- GitLab From 29820a98657419869d183a5cc4d7119d9c60d66c Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Tue, 27 Jan 2026 14:09:39 +0100 Subject: [PATCH 03/10] Cleanup of text hover message --- ci/process_long_term_logs.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index 3fcd733dc7..112365693e 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -54,11 +54,11 @@ def plot_data(df, output_filename, days, measure): df = df.dropna(subset=["date", measure]) # Group by 'format' and 'date' to get rows with max 'MLD' per group - idx = df.groupby(['format', 'date'])[measure].idxmax() + idx = df.groupby(["format", "date"])[measure].idxmax() max = df.loc[idx].reset_index(drop=True) - idx = df.groupby(['format', 'date'])[measure].idxmin() + idx = df.groupby(["format", "date"])[measure].idxmin() min = df.loc[idx].reset_index(drop=True) - mean = df.groupby(['format', 'date'])[measure].mean().to_frame('mean').reset_index() + mean = df.groupby(["format", 
"date"])[measure].mean().to_frame("mean").reset_index() formats = sorted(df["format"].unique()) @@ -83,19 +83,17 @@ def plot_data(df, output_filename, days, measure): mode="lines+markers", name=f"Max {measure}", hovertext=[ - f"Testcase: {tc}
{measure}: {mld:.4f}" - f" {format}
Date: {date.date()}" - for tc, mld, format, date in zip( + f"Testcase: {tc}
Max {measure}: {mld:.4f}" + f"
Date: {date.date()}" + for tc, mld, date in zip( data_mld["testcase"], data_mld[measure], - data_mld["format"], data_mld["date"], ) - ], hoverinfo="text", marker_color="red", - showlegend=(i==0), + showlegend=(i == 0), ), row=row, col=col, @@ -103,7 +101,7 @@ def plot_data(df, output_filename, days, measure): data_mld = mean[mean["format"] == fmt].sort_values("date") - # Add mean 'MLD' to primary y-axis + # Add mean measure to plots fig.add_trace( go.Scatter( x=data_mld["date"], @@ -111,25 +109,22 @@ def plot_data(df, output_filename, days, measure): mode="lines+markers", name=f"Mean {measure}", hovertext=[ - f"Mean {measure}: {mld:.4f}" - f" {format}
Date: {date.date()}" - for mld, format, date in zip( + f"Mean {measure}: {mld:.4f}" f"
Date: {date.date()}" + for mld, date in zip( data_mld["mean"], - data_mld["format"], data_mld["date"], ) - ], hoverinfo="text", marker_color="blue", - showlegend=(i==0), + showlegend=(i == 0), ), row=row, col=col, ) fig.update_layout( - title_text=f"History: {measure}", + title_text=f"History: {measure}", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) @@ -137,7 +132,6 @@ def plot_data(df, output_filename, days, measure): fig.update_xaxes(automargin=True) fig.update_yaxes(automargin=True) - # Save to html fig.write_html(output_filename) @@ -165,7 +159,7 @@ if __name__ == "__main__": type=str, help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, default: MLD", default="MLD", - ) + ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) -- GitLab From 4f6b4a0fbf502a6688d74626e3966055c3cf188c Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Mon, 2 Feb 2026 16:09:17 +0100 Subject: [PATCH 04/10] Add scripts/find_regressions_from_logs2.py as alternative analysis script --- scripts/find_regressions_from_logs2.py | 189 +++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 scripts/find_regressions_from_logs2.py diff --git a/scripts/find_regressions_from_logs2.py b/scripts/find_regressions_from_logs2.py new file mode 100644 index 0000000000..cd07ead015 --- /dev/null +++ b/scripts/find_regressions_from_logs2.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 + +import argparse +from pathlib import Path +import pandas as pd +import plotly.graph_objects as go +from plotly.subplots import make_subplots + +def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_thr, curr_value_thr): + + input_path = Path(logs_dir) + logs = [f for f in input_path.iterdir() if f.is_dir()] + + # Build dict of scores + formatdict = {} + sha = {} + logdict = {} + for log in logs: + date = log.name + logdict[date] = {} + formatdict[date] = {} + for logfile in log.glob("*.csv"): + tmp = logfile.name.split("-") + job = 
"-".join(tmp[3:-4]) + sha[date] = tmp[-1].split(".")[0] + data = pd.read_csv(logfile, usecols=["testcase", measure, "format"]) + logdict[date][job] = {} + formatdict[date][job] = {} + + for testcase, value, format in zip( + data["testcase"], data[measure], data["format"] + ): + formatdict[date][job][testcase] = format + logdict[date][job][testcase] = value + + # Restructure dict + csv_rows = [] + formats = [] + for date, jobs in logdict.items(): + for job, testcases in jobs.items(): + for testcase, value in testcases.items(): + csv_rows.append((job, testcase, date, value)) + formats.append((job, testcase, date, formatdict[date][job][testcase])) + + result = pd.DataFrame(csv_rows, columns=["job", "testcase", "date", "value"]) + result = result.pivot( + index=["job", "testcase"], columns="date", values="value" + ).reset_index() + + f = pd.DataFrame(formats, columns=["job", "testcase", "date", "format"]) + f = f.pivot( + index=["job", "testcase"], columns="date", values="format" + ).reset_index() + + ratio = result.copy() + dates = result.iloc[:, 2:].columns + + # Calculate ratios + ratio[dates[0]] = 1 # Set first ratio to 1 + for prevdate, currdate in zip( dates[0:-1], dates[1:]): + ratio[currdate] = result[currdate] / result[prevdate] + + values = result.iloc[:, 2:] + date = values.columns + last_date = date[-1] + result.insert(2, "format", f[last_date]) + ratio.insert(2, "format", f[last_date]) + + formats = result['format'].dropna().unique().tolist() + + plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days+1):-1]) + plottext = pd.DataFrame("", index=formats, columns=dates[-(days+1):-1]) + + for i in range(days): + currdate = dates[-(days-i+1)] # Make robust for shorter history + prevdate = dates[-(days-i+2)] + idx = ratio.groupby("format")[currdate].nlargest(10).index.get_level_values(1) + tmp = result[["job","testcase","format",prevdate,currdate]].iloc[idx,:].copy().reset_index() + tmp.insert(3, "prev_date", prevdate) + tmp.insert(4, "prev_sha", 
sha[prevdate]) + tmp.insert(5, "curr_date", currdate) + tmp.insert(6, "curr_sha", sha[prevdate]) + tmp.insert(7, "diff", tmp[currdate] - tmp[prevdate]) + tmp.insert(8, "ratio", tmp[currdate] / tmp[prevdate]) + tmp.loc[tmp[prevdate] == 0, "ratio"] = ( + 1 # Set ratio to 1 for denominator 0 + ) + tmp["prev_sha"] = "'" + tmp["prev_sha"] # Add apostrophy to prevent Excel reading this as a number + tmp["curr_sha"] = "'" + tmp["curr_sha"] # Add apostrophy to prevent Excel reading this as a number + + csv_filename = f"regressions_{measure}_{currdate}.csv" + tmp.to_csv(csv_filename, sep=";", index=False) + + # Store worst case per format for plotting + idx = tmp.groupby("format")["ratio"].idxmax() + for f in formats: + plotdata.loc[f, currdate] = tmp.iloc[idx[f]]["ratio"] + plottext.loc[f, currdate] = f"{tmp.iloc[idx[f]]['job']} - {tmp.iloc[idx[f]]['testcase']} - Max {measure} ratio: {tmp.iloc[idx[f]]['ratio']:.2f}" + + fig = make_subplots( + rows=5, + cols=2, + subplot_titles=[f"{i}" for i in formats], + shared_xaxes="columns", + ) + + for i, fmt in enumerate(formats): + row = i // 2 + 1 + col = i % 2 + 1 + + fig.add_trace( + go.Scatter( + x=pd.to_datetime(plotdata.columns), + y=plotdata.loc[fmt], + mode="lines+markers", + name=f"Max {measure}", + hovertext=plottext.loc[fmt], + hoverinfo="text", + showlegend=False, + ), + row=row, + col=col, + ) + + fig.update_layout( + title_text=f"Regression detection: Max {measure} ratio", + legend=dict(x=1, y=1, orientation="v"), + hovermode="x unified", + ) + + fig.update_xaxes(automargin=True) + fig.update_yaxes(automargin=True) + + # Save to html + fig.write_html(output_filename) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="logs dir") + parser.add_argument( + "logs_dir", + type=str, + help="Logs dir, e.g. logs", + ) + parser.add_argument( + "output_filename", + type=str, + help="Filename of the combined csv file. 
e.g mld.csv", + ) + parser.add_argument( + "--measure", + type=str, + help="Measure for summary, one of MLD MIN_SSNR MAX_ABS_DIFF MIN_ODG, (default: MLD)", + default="MLD", + ) + parser.add_argument( + "--days", + type=int, + help="Number of days in history, (default: whole history)", + default=-1, + ) + parser.add_argument( + "--all_results", + action="store_true", + help="Output all results, including cases without regression (default: off)", + default=False, + ) + parser.add_argument( + "--diff_thr", + type=float, + help="Include test cases with diff above diff_thr, (default: 0.0)", + default=0.0, + ) + parser.add_argument( + "--ratio_thr", + type=float, + help="Include test cases with ratio above ratio_thr, (default: 1.0)", + default=1.0, + ) + parser.add_argument( + "--curr_value_thr", + type=float, + help="Include test cases with curr_value above curr_value_thr, (default: 0.0)", + default=0.0, + ) + + args = parser.parse_args() + + main(args.logs_dir, args.output_filename, args.measure, args.days, args.all_results, args.diff_thr, args.ratio_thr, args.curr_value_thr) + -- GitLab From d10423218891f16dd76ac46cacd1f8fb7ca2c54e Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 13:22:43 +0100 Subject: [PATCH 05/10] Updates to regression analysis --- scripts/find_regressions_from_logs2.py | 52 ++++++++++---------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/scripts/find_regressions_from_logs2.py b/scripts/find_regressions_from_logs2.py index cd07ead015..8de7f0b4ec 100644 --- a/scripts/find_regressions_from_logs2.py +++ b/scripts/find_regressions_from_logs2.py @@ -39,63 +39,51 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ for date, jobs in logdict.items(): for job, testcases in jobs.items(): for testcase, value in testcases.items(): - csv_rows.append((job, testcase, date, value)) - formats.append((job, testcase, date, formatdict[date][job][testcase])) + csv_rows.append((job, testcase, 
formatdict[date][job][testcase], date, value)) - result = pd.DataFrame(csv_rows, columns=["job", "testcase", "date", "value"]) + result = pd.DataFrame(csv_rows, columns=["job", "testcase", "format", "date", "value"]) result = result.pivot( - index=["job", "testcase"], columns="date", values="value" + index=["job", "testcase", "format"], columns="date", values="value" ).reset_index() - f = pd.DataFrame(formats, columns=["job", "testcase", "date", "format"]) - f = f.pivot( - index=["job", "testcase"], columns="date", values="format" - ).reset_index() + # Keep only tests for which results exist in the last run + if days == -1: + rng = result.columns[3:] # Whole history + else: + rng = result.columns[-days:] + result = result.dropna(subset=rng) + result = result.reset_index(drop=True) ratio = result.copy() - dates = result.iloc[:, 2:].columns + ratio = ratio.reset_index() + dates = result.iloc[:, 3:].columns # Calculate ratios - ratio[dates[0]] = 1 # Set first ratio to 1 + ratio[dates[0]] = 1.0 # Set first ratio to 1.0 for prevdate, currdate in zip( dates[0:-1], dates[1:]): ratio[currdate] = result[currdate] / result[prevdate] - values = result.iloc[:, 2:] + values = result.iloc[:, 3:] date = values.columns - last_date = date[-1] - result.insert(2, "format", f[last_date]) - ratio.insert(2, "format", f[last_date]) formats = result['format'].dropna().unique().tolist() plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days+1):-1]) plottext = pd.DataFrame("", index=formats, columns=dates[-(days+1):-1]) + all_indices = [] + for i in range(days): currdate = dates[-(days-i+1)] # Make robust for shorter history prevdate = dates[-(days-i+2)] - idx = ratio.groupby("format")[currdate].nlargest(10).index.get_level_values(1) - tmp = result[["job","testcase","format",prevdate,currdate]].iloc[idx,:].copy().reset_index() - tmp.insert(3, "prev_date", prevdate) - tmp.insert(4, "prev_sha", sha[prevdate]) - tmp.insert(5, "curr_date", currdate) - tmp.insert(6, "curr_sha", 
sha[prevdate]) - tmp.insert(7, "diff", tmp[currdate] - tmp[prevdate]) - tmp.insert(8, "ratio", tmp[currdate] / tmp[prevdate]) - tmp.loc[tmp[prevdate] == 0, "ratio"] = ( - 1 # Set ratio to 1 for denominator 0 - ) - tmp["prev_sha"] = "'" + tmp["prev_sha"] # Add apostrophy to prevent Excel reading this as a number - tmp["curr_sha"] = "'" + tmp["curr_sha"] # Add apostrophy to prevent Excel reading this as a number - csv_filename = f"regressions_{measure}_{currdate}.csv" - tmp.to_csv(csv_filename, sep=";", index=False) + idx = ratio.groupby("format")[currdate].idxmax() + all_indices.append(idx) # Store worst case per format for plotting - idx = tmp.groupby("format")["ratio"].idxmax() for f in formats: - plotdata.loc[f, currdate] = tmp.iloc[idx[f]]["ratio"] - plottext.loc[f, currdate] = f"{tmp.iloc[idx[f]]['job']} - {tmp.iloc[idx[f]]['testcase']} - Max {measure} ratio: {tmp.iloc[idx[f]]['ratio']:.2f}" + plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate] + plottext.loc[f, currdate] = f"{result.iloc[idx[f]]['job']} - {result.iloc[idx[f]]['testcase']} - Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}
Date: {currdate}" fig = make_subplots( rows=5, -- GitLab From 13845e017e52d846ceef0ecd8a480e90bbf9cd3f Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 14:54:13 +0100 Subject: [PATCH 06/10] Add filtering options for testcase --- ci/process_long_term_logs.py | 46 ++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index 112365693e..e5ae8afd5d 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -1,7 +1,9 @@ +#!/usr/bin/env python3 + import os import pandas as pd import argparse -import plotly.express as px +import re import plotly.graph_objects as go from plotly.subplots import make_subplots @@ -39,9 +41,13 @@ def parse_csv_data(csv_data): return concat_df -def plot_data(df, output_filename, days, measure): +def plot_data(df, args): """plot max values for measure and data and save to html file.""" + + measure = args.measure + days = args.days + # Convert 'date' to datetime df["date"] = pd.to_datetime(df["date"], errors="coerce") df["MLD"] = pd.to_numeric(df[measure], errors="coerce") @@ -53,6 +59,21 @@ def plot_data(df, output_filename, days, measure): # Drop rows with NaT and NaN df = df.dropna(subset=["date", measure]) + # Filter test cases based on include/reject/match arguments + if args.include: + mask = pd.Series(False, index=df.index) + for tag in args.include: + mask |= df["testcase"].str.contains(tag, case=False, na=False) + df = df[mask] + if args.reject: + mask = pd.Series(False, index=df.index) + for tag in args.reject: + mask |= df["testcase"].str.contains(tag, case=False, na=False) + df = df[~mask] + if args.match: + pattern = re.compile(args.match, re.IGNORECASE) + df = df[df["testcase"].str.contains(pattern, na=False)] + # Group by 'format' and 'date' to get rows with max 'MLD' per group idx = df.groupby(["format", "date"])[measure].idxmax() max = df.loc[idx].reset_index(drop=True) @@ -133,7 +154,7 @@ 
def plot_data(df, output_filename, days, measure): fig.update_yaxes(automargin=True) # Save to html - fig.write_html(output_filename) + fig.write_html(args.output_filename) if __name__ == "__main__": @@ -160,8 +181,25 @@ if __name__ == "__main__": help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, default: MLD", default="MLD", ) + parser.add_argument( + "--include", + nargs="+", + type=str, + help="List of tags to include", + ) + parser.add_argument( + "--reject", + nargs="+", + type=str, + help="List of tags to reject", + ) + parser.add_argument( + "--match", + type=str, + help="Regex pattern for selecting tests", + ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) data = parse_csv_data(csv_data) - plot_data(data, args.output_filename, args.days, args.measure) + plot_data(data, args) -- GitLab From 94cbb18ac6eb9231a963b59709582cb80e096bbc Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 15:02:49 +0100 Subject: [PATCH 07/10] Fixes for MIN_ODG and MIN_SSNR --- ci/process_long_term_logs.py | 45 ++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index e5ae8afd5d..cce9d1c22b 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -25,9 +25,9 @@ def read_csv_files(root_dir): def parse_csv_data(csv_data): - """keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF' and add + """keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF', 'MIN_ODG', 'MIN_SSNR' and add 'date' column.""" - cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF"] + cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF", "MIN_ODG", "MIN_SSNR"] parsed_data = {} for key, df in csv_data.items(): cols = [col for col in cols_to_keep if col in df.columns] @@ -50,7 +50,7 @@ def plot_data(df, args): # Convert 'date' to datetime df["date"] = pd.to_datetime(df["date"], errors="coerce") - df["MLD"] = pd.to_numeric(df[measure], 
errors="coerce") + df[measure] = pd.to_numeric(df[measure], errors="coerce") # Filter out rows older than "days" cutoff = df["date"].max() - pd.Timedelta(days=days) @@ -94,22 +94,27 @@ def plot_data(df, args): row = i // 2 + 1 col = i % 2 + 1 - data_mld = max[max["format"] == fmt].sort_values("date") + if "MIN" in measure: + data = min[min["format"] == fmt].sort_values("date") + maxmin_str = "Min" + else: + data = max[max["format"] == fmt].sort_values("date") + maxmin_str = "Max" # Add max measure to plots fig.add_trace( go.Scatter( - x=data_mld["date"], - y=data_mld[measure], + x=data["date"], + y=data[measure], mode="lines+markers", - name=f"Max {measure}", + name=f"{maxmin_str} {measure}", hovertext=[ - f"Testcase: {tc}
Max {measure}: {mld:.4f}" + f"Testcase: {tc}
{maxmin_str} {measure}: {value:.4f}" f"
Date: {date.date()}" - for tc, mld, date in zip( - data_mld["testcase"], - data_mld[measure], - data_mld["date"], + for tc, value, date in zip( + data["testcase"], + data[measure], + data["date"], ) ], hoverinfo="text", @@ -120,20 +125,20 @@ def plot_data(df, args): col=col, ) - data_mld = mean[mean["format"] == fmt].sort_values("date") + data = mean[mean["format"] == fmt].sort_values("date") # Add mean measure to plots fig.add_trace( go.Scatter( - x=data_mld["date"], - y=data_mld["mean"], + x=data["date"], + y=data["mean"], mode="lines+markers", name=f"Mean {measure}", hovertext=[ - f"Mean {measure}: {mld:.4f}" f"
Date: {date.date()}" - for mld, date in zip( - data_mld["mean"], - data_mld["date"], + f"Mean {measure}: {value:.4f}" f"
Date: {date.date()}" + for value, date in zip( + data["mean"], + data["date"], ) ], hoverinfo="text", @@ -178,7 +183,7 @@ if __name__ == "__main__": parser.add_argument( "--measure", type=str, - help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, default: MLD", + help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, MIN_SSNR, default: MLD", default="MLD", ) parser.add_argument( -- GitLab From 8742c1bbd7cb9c75b2d134eb78ea8f814f9ffa77 Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 15:28:39 +0100 Subject: [PATCH 08/10] Add filtering options for jobs --- ci/process_long_term_logs.py | 48 ++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index cce9d1c22b..1eec7fff35 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -26,14 +26,17 @@ def read_csv_files(root_dir): def parse_csv_data(csv_data): """keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF', 'MIN_ODG', 'MIN_SSNR' and add - 'date' column.""" + 'date' and 'job' column.""" cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF", "MIN_ODG", "MIN_SSNR"] parsed_data = {} for key, df in csv_data.items(): + tmp = key.split("-") + job = "-".join(tmp[4:-4]) cols = [col for col in cols_to_keep if col in df.columns] date = os.path.basename(os.path.dirname(key)) new_df = df[cols].copy() new_df["date"] = date + new_df["job"] = job parsed_data[key] = new_df # concatenate all dataframe in the dictionary @@ -74,6 +77,22 @@ def plot_data(df, args): pattern = re.compile(args.match, re.IGNORECASE) df = df[df["testcase"].str.contains(pattern, na=False)] + # Filter jobs based on job-include/job-reject/job-match arguments + if args.job_include: + mask = pd.Series(False, index=df.index) + for tag in args.job_include: + mask |= df["job"].str.contains(tag, case=False, na=False) + df = df[mask] + if args.job_reject: + mask = pd.Series(False, index=df.index) + for tag in 
args.job_reject: + mask |= df["job"].str.contains(tag, case=False, na=False) + df = df[~mask] + if args.job_match: + pattern = re.compile(args.job_match, re.IGNORECASE) + df = df[df["job"].str.contains(pattern, na=False)] + + # Group by 'format' and 'date' to get rows with max 'MLD' per group idx = df.groupby(["format", "date"])[measure].idxmax() max = df.loc[idx].reset_index(drop=True) @@ -110,8 +129,10 @@ def plot_data(df, args): name=f"{maxmin_str} {measure}", hovertext=[ f"Testcase: {tc}
{maxmin_str} {measure}: {value:.4f}" + f"
Job: {job}" f"
Date: {date.date()}" - for tc, value, date in zip( + for job, tc, value, date in zip( + data["job"], data["testcase"], data[measure], data["date"], @@ -190,18 +211,35 @@ if __name__ == "__main__": "--include", nargs="+", type=str, - help="List of tags to include", + help="List of tags to include in testcases", ) parser.add_argument( "--reject", nargs="+", type=str, - help="List of tags to reject", + help="List of tags to reject in testcases", ) parser.add_argument( "--match", type=str, - help="Regex pattern for selecting tests", + help="Regex pattern for selecting testcases", + ) + parser.add_argument( + "--job-include", + nargs="+", + type=str, + help="List of tags to include in jobs", + ) + parser.add_argument( + "--job-reject", + nargs="+", + type=str, + help="List of tags to reject in jobs", + ) + parser.add_argument( + "--job-match", + type=str, + help="Regex pattern for selecting jobs", ) args = parser.parse_args() -- GitLab From 268a18d1139ccefe36a8376bdadfe45c508ec922 Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 15:46:31 +0100 Subject: [PATCH 09/10] Changed name of detect_regressions.py --- ...ns_from_logs2.py => detect_regressions.py} | 32 +++---------------- 1 file changed, 4 insertions(+), 28 deletions(-) rename scripts/{find_regressions_from_logs2.py => detect_regressions.py} (78%) diff --git a/scripts/find_regressions_from_logs2.py b/scripts/detect_regressions.py similarity index 78% rename from scripts/find_regressions_from_logs2.py rename to scripts/detect_regressions.py index 8de7f0b4ec..64de5a7a23 100644 --- a/scripts/find_regressions_from_logs2.py +++ b/scripts/detect_regressions.py @@ -6,7 +6,7 @@ import pandas as pd import plotly.graph_objects as go from plotly.subplots import make_subplots -def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_thr, curr_value_thr): +def main(logs_dir, output_filename, measure, days): input_path = Path(logs_dir) logs = [f for f in input_path.iterdir() if 
f.is_dir()] @@ -83,7 +83,7 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ # Store worst case per format for plotting for f in formats: plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate] - plottext.loc[f, currdate] = f"{result.iloc[idx[f]]['job']} - {result.iloc[idx[f]]['testcase']} - Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}
<br>Date: {currdate}" + plottext.loc[f, currdate] = f"Job: {result.iloc[idx[f]]['job']}<br>Testcase: {result.iloc[idx[f]]['testcase']}<br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>
Date: {currdate}" fig = make_subplots( rows=5, @@ -132,7 +132,7 @@ if __name__ == "__main__": parser.add_argument( "output_filename", type=str, - help="Filename of the combined csv file. e.g mld.csv", + help="Output html file. e.g mld.html", ) parser.add_argument( "--measure", @@ -146,32 +146,8 @@ if __name__ == "__main__": help="Number of days in history, (default: whole history)", default=-1, ) - parser.add_argument( - "--all_results", - action="store_true", - help="Output all results, including cases without regression (default: off)", - default=False, - ) - parser.add_argument( - "--diff_thr", - type=float, - help="Include test cases with diff above diff_thr, (default: 0.0)", - default=0.0, - ) - parser.add_argument( - "--ratio_thr", - type=float, - help="Include test cases with ratio above ratio_thr, (default: 1.0)", - default=1.0, - ) - parser.add_argument( - "--curr_value_thr", - type=float, - help="Include test cases with curr_value above curr_value_thr, (default: 0.0)", - default=0.0, - ) args = parser.parse_args() - main(args.logs_dir, args.output_filename, args.measure, args.days, args.all_results, args.diff_thr, args.ratio_thr, args.curr_value_thr) + main(args.logs_dir, args.output_filename, args.measure, args.days) -- GitLab From 3a4fc5e31ddd08b4536d164c089cd9631fd1e2ed Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 16:54:03 +0100 Subject: [PATCH 10/10] Change mld output file from detect_regressions.py --- scripts/detect_regressions.py | 75 +++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 17 deletions(-) diff --git a/scripts/detect_regressions.py b/scripts/detect_regressions.py index 64de5a7a23..4aa98941f6 100644 --- a/scripts/detect_regressions.py +++ b/scripts/detect_regressions.py @@ -6,7 +6,13 @@ import pandas as pd import plotly.graph_objects as go from plotly.subplots import make_subplots -def main(logs_dir, output_filename, measure, days): + +def main(args): + + logs_dir = args.logs_dir + output_filename 
= args.output_filename + measure = args.measure + days = args.days input_path = Path(logs_dir) logs = [f for f in input_path.iterdir() if f.is_dir()] @@ -39,16 +45,20 @@ def main(logs_dir, output_filename, measure, days): for date, jobs in logdict.items(): for job, testcases in jobs.items(): for testcase, value in testcases.items(): - csv_rows.append((job, testcase, formatdict[date][job][testcase], date, value)) + csv_rows.append( + (job, testcase, formatdict[date][job][testcase], date, value) + ) - result = pd.DataFrame(csv_rows, columns=["job", "testcase", "format", "date", "value"]) + result = pd.DataFrame( + csv_rows, columns=["job", "testcase", "format", "date", "value"] + ) result = result.pivot( index=["job", "testcase", "format"], columns="date", values="value" ).reset_index() - # Keep only tests for which results exist in the last run + # Keep only tests for which results exist in any of the days if days == -1: - rng = result.columns[3:] # Whole history + rng = result.columns[3:] # Whole history else: rng = result.columns[-days:] result = result.dropna(subset=rng) @@ -59,31 +69,33 @@ def main(logs_dir, output_filename, measure, days): dates = result.iloc[:, 3:].columns # Calculate ratios - ratio[dates[0]] = 1.0 # Set first ratio to 1.0 - for prevdate, currdate in zip( dates[0:-1], dates[1:]): + ratio[dates[0]] = 1.0 # Set first ratio to 1.0 + for prevdate, currdate in zip(dates[0:-1], dates[1:]): ratio[currdate] = result[currdate] / result[prevdate] values = result.iloc[:, 3:] date = values.columns - formats = result['format'].dropna().unique().tolist() + formats = result["format"].dropna().unique().tolist() - plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days+1):-1]) - plottext = pd.DataFrame("", index=formats, columns=dates[-(days+1):-1]) + plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days) : -1]) + plottext = pd.DataFrame("", index=formats, columns=dates[-(days) : -1]) all_indices = [] for i in range(days): - currdate = 
dates[-(days-i+1)] # Make robust for shorter history - prevdate = dates[-(days-i+2)] + currdate = dates[-(days - i)] # Make robust for shorter history + prevdate = dates[-(days - i + 1)] idx = ratio.groupby("format")[currdate].idxmax() - all_indices.append(idx) + all_indices.extend(idx.tolist()) # Store worst case per format for plotting for f in formats: plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate] - plottext.loc[f, currdate] = f"Job: {result.iloc[idx[f]]['job']}
<br>Testcase: {result.iloc[idx[f]]['testcase']}<br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>
Date: {currdate}" + plottext.loc[f, currdate] = ( + f"Job: {result.iloc[idx[f]]['job']}<br>Testcase: {result.iloc[idx[f]]['testcase']}<br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>
Date: {currdate}" + ) fig = make_subplots( rows=5, @@ -122,6 +134,32 @@ def main(logs_dir, output_filename, measure, days): # Save to html fig.write_html(output_filename) + # Write CSV-file + if args.csv: + output = result.iloc[all_indices].copy() + cols = ["job","testcase","format"] + cols.extend(date[-days:].tolist()) + output = output.loc[:,cols] + values = output.iloc[:, 3:] + last_date = values.columns[-1] + output.insert(3, "min_date", values.idxmin(axis=1)) + output.insert(4, "min_sha", output["min_date"].map(sha)) + output.insert(5, "curr_value", output[last_date]) + output.insert(6, "min_value", values.min(axis=1)) + output.insert(7, "diff", output["curr_value"] - output["min_value"]) + output.insert(8, "ratio", output["curr_value"] / output["min_value"]) + output.loc[output["min_value"] == 0, "ratio"] = ( + 1 # Set ratio to 1 for denominator 0 + ) + output["min_sha"] = ( + "'" + output["min_sha"] + ) # Add apostrophy to prevent Excel reading this as a number + output.sort_values( + by=["format", "ratio"], ascending=[True, False], inplace=True + ) + output.to_csv(args.csv, sep=";", index=False) + + if __name__ == "__main__": parser = argparse.ArgumentParser(description="logs dir") parser.add_argument( @@ -146,8 +184,11 @@ if __name__ == "__main__": help="Number of days in history, (default: whole history)", default=-1, ) + parser.add_argument( + "--csv", + type=str, + help="CSV output file", + ) args = parser.parse_args() - - main(args.logs_dir, args.output_filename, args.measure, args.days) - + main(args) -- GitLab