From 52f27a7d64ddd6fc6a1eba295c212ab217cb6ec9 Mon Sep 17 00:00:00 2001 From: Charles Kinuthia Date: Mon, 28 Jul 2025 10:30:38 +0200 Subject: [PATCH 1/2] add script for processing long-term logs Usage: - python process_long_term_logs.py logs long_term_regression.html --- ci/process_long_term_logs.py | 147 +++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 ci/process_long_term_logs.py diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py new file mode 100644 index 0000000000..9a221ee7ce --- /dev/null +++ b/ci/process_long_term_logs.py @@ -0,0 +1,147 @@ +import os +import pandas as pd +import argparse +import plotly.express as px +import plotly.graph_objects as go +from plotly.subplots import make_subplots + + +def read_csv_files(root_dir): + """ Read csv files as dictionary of panda dataframes.""" + csv_data = {} + for subdir, dirs, files in os.walk(root_dir): + for file in files: + if file.endswith('.csv'): + file_path = os.path.join(subdir, file) + try: + df = pd.read_csv(file_path) + csv_data[file_path] = df + except Exception as e: + print(f'Failed to read {file_path}: {e}') + exit(-1) + return csv_data + + +def parse_csv_data(csv_data): + """ keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF' and add + 'date' column.""" + cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF"] + parsed_data = {} + for key, df in csv_data.items(): + cols = [ + col for col in cols_to_keep if col in df.columns] + date = os.path.basename(os.path.dirname(key)) + new_df = df[cols].copy() + new_df['date'] = date + parsed_data[key] = new_df + + # concatenate all dataframe in the dictionary + concat_df = pd.concat(parsed_data.values(), ignore_index=True) + return concat_df + + +def plot_data(df, output_filename): + """ plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save + to html file. """ + # Convert 'date' to datetime + df['date'] = pd.to_datetime(df['date'], errors='coerce') + df['MLD'] = pd.to_numeric(df['MLD'], errors='coerce') + df['MAX_ABS_DIFF'] = pd.to_numeric(df['MAX_ABS_DIFF'], errors='coerce') + + # Drop rows with NaT and NaN + clean_df = df.dropna(subset=['date', 'MLD', 'MAX_ABS_DIFF']) + + # Group by 'format' and 'date' to get rows with max 'MLD' per group + max_mld = clean_df.groupby(['format', 'date']).apply( + lambda x: x.loc[x['MLD'].idxmax()]).reset_index(drop=True) + + # Group by 'format' and 'date' to get rows with max 'MAX_ABS_DIFF' per + # group + max_diff = clean_df.groupby(['format', 'date']).apply( + lambda x: x.loc[x['MAX_ABS_DIFF'].idxmax()]).reset_index(drop=True) + + formats = sorted(clean_df['format'].unique()) + + fig = make_subplots( + rows=5, cols=2, + specs=[[{"secondary_y": True}]*2]*5, + subplot_titles=[f'{i}' for i in formats], + shared_xaxes='columns' + ) + + for i, fmt in enumerate(formats): + row = i // 2 + 1 + col = i % 2 + 1 + + data_mld = max_mld[max_mld['format'] == fmt].sort_values('date') + data_diff = max_diff[max_diff['format'] == fmt].sort_values('date') + + # Add max 'MLD' to primary y-axis + fig.add_trace( + go.Scatter( + x=data_mld['date'], y=data_mld['MLD'], mode='lines+markers', + name=f' {fmt} - Max MLD', + hovertext=[f"Testcase: {tc}
<br>MAX_ABS_DIFF: {diff}<br>Format:"
+                           f" {format}<br>
Date: {date.date()}" + for tc, diff, format, date in zip( + data_mld['testcase'], data_mld['MAX_ABS_DIFF'], + data_mld['format'], data_mld['date'])], + hoverinfo='text+y' + ), + row=row, col=col, secondary_y=False + ) + + # Add max 'MAX_ABS_DIFF' to secondary y-axis + fig.add_trace( + go.Scatter( + x=data_diff['date'], y=data_diff['MAX_ABS_DIFF'], + mode='lines+markers', + name=f'{fmt} - Max MAX_ABS_DIFF', + hovertext=[f"Testcase: {tc}
<br>MLD: {mld:.4f}<br>Format:"
+                           f" {format}<br>
Date: {date.date()}" + for tc, mld, format, date in zip( + data_diff['testcase'], data_diff['MLD'], + data_mld['format'], data_diff['date'])], + hoverinfo='text+y' + ), + row=row, col=col, secondary_y=True + ) + + fig.update_layout( + title_text='Long-term regression: max MLD and max MAX_ABS_DIFF', + legend=dict(x=1, y=1, orientation='v'), + hovermode='x unified' + ) + + fig.update_xaxes(automargin=True) + fig.update_yaxes(automargin=True) + + # Update y-axes titles per subplot + for i in range(10): + yaxis_num = i*2 + 1 + yaxis2_num = yaxis_num + 1 + fig['layout'][f'yaxis{yaxis_num}'].update( + title='Max MLD', titlefont=dict(color='blue'), + tickfont=dict(color='blue')) + fig['layout'][f'yaxis{yaxis2_num}'].update( + title='Max MAX_ABS_DIFF', titlefont=dict(color='green'), + tickfont=dict(color='green')) + + # Save to html + fig.write_html(output_filename) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Plot long term logs') + parser.add_argument( + 'root_dir', type=str, help='Root directory containing subdirectories' + ' with CSV log files') + parser.add_argument( + 'output_filename', type=str, help='Filename of the generated plot. e.g' + '. long_term_regression.html') + args = parser.parse_args() + + csv_data = read_csv_files(args.root_dir) + data = parse_csv_data(csv_data) + plot_data(data, args.output_filename) -- GitLab From f43dfa50e47e8f879746873c39260263b81c5b9b Mon Sep 17 00:00:00 2001 From: Charles Kinuthia Date: Tue, 5 Aug 2025 08:37:26 +0200 Subject: [PATCH 2/2] format code and add label to hover data --- ci/process_long_term_logs.py | 155 +++++++++++++++++++++-------------- 1 file changed, 94 insertions(+), 61 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index 9a221ee7ce..baabc7d93a 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -7,32 +7,31 @@ from plotly.subplots import make_subplots def read_csv_files(root_dir): - """ Read csv files as dictionary of panda dataframes.""" + """Read csv files as dictionary of panda dataframes.""" csv_data = {} for subdir, dirs, files in os.walk(root_dir): for file in files: - if file.endswith('.csv'): + if file.endswith(".csv"): file_path = os.path.join(subdir, file) try: df = pd.read_csv(file_path) csv_data[file_path] = df except Exception as e: - print(f'Failed to read {file_path}: {e}') + print(f"Failed to read {file_path}: {e}") exit(-1) return csv_data def parse_csv_data(csv_data): - """ keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF' and add - 'date' column.""" + """keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF' and add + 'date' column.""" cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF"] parsed_data = {} for key, df in csv_data.items(): - cols = [ - col for col in cols_to_keep if col in df.columns] + cols = [col for col in cols_to_keep if col in df.columns] date = os.path.basename(os.path.dirname(key)) new_df = df[cols].copy() - new_df['date'] = date + new_df["date"] = date parsed_data[key] = new_df # concatenate all dataframe in the dictionary @@ -41,76 +40,105 @@ def parse_csv_data(csv_data): def plot_data(df, output_filename): - """ plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save - to html file. 
""" + """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save + to html file.""" # Convert 'date' to datetime - df['date'] = pd.to_datetime(df['date'], errors='coerce') - df['MLD'] = pd.to_numeric(df['MLD'], errors='coerce') - df['MAX_ABS_DIFF'] = pd.to_numeric(df['MAX_ABS_DIFF'], errors='coerce') + df["date"] = pd.to_datetime(df["date"], errors="coerce") + df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce") + df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce") # Drop rows with NaT and NaN - clean_df = df.dropna(subset=['date', 'MLD', 'MAX_ABS_DIFF']) + clean_df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) # Group by 'format' and 'date' to get rows with max 'MLD' per group - max_mld = clean_df.groupby(['format', 'date']).apply( - lambda x: x.loc[x['MLD'].idxmax()]).reset_index(drop=True) + max_mld = ( + clean_df.groupby(["format", "date"]) + .apply(lambda x: x.loc[x["MLD"].idxmax()]) + .reset_index(drop=True) + ) # Group by 'format' and 'date' to get rows with max 'MAX_ABS_DIFF' per # group - max_diff = clean_df.groupby(['format', 'date']).apply( - lambda x: x.loc[x['MAX_ABS_DIFF'].idxmax()]).reset_index(drop=True) + max_abs_diff = ( + clean_df.groupby(["format", "date"]) + .apply(lambda x: x.loc[x["MAX_ABS_DIFF"].idxmax()]) + .reset_index(drop=True) + ) - formats = sorted(clean_df['format'].unique()) + formats = sorted(clean_df["format"].unique()) fig = make_subplots( - rows=5, cols=2, - specs=[[{"secondary_y": True}]*2]*5, - subplot_titles=[f'{i}' for i in formats], - shared_xaxes='columns' + rows=5, + cols=2, + specs=[[{"secondary_y": True}] * 2] * 5, + subplot_titles=[f"{i}" for i in formats], + shared_xaxes="columns", ) for i, fmt in enumerate(formats): row = i // 2 + 1 col = i % 2 + 1 - data_mld = max_mld[max_mld['format'] == fmt].sort_values('date') - data_diff = max_diff[max_diff['format'] == fmt].sort_values('date') + data_mld = max_mld[max_mld["format"] == fmt].sort_values("date") + data_diff = max_abs_diff[max_abs_diff["format"] + == fmt].sort_values("date") # Add max 'MLD' to primary y-axis fig.add_trace( go.Scatter( - x=data_mld['date'], y=data_mld['MLD'], mode='lines+markers', - name=f' {fmt} - Max MLD', - hovertext=[f"Testcase: {tc}
<br>MAX_ABS_DIFF: {diff}<br>Format:"
-                           f" {format}<br>
Date: {date.date()}" - for tc, diff, format, date in zip( - data_mld['testcase'], data_mld['MAX_ABS_DIFF'], - data_mld['format'], data_mld['date'])], - hoverinfo='text+y' + x=data_mld["date"], + y=data_mld["MLD"], + mode="lines+markers", + name=f" {fmt} - Max MLD", + hovertext=[ + f"Testcase: {tc}
<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:"
+                    f"{abs_diff}<br>Format:"
+                    f" {format}<br>
Date: {date.date()}" + for tc, mld, abs_diff, format, date in zip( + data_mld["testcase"], + data_mld["MLD"], + data_mld["MAX_ABS_DIFF"], + data_mld["format"], + data_mld["date"], + ) + ], + hoverinfo="text", ), - row=row, col=col, secondary_y=False + row=row, + col=col, + secondary_y=False, ) # Add max 'MAX_ABS_DIFF' to secondary y-axis fig.add_trace( go.Scatter( - x=data_diff['date'], y=data_diff['MAX_ABS_DIFF'], - mode='lines+markers', - name=f'{fmt} - Max MAX_ABS_DIFF', - hovertext=[f"Testcase: {tc}
<br>MLD: {mld:.4f}<br>Format:"
-                           f" {format}<br>
Date: {date.date()}" - for tc, mld, format, date in zip( - data_diff['testcase'], data_diff['MLD'], - data_mld['format'], data_diff['date'])], - hoverinfo='text+y' + x=data_diff["date"], + y=data_diff["MAX_ABS_DIFF"], + mode="lines+markers", + name=f"{fmt} - Max MAX_ABS_DIFF", + hovertext=[ + f"Testcase: {tc}
<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:"
+                    f" {abs_diff:.4f}<br>Format:"
+                    f" {format}<br>
Date: {date.date()}" + for tc, mld, abs_diff, format, date in zip( + data_diff["testcase"], + data_diff["MLD"], + data_diff["MAX_ABS_DIFF"], + data_diff["format"], + data_diff["date"], + ) + ], + hoverinfo="text", ), - row=row, col=col, secondary_y=True + row=row, + col=col, + secondary_y=True, ) fig.update_layout( - title_text='Long-term regression: max MLD and max MAX_ABS_DIFF', - legend=dict(x=1, y=1, orientation='v'), - hovermode='x unified' + title_text="Long-term regression: max MLD and max MAX_ABS_DIFF", + legend=dict(x=1, y=1, orientation="v"), + hovermode="x unified", ) fig.update_xaxes(automargin=True) @@ -118,28 +146,33 @@ def plot_data(df, output_filename): # Update y-axes titles per subplot for i in range(10): - yaxis_num = i*2 + 1 + yaxis_num = i * 2 + 1 yaxis2_num = yaxis_num + 1 - fig['layout'][f'yaxis{yaxis_num}'].update( - title='Max MLD', titlefont=dict(color='blue'), - tickfont=dict(color='blue')) - fig['layout'][f'yaxis{yaxis2_num}'].update( - title='Max MAX_ABS_DIFF', titlefont=dict(color='green'), - tickfont=dict(color='green')) + fig["layout"][f"yaxis{yaxis_num}"].update( + title="Max MLD", titlefont=dict(color="blue"), tickfont=dict(color="blue") + ) + fig["layout"][f"yaxis{yaxis2_num}"].update( + title="Max MAX_ABS_DIFF", + titlefont=dict(color="green"), + tickfont=dict(color="green"), + ) # Save to html fig.write_html(output_filename) -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Plot long term logs') +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Plot long term logs") parser.add_argument( - 'root_dir', type=str, help='Root directory containing subdirectories' - ' with CSV log files') + "root_dir", + type=str, + help="Root directory containing subdirectories" " with CSV log files", + ) parser.add_argument( - 'output_filename', type=str, help='Filename of the generated plot. e.g' - '. long_term_regression.html') + "output_filename", + type=str, + help="Filename of the generated plot. e.g" ". long_term_regression.html", + ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) -- GitLab