Unverified commit 9ba5d516, authored by norvell
Browse files

Fixes for the long term logs

parent e4dd50d4
Loading
Loading
Loading
Loading
+37 −53
Original line number Diff line number Diff line
@@ -39,7 +39,7 @@ def parse_csv_data(csv_data):
    return concat_df


def plot_data(df, output_filename):
def plot_data(df, output_filename, days):
    """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save
    to html file."""
    # Convert 'date' to datetime
@@ -47,30 +47,25 @@ def plot_data(df, output_filename):
    df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce")
    df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce")

    # Filter out rows older than "days"
    cutoff = df["date"].max() - pd.Timedelta(days=days)
    df = df[df["date"] > cutoff].reset_index(drop=True)

    # Drop rows with NaT and NaN
    clean_df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"])
    df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"])

    # Group by 'format' and 'date' to get rows with max 'MLD' per group
    max_mld = (
        clean_df.groupby(["format", "date"])
        .apply(lambda x: x.loc[x["MLD"].idxmax()])
        .reset_index(drop=True)
    )

    # Group by 'format' and 'date' to get rows with max 'MAX_ABS_DIFF' per
    # group
    max_abs_diff = (
        clean_df.groupby(["format", "date"])
        .apply(lambda x: x.loc[x["MAX_ABS_DIFF"].idxmax()])
        .reset_index(drop=True)
    )
    idx = df.groupby(['format', 'date'])['MLD'].idxmax()
    max = df.loc[idx].reset_index(drop=True)
    idx = df.groupby(['format', 'date'])['MLD'].idxmin()
    min = df.loc[idx].reset_index(drop=True)
    mean = df.groupby(['format', 'date'])['MLD'].mean().to_frame('mean').reset_index()

    formats = sorted(clean_df["format"].unique())
    formats = sorted(df["format"].unique())

    fig = make_subplots(
        rows=5,
        cols=2,
        specs=[[{"secondary_y": True}] * 2] * 5,
        subplot_titles=[f"{i}" for i in formats],
        shared_xaxes="columns",
    )
@@ -79,9 +74,7 @@ def plot_data(df, output_filename):
        row = i // 2 + 1
        col = i % 2 + 1

        data_mld = max_mld[max_mld["format"] == fmt].sort_values("date")
        data_diff = max_abs_diff[max_abs_diff["format"]
                                 == fmt].sort_values("date")
        data_mld = max[max["format"] == fmt].sort_values("date")

        # Add max 'MLD' to primary y-axis
        fig.add_trace(
@@ -89,54 +82,51 @@ def plot_data(df, output_filename):
                x=data_mld["date"],
                y=data_mld["MLD"],
                mode="lines+markers",
                name=f" {fmt} - Max MLD",
                name=f"Max MLD",
                hovertext=[
                    f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:"
                    f"{abs_diff}<br>Format:"
                    f"Testcase: {tc}<br>MLD: {mld:.4f}"
                    f" {format}<br>Date: {date.date()}"
                    for tc, mld, abs_diff, format, date in zip(
                    for tc, mld, format, date in zip(
                        data_mld["testcase"],
                        data_mld["MLD"],
                        data_mld["MAX_ABS_DIFF"],
                        data_mld["format"],
                        data_mld["date"],
                    )

                ],
                hoverinfo="text",
            ),
            row=row,
            col=col,
            secondary_y=False,
        )

        # Add max 'MAX_ABS_DIFF' to secondary y-axis
        data_mld = mean[mean["format"] == fmt].sort_values("date")

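        # Add mean 'MLD' trace. NOTE(review): the call below still passes
        # secondary_y=True, so this lands on the secondary y-axis, not the
        # primary one -- confirm which axis is intended.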
        fig.add_trace(
            go.Scatter(
                x=data_diff["date"],
                y=data_diff["MAX_ABS_DIFF"],
                x=data_mld["date"],
                y=data_mld["mean"],
                mode="lines+markers",
                name=f"{fmt} - Max MAX_ABS_DIFF",
                name=f"Mean MLD",
                hovertext=[
                    f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:"
                    f" {abs_diff:.4f}<br>Format:"
                    f"Mean MLD: {mld:.4f}"
                    f" {format}<br>Date: {date.date()}"
                    for tc, mld, abs_diff, format, date in zip(
                        data_diff["testcase"],
                        data_diff["MLD"],
                        data_diff["MAX_ABS_DIFF"],
                        data_diff["format"],
                        data_diff["date"],
                    for mld, format, date in zip(
                        data_mld["mean"],
                        data_mld["format"],
                        data_mld["date"],
                    )

                ],
                hoverinfo="text",
            ),
            row=row,
            col=col,
            secondary_y=True,
        )

    fig.update_layout(
        title_text="Long-term regression: max MLD and max MAX_ABS_DIFF",
        title_text="History: MLD",        
        legend=dict(x=1, y=1, orientation="v"),
        hovermode="x unified",
    )
@@ -144,18 +134,6 @@ def plot_data(df, output_filename):
    fig.update_xaxes(automargin=True)
    fig.update_yaxes(automargin=True)

    # Update y-axes titles per subplot
    for i in range(10):
        yaxis_num = i * 2 + 1
        yaxis2_num = yaxis_num + 1
        fig["layout"][f"yaxis{yaxis_num}"].update(
            title="Max MLD", titlefont=dict(color="blue"), tickfont=dict(color="blue")
        )
        fig["layout"][f"yaxis{yaxis2_num}"].update(
            title="Max MAX_ABS_DIFF",
            titlefont=dict(color="green"),
            tickfont=dict(color="green"),
        )

    # Save to html
    fig.write_html(output_filename)
@@ -173,8 +151,14 @@ if __name__ == "__main__":
        type=str,
        help="Filename of the generated plot. e.g" ". long_term_regression.html",
    )
    parser.add_argument(
        "--days",
        type=int,
        help="Number of days in history. Default: 30",
        default=30,
    )    
    args = parser.parse_args()

    csv_data = read_csv_files(args.root_dir)
    data = parse_csv_data(csv_data)
    plot_data(data, args.output_filename)
    plot_data(data, args.output_filename, args.days)