# project_for_kamlesh/examples/gnarly_csv/gnarly_csv_delta.py

"""
gnarly_csv_delta – Silicon subsystem power-delta visualiser
============================================================
Derived from ``drawings/gnarly_csv_files.excalidraw``.

Diagram summary
---------------
Two CSV files representing two chip revisions (rev_A, rev_B) share an
identical schema:

    Columns 1–4  (measured raw data)
    ─────────────────────────────────
    meas_current_mA   – rail current measurement
    meas_voltage_mV   – rail voltage measurement
    meas_power_mW     – directly measured power
    meas_temp_C       – junction temperature

    Columns 5–9  (derived complexity hierarchy, low → high)
    ────────────────────────────────────────────────────────
    col 5  rail_power_mW     – power at the rail for a given subsystem
    col 6  subsys_power_mW   – subsystem-level aggregated power
    col 7  cluster_power_mW  – cluster-level aggregated power
    col 8  soc_power_mW      – full SoC power
    col 9  battery_power_mW  – total battery draw (highest complexity)

    Rows  – silicon subsystem parts (CPU, GPU, DRAM, NPU, ISP, …)

Goal
----
Load both CSV files, compute the **delta** (rev_B − rev_A) for columns 5–9
across every subsystem row, then render a grouped bar chart via matplotlib.

Data-flow (as drawn):
    rev_a.csv  ──┐
                 ▼
            load & align  ──►  compute_deltas  ──►  plot_bar_chart
                 ▲
    rev_b.csv  ──┘

CLI usage:
    python gnarly_csv_delta.py data/rev_a.csv data/rev_b.csv \\
        [--output delta_chart.png] [--show]
"""

from __future__ import annotations

import argparse
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Sequence

import matplotlib.pyplot as plt
import pandas as pd


# ---------------------------------------------------------------------------
# Constants – the 9-column schema from the diagram
# ---------------------------------------------------------------------------

#: Name of the CSV column that identifies each subsystem row.
SUBSYSTEM_COL: str = "subsystem"

#: Raw measured data (CSV columns 1–4); not part of the delta visualisation.
MEASURED_COLS: list[str] = [
    "meas_current_mA",
    "meas_voltage_mV",
    "meas_power_mW",
    "meas_temp_C",
]

#: Derived complexity hierarchy (CSV columns 5–9), ordered low → high.
#: These are the columns whose deltas are computed and plotted.
DERIVED_COLS: list[str] = [
    "rail_power_mW",       # col 5
    "subsys_power_mW",     # col 6
    "cluster_power_mW",    # col 7
    "soc_power_mW",        # col 8
    "battery_power_mW",    # col 9
]

#: Every column a revision CSV must provide: the subsystem identifier first,
#: then the measured columns, then the derived columns.
EXPECTED_COLS: list[str] = [SUBSYSTEM_COL, *MEASURED_COLS, *DERIVED_COLS]


# ---------------------------------------------------------------------------
# Data structures
# ---------------------------------------------------------------------------


@dataclass
class RevisionBundle:
    """In-memory representation of one chip-revision CSV.

    Attributes
    ----------
    path:
        Location of the CSV file the data came from.
    label:
        Short revision name, e.g. ``"rev_A"``.
    data:
        The CSV contents; indexed by ``SUBSYSTEM_COL`` after load.
    """

    path: Path
    label: str
    data: pd.DataFrame


@dataclass
class DeltaResult:
    """Outcome of the delta computation (rev_B − rev_A) over the derived columns.

    Attributes
    ----------
    delta_df:
        Per-subsystem deltas, one column per derived column; expected shape
        ``(n_subsystems, len(DERIVED_COLS))``.
    rev_a_label:
        Label of the baseline revision.
    rev_b_label:
        Label of the comparison revision.
    derived_cols:
        Names of the derived columns covered by ``delta_df``. Defaults to a
        fresh copy of ``DERIVED_COLS`` so instances never share the list.
    """

    delta_df: pd.DataFrame
    rev_a_label: str
    rev_b_label: str
    derived_cols: list[str] = field(default_factory=lambda: [*DERIVED_COLS])


# ---------------------------------------------------------------------------
# Pipeline steps
# ---------------------------------------------------------------------------


def load_revision(path: Path, label: str) -> RevisionBundle:
    """Load a single chip-revision CSV and validate its schema.

    Parameters
    ----------
    path:
        Filesystem path to the CSV file.
    label:
        Human-readable revision label (e.g. ``"rev_A"``).

    Returns
    -------
    RevisionBundle
        The loaded and schema-validated bundle; its ``data`` frame is indexed
        by ``SUBSYSTEM_COL``.

    Raises
    ------
    ValueError
        If any expected column is missing from the CSV.
    """
    df = pd.read_csv(path)

    # Validate before indexing so a missing subsystem column is reported as a
    # schema error (ValueError) rather than a KeyError from set_index().
    missing = [col for col in EXPECTED_COLS if col not in df.columns]
    if missing:
        raise ValueError(
            f"{path}: missing expected column(s): {', '.join(missing)}"
        )

    df = df.set_index(SUBSYSTEM_COL)
    return RevisionBundle(path=path, label=label, data=df)


def align_revisions(
    rev_a: RevisionBundle,
    rev_b: RevisionBundle,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Align both revision DataFrames so they share the same subsystem rows.

    The diagram shows both CSVs feeding into a single "load & align" step.
    Rows present in one revision but not the other are dropped, and a warning
    naming the dropped rows is printed to stderr.

    Parameters
    ----------
    rev_a:
        The first (baseline) revision bundle.
    rev_b:
        The second (comparison) revision bundle.

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame]
        ``(aligned_a, aligned_b)`` – DataFrames with identical row indices,
        ordered as the common rows appear in ``rev_a``.
    """
    index_a = rev_a.data.index
    index_b = rev_b.data.index

    # Boolean masks keep the baseline's row order for the common index.
    in_both = index_a.isin(index_b)
    common = index_a[in_both]
    only_in_a = index_a[~in_both]
    only_in_b = index_b[~index_b.isin(index_a)]

    for bundle, other, dropped in (
        (rev_a, rev_b, only_in_a),
        (rev_b, rev_a, only_in_b),
    ):
        if len(dropped):
            names = ", ".join(str(name) for name in dropped)
            print(
                f"warning: dropping {len(dropped)} subsystem row(s) from "
                f"{bundle.label} not present in {other.label}: {names}",
                file=sys.stderr,
            )

    aligned_a = rev_a.data.reindex(common)
    aligned_b = rev_b.data.reindex(common)
    return aligned_a, aligned_b


def compute_deltas(
    aligned_a: pd.DataFrame,
    aligned_b: pd.DataFrame,
    rev_a_label: str,
    rev_b_label: str,
) -> DeltaResult:
    """Compute per-subsystem deltas for columns 5–9 (rev_B − rev_A).

    Parameters
    ----------
    aligned_a:
        Aligned baseline DataFrame (rev_A).
    aligned_b:
        Aligned comparison DataFrame (rev_B).
    rev_a_label:
        Label string for rev_A (used in the result).
    rev_b_label:
        Label string for rev_B (used in the result).

    Returns
    -------
    DeltaResult
        Populated delta result with a DataFrame of shape
        ``(n_subsystems, len(DERIVED_COLS))``.
    """
    # Positive values mean rev_B is higher than rev_A for that column.
    delta_df = aligned_b[DERIVED_COLS] - aligned_a[DERIVED_COLS]
    return DeltaResult(
        delta_df=delta_df,
        rev_a_label=rev_a_label,
        rev_b_label=rev_b_label,
    )


def plot_bar_chart(
    result: DeltaResult,
    output_path: Path | None = None,
    show: bool = False,
) -> None:
    """Render a grouped bar chart of the deltas for columns 5–9.

    One group of bars per subsystem row; one bar per derived column (5–9).
    Positive delta = rev_B draws more power; negative = rev_B is more efficient.
    Uses plain matplotlib – no seaborn, no fancy theming.

    Parameters
    ----------
    result:
        The populated :class:`DeltaResult`. The columns plotted are those
        listed in ``result.derived_cols``.
    output_path:
        If provided, save the figure to this path (PNG/SVG/PDF).
    show:
        If ``True``, call ``plt.show()`` to open an interactive window.
    """
    delta_df = result.delta_df
    columns = list(result.derived_cols)
    subsystems = [str(name) for name in delta_df.index]
    n_groups = len(subsystems)
    n_bars = len(columns)

    # Widen the figure with the number of subsystem groups so bars stay legible.
    fig, ax = plt.subplots(figsize=(max(6.0, 1.5 * n_groups), 5.0))
    try:
        # Each group is centred on an integer x position and occupies 80 % of
        # the slot; the remainder is the gap between neighbouring groups.
        group_width = 0.8
        bar_width = group_width / n_bars if n_bars else group_width

        for bar_idx, col in enumerate(columns):
            # Offset of this column's bar relative to the group centre.
            offset = (bar_idx - (n_bars - 1) / 2) * bar_width
            positions = [centre + offset for centre in range(n_groups)]
            ax.bar(
                positions,
                delta_df[col].to_numpy(),
                width=bar_width,
                label=col,
            )

        # Zero-line separates "rev_B higher" from "rev_B lower".
        ax.axhline(0.0, color="black", linewidth=0.8)
        ax.set_xticks(list(range(n_groups)))
        ax.set_xticklabels(subsystems, rotation=45, ha="right")
        ax.set_xlabel("Subsystem")
        ax.set_ylabel("Power delta (mW)")
        ax.set_title(
            f"Power delta per subsystem "
            f"({result.rev_b_label} − {result.rev_a_label})"
        )
        if columns:
            ax.legend(title="Derived column")

        if output_path is not None:
            fig.savefig(output_path, bbox_inches="tight")
        if show:
            plt.show()
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)


# ---------------------------------------------------------------------------
# CLI entry-point
# ---------------------------------------------------------------------------


def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the ``gnarly_csv_delta`` tool.

    The parser takes two positional CSV paths (``rev_a``, ``rev_b``), an
    optional ``--output``/``-o`` file path, and a ``--show`` flag.
    """
    description = " ".join(
        (
            "Compare two chip-revision CSV files and visualise power deltas",
            "(columns 5–9) as a grouped bar chart.",
        )
    )
    output_help = " ".join(
        (
            "Save the bar chart to this file (e.g. delta_chart.png).",
            "Format is inferred from the extension.",
            "If omitted, the chart is not saved to disk.",
        )
    )

    cli = argparse.ArgumentParser(prog="gnarly_csv_delta", description=description)

    # Positional inputs: baseline first, comparison second.
    cli.add_argument(
        "rev_a", type=Path, help="Path to the baseline chip-revision CSV (rev_A)."
    )
    cli.add_argument(
        "rev_b", type=Path, help="Path to the comparison chip-revision CSV (rev_B)."
    )

    # Optional outputs: argparse already defaults --output to None and a
    # store_true flag to False.
    cli.add_argument("--output", "-o", type=Path, metavar="FILE", help=output_help)
    cli.add_argument(
        "--show",
        action="store_true",
        help="Open an interactive matplotlib window after rendering.",
    )
    return cli


def main(argv: Sequence[str] | None = None) -> None:
    """Parse CLI arguments and run the full delta-visualisation pipeline.

    Parameters
    ----------
    argv:
        Argument list to parse; when ``None``, ``sys.argv[1:]`` is used.

    Pipeline
    --------
    1. :func:`load_revision` × 2
    2. :func:`align_revisions`
    3. :func:`compute_deltas`
    4. :func:`plot_bar_chart`
    """
    args = build_parser().parse_args(list(argv) if argv is not None else sys.argv[1:])

    # The positional order defines the roles: first file is the baseline.
    rev_a_bundle = load_revision(args.rev_a, label="rev_A")
    rev_b_bundle = load_revision(args.rev_b, label="rev_B")

    aligned_a, aligned_b = align_revisions(rev_a_bundle, rev_b_bundle)
    result = compute_deltas(
        aligned_a,
        aligned_b,
        rev_a_label=rev_a_bundle.label,
        rev_b_label=rev_b_bundle.label,
    )
    plot_bar_chart(result, output_path=args.output, show=args.show)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()