Files
project_for_kamlesh/examples/gnarly_csv/gnarly_csv_delta.py

298 lines
9.3 KiB
Python
Raw Normal View History

"""
gnarly_csv_delta - Silicon subsystem power-delta visualiser
============================================================
Derived from ``drawings/gnarly_csv_files.excalidraw``.
Diagram summary
---------------
Two CSV files representing two chip revisions (rev_A, rev_B) share an
identical schema:
Columns 1-4 (measured raw data)
    meas_current_mA    rail current measurement
    meas_voltage_mV    rail voltage measurement
    meas_power_mW      directly measured power
    meas_temp_C        junction temperature
Columns 5-9 (derived complexity hierarchy, low -> high)
    col 5  rail_power_mW       power at the rail for a given subsystem
    col 6  subsys_power_mW     subsystem-level aggregated power
    col 7  cluster_power_mW    cluster-level aggregated power
    col 8  soc_power_mW        full SoC power
    col 9  battery_power_mW    total battery draw (highest complexity)
Rows -> silicon subsystem parts (CPU, GPU, DRAM, NPU, ISP, ...)
Goal
----
Load both CSV files, compute the **delta** (rev_B - rev_A) for columns 5-9
across every subsystem row, then render a grouped bar chart via matplotlib.
Data-flow (as drawn):
    rev_a.csv --+
                +--> load & align --> compute_deltas --> plot_bar_chart
    rev_b.csv --+
CLI usage:
python gnarly_csv_delta.py data/rev_a.csv data/rev_b.csv \\
[--output delta_chart.png] [--show]
"""
from __future__ import annotations
import argparse
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Sequence
import matplotlib.pyplot as plt
import pandas as pd
# ---------------------------------------------------------------------------
# Constants - the 9-column schema from the diagram
# ---------------------------------------------------------------------------
#: Columns 1-4: raw measured data (not used for delta visualisation).
MEASURED_COLS: list[str] = [
"meas_current_mA",
"meas_voltage_mV",
"meas_power_mW",
"meas_temp_C",
]
#: Columns 5-9: derived complexity hierarchy - these are the delta targets.
#: Listed in ascending order of aggregation (rail -> battery).
DERIVED_COLS: list[str] = [
"rail_power_mW", # col 5
"subsys_power_mW", # col 6
"cluster_power_mW", # col 7
"soc_power_mW", # col 8
"battery_power_mW", # col 9
]
#: The index column that identifies each subsystem row.
SUBSYSTEM_COL: str = "subsystem"
#: All expected columns in the CSV files: the subsystem identifier followed
#: by the measured columns and then the derived columns.
EXPECTED_COLS: list[str] = [SUBSYSTEM_COL] + MEASURED_COLS + DERIVED_COLS
# ---------------------------------------------------------------------------
# Data structures
# ---------------------------------------------------------------------------
@dataclass
class RevisionBundle:
    """In-memory representation of one chip-revision CSV file."""

    # Filesystem location the CSV was read from.
    path: Path
    # Short human-readable revision tag, e.g. "rev_A".
    label: str
    # Table contents; indexed by SUBSYSTEM_COL after load.
    data: pd.DataFrame
@dataclass
class DeltaResult:
    """Per-subsystem, per-derived-column deltas (rev_B - rev_A)."""

    # Delta table, shape (n_subsystems, len(DERIVED_COLS)).
    delta_df: pd.DataFrame
    # Label of the baseline revision.
    rev_a_label: str
    # Label of the comparison revision.
    rev_b_label: str
    # Names of the delta columns; each instance gets its own copy of
    # DERIVED_COLS so mutating one result never touches the module constant.
    derived_cols: list[str] = field(default_factory=lambda: [*DERIVED_COLS])
# ---------------------------------------------------------------------------
# Pipeline steps
# ---------------------------------------------------------------------------
def load_revision(path: Path, label: str) -> RevisionBundle:
    """Load a single chip-revision CSV and validate its schema.

    Parameters
    ----------
    path:
        Filesystem path to the CSV file.
    label:
        Human-readable revision label (e.g. ``"rev_A"``).

    Returns
    -------
    RevisionBundle
        The loaded and schema-validated bundle, with ``data`` indexed by
        ``SUBSYSTEM_COL``.

    Raises
    ------
    ValueError
        If any expected column is missing from the CSV, or if a subsystem
        name occurs more than once (rows could then not be matched
        unambiguously between revisions).
    """
    frame = pd.read_csv(path)

    # Report every missing column at once so the user can fix the file in
    # a single pass instead of one error per run.
    missing = [col for col in EXPECTED_COLS if col not in frame.columns]
    if missing:
        raise ValueError(
            f"{path}: missing expected column(s): {', '.join(missing)}"
        )

    # The subsystem name becomes the row index used for alignment, so it
    # has to be unique within a revision.
    subsystems = frame[SUBSYSTEM_COL]
    repeated = subsystems[subsystems.duplicated()].unique()
    if len(repeated):
        names = ", ".join(map(str, repeated))
        raise ValueError(
            f"{path}: duplicate {SUBSYSTEM_COL!r} value(s): {names}"
        )

    return RevisionBundle(
        path=path,
        label=label,
        data=frame.set_index(SUBSYSTEM_COL),
    )
def align_revisions(
    rev_a: RevisionBundle,
    rev_b: RevisionBundle,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Align both revision DataFrames so they share the same subsystem rows.

    The diagram shows both CSVs feeding into a single "load & align" step.
    Rows present in one revision but not the other are dropped, and a
    warning naming them is printed to standard error.

    Parameters
    ----------
    rev_a:
        The first (baseline) revision bundle.
    rev_b:
        The second (comparison) revision bundle.

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame]
        ``(aligned_a, aligned_b)`` DataFrames with identical row indices,
        in the row order of ``rev_a``.
    """
    index_a = rev_a.data.index
    index_b = rev_b.data.index

    # Boolean masks keep the baseline's row order; a set intersection
    # would not guarantee any particular ordering.
    shared_mask = index_a.isin(index_b)
    common = index_a[shared_mask]
    only_in_a = index_a[~shared_mask]
    only_in_b = index_b[~index_b.isin(index_a)]

    for label, dropped in ((rev_a.label, only_in_a), (rev_b.label, only_in_b)):
        if len(dropped):
            names = ", ".join(map(str, dropped))
            print(
                f"warning: dropping {len(dropped)} row(s) only present in "
                f"{label}: {names}",
                file=sys.stderr,
            )

    aligned_a = rev_a.data.reindex(common)
    aligned_b = rev_b.data.reindex(common)
    return aligned_a, aligned_b
def compute_deltas(
    aligned_a: pd.DataFrame,
    aligned_b: pd.DataFrame,
    rev_a_label: str,
    rev_b_label: str,
) -> DeltaResult:
    """Compute per-subsystem deltas for columns 5-9 (rev_B - rev_A).

    Parameters
    ----------
    aligned_a:
        Aligned baseline DataFrame (rev_A).
    aligned_b:
        Aligned comparison DataFrame (rev_B).
    rev_a_label:
        Label string for rev_A (used in the result).
    rev_b_label:
        Label string for rev_B (used in the result).

    Returns
    -------
    DeltaResult
        Populated delta result with a DataFrame of shape
        ``(n_subsystems, len(DERIVED_COLS))``.

    Raises
    ------
    KeyError
        If either frame lacks one of the ``DERIVED_COLS``.
    """
    # pandas matches rows and columns by label, so each delta pairs the
    # same subsystem and the same derived column from both revisions.
    delta_df = aligned_b[DERIVED_COLS].sub(aligned_a[DERIVED_COLS])
    return DeltaResult(
        delta_df=delta_df,
        rev_a_label=rev_a_label,
        rev_b_label=rev_b_label,
    )
def plot_bar_chart(
    result: DeltaResult,
    output_path: Path | None = None,
    show: bool = False,
) -> None:
    """Render a grouped bar chart of the deltas for columns 5-9.

    One group of bars per subsystem row; one bar per derived column (5-9).
    Positive delta = rev_B draws more power; negative = rev_B is more
    efficient. Uses plain matplotlib - no seaborn, no fancy theming.

    Parameters
    ----------
    result:
        The populated :class:`DeltaResult`.
    output_path:
        If provided, save the figure to this path (PNG/SVG/PDF).
    show:
        If ``True``, call ``plt.show()`` to open an interactive window.
    """
    deltas = result.delta_df
    columns = list(result.derived_cols)
    subsystems = [str(name) for name in deltas.index]
    group_centres = list(range(len(subsystems)))

    # Each group occupies 80% of its slot so neighbouring groups stay
    # visually separated; max(..., 1) avoids dividing by zero when there
    # are no columns to draw.
    bar_width = 0.8 / max(len(columns), 1)

    fig, ax = plt.subplots()
    try:
        for position, column in enumerate(columns):
            # Centre the cluster of bars on the group's tick position.
            offset = (position - (len(columns) - 1) / 2) * bar_width
            ax.bar(
                [centre + offset for centre in group_centres],
                deltas[column].tolist(),
                width=bar_width,
                label=column,
            )

        # Zero line separates "rev_B draws more" from "rev_B draws less".
        ax.axhline(0.0, color="black", linewidth=0.8)
        ax.set_xticks(group_centres)
        ax.set_xticklabels(subsystems, rotation=45, ha="right")
        ax.set_xlabel("Subsystem")
        ax.set_ylabel("Power delta (mW)")
        ax.set_title(
            f"Power delta per subsystem "
            f"({result.rev_b_label} - {result.rev_a_label})"
        )
        if columns:
            # legend() warns when there is nothing to list.
            ax.legend()

        if output_path is not None:
            fig.savefig(output_path, bbox_inches="tight")
        if show:
            plt.show()
    finally:
        # Release the figure even if saving or showing fails.
        plt.close(fig)
# ---------------------------------------------------------------------------
# CLI entry-point
# ---------------------------------------------------------------------------
def build_parser() -> argparse.ArgumentParser:
    """Construct the :mod:`argparse` parser for the ``gnarly_csv_delta`` CLI.

    Returns
    -------
    argparse.ArgumentParser
        Parser with two positional CSV paths (``rev_a``, ``rev_b``), an
        optional ``--output``/``-o`` file path and a ``--show`` flag.
    """
    parser = argparse.ArgumentParser(
        prog="gnarly_csv_delta",
        description=(
            "Compare two chip-revision CSV files and visualise power deltas "
            "(columns 5-9) as a grouped bar chart."
        ),
    )
    parser.add_argument(
        "rev_a",
        type=Path,
        help="Path to the baseline chip-revision CSV (rev_A).",
    )
    parser.add_argument(
        "rev_b",
        type=Path,
        help="Path to the comparison chip-revision CSV (rev_B).",
    )
    parser.add_argument(
        "--output",
        "-o",
        type=Path,
        default=None,
        metavar="FILE",
        help=(
            "Save the bar chart to this file (e.g. delta_chart.png). "
            "Format is inferred from the extension. "
            "If omitted, the chart is not saved to disk."
        ),
    )
    parser.add_argument(
        "--show",
        action="store_true",
        default=False,
        help="Open an interactive matplotlib window after rendering.",
    )
    return parser
def main(argv: Sequence[str] | None = None) -> None:
    """Parse CLI arguments and run the full delta-visualisation pipeline.

    Pipeline
    --------
    1. :func:`load_revision` (once per revision)
    2. :func:`align_revisions`
    3. :func:`compute_deltas`
    4. :func:`plot_bar_chart`

    Parameters
    ----------
    argv:
        Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises
    ------
    SystemExit
        Raised by :mod:`argparse` on invalid arguments, and when a CSV
        cannot be read or fails validation.
    """
    parser = build_parser()
    args = parser.parse_args(list(argv) if argv is not None else sys.argv[1:])

    try:
        rev_a_bundle = load_revision(args.rev_a, label="rev_A")
        rev_b_bundle = load_revision(args.rev_b, label="rev_B")
    except (OSError, ValueError) as exc:
        # An unreadable or malformed input file is a usage problem: report
        # it the argparse way (message + exit status 2), not as a traceback.
        parser.error(str(exc))

    aligned_a, aligned_b = align_revisions(rev_a_bundle, rev_b_bundle)
    result = compute_deltas(
        aligned_a,
        aligned_b,
        rev_a_bundle.label,
        rev_b_bundle.label,
    )
    plot_bar_chart(result, output_path=args.output, show=args.show)


if __name__ == "__main__":
    main()