From 18a3b464f21969cc610f8b4f96f1a6092dedb5c3 Mon Sep 17 00:00:00 2001 From: Vyaas Date: Sat, 11 Apr 2026 15:54:42 -0700 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=89=20Initial=20commit:=20project=20sc?= =?UTF-8?q?affold,=20agent=20modes,=20and=20gnarly=5Fcsv=20example?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add .roomodes with four custom Roo agents: • excalidraw-to-python – diagram → typed Python skeleton • python-coder – skeleton → production implementation (design patterns) • tester – pytest suite writer & runner • orchestrator – coordinates the full excalidraw→code→test→execute pipeline - Add src/csv_grok.py and tests/test_csv_grok.py (CSV diff utility) - Add examples/gnarly_csv/ with gnarly_csv_delta.py and sample data (rev_a/rev_b) - Add drawings/ with design.excalidraw and gnarly_csv_files.excalidraw - Add docs/excalidraw-to-python-agent.md - Add requirements.txt and .gitignore --- .gitignore | 30 + .roomodes | 56 ++ analysis.txt | 8 + data/file1.csv | 6 + data/file2.csv | 6 + docs/excalidraw-to-python-agent.md | 84 +++ drawings/design.excalidraw | 613 ++++++++++++++++++++ drawings/gnarly_csv_files.excalidraw | 713 ++++++++++++++++++++++++ examples/gnarly_csv/data/rev_a.csv | 9 + examples/gnarly_csv/data/rev_b.csv | 9 + examples/gnarly_csv/gnarly_csv_delta.py | 297 ++++++++++ requirements.txt | 1 + src/csv_grok.py | 231 ++++++++ tests/test_csv_grok.py | 304 ++++++++++ 14 files changed, 2367 insertions(+) create mode 100644 .gitignore create mode 100644 .roomodes create mode 100644 analysis.txt create mode 100644 data/file1.csv create mode 100644 data/file2.csv create mode 100644 docs/excalidraw-to-python-agent.md create mode 100644 drawings/design.excalidraw create mode 100644 drawings/gnarly_csv_files.excalidraw create mode 100644 examples/gnarly_csv/data/rev_a.csv create mode 100644 examples/gnarly_csv/data/rev_b.csv create mode 100644 examples/gnarly_csv/gnarly_csv_delta.py create mode 100644 
requirements.txt create mode 100644 src/csv_grok.py create mode 100644 tests/test_csv_grok.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e8a2626 --- /dev/null +++ b/.gitignore @@ -0,0 +1,30 @@ +# Python +__pycache__/ +*.py[cod] +*.pyo +*.pyd +*.egg-info/ +dist/ +build/ +*.egg + +# Virtual environment +.venv/ +venv/ +env/ + +# Pytest / coverage +.pytest_cache/ +.coverage +htmlcov/ + +# Editor / OS +.DS_Store +*.swp +*.swo +.idea/ +.vscode/ + +# Secrets / local config +*.env +.env diff --git a/.roomodes b/.roomodes new file mode 100644 index 0000000..7087b8a --- /dev/null +++ b/.roomodes @@ -0,0 +1,56 @@ +{ + "customModes": [ + { + "slug": "excalidraw-to-python", + "name": "🎨 Excalidraw → Python", + "model": "claude-sonnet-4-6", + "roleDefinition": "You are an expert software architect and Python developer. Your speciality is reading Excalidraw `.excalidraw` JSON files, interpreting the diagram's shapes, labels, and arrows to understand the intended software design, and then generating strongly-typed skeletal Python code (using type hints, dataclasses or Pydantic models, and `argparse` or `click` for CLIs where applicable). You never write implementation logic — only well-structured skeletons with docstrings, type annotations, `pass` bodies, and `TODO` comments that guide a developer to fill in the real logic.", + "customInstructions": "When given an Excalidraw file:\n1. Parse the JSON and identify all `rectangle` / `ellipse` / `diamond` shapes and their associated `text` labels (via `containerId` linkage).\n2. Identify all `arrow` elements and their `startBinding` / `endBinding` to understand data-flow direction.\n3. Identify any free-floating `text` elements — these often represent CLI usage examples or notes.\n4. Map the diagram to Python constructs:\n - Each labelled box → a function or class.\n - Arrows → function call chains or data-flow between functions.\n - CLI usage text → an `argparse` / `click` entry-point.\n5. 
Emit a single `.py` file (or multiple files if the diagram has clear module boundaries) with:\n - Module-level docstring summarising the diagram.\n - `from __future__ import annotations` and all necessary stdlib imports.\n - Strongly-typed function signatures (`def foo(x: pd.DataFrame) -> pd.DataFrame:`).\n - Dataclass or TypedDict definitions for any data structures implied by the diagram.\n - `pass` bodies with `# TODO:` comments.\n - A `if __name__ == '__main__':` block wiring up the CLI.\n6. Always use Python 3.10+ type-hint syntax.\n7. Never invent logic — only scaffold.", + "groups": [ + "read", + "edit", + "command" + ], + "source": "project" + }, + { + "slug": "python-coder", + "name": "🐍 Python Coder", + "model": "claude-sonnet-4-6", + "roleDefinition": "You are a senior Python engineer who specialises in filling in skeletal code produced by a software architect. You are deeply fluent in modern design patterns (Strategy, Factory, Repository, Observer, Dependency Injection, etc.) and Python idioms (dataclasses, Pydantic v2, context managers, generators, async/await, functools, itertools). Given a skeleton `.py` file with `pass` bodies and `# TODO:` comments, you replace every stub with a correct, idiomatic, production-quality implementation. You never change public interfaces (signatures, class names, module structure) unless a bug in the skeleton makes it unavoidable — and you always document such deviations. You write clean, readable code with inline comments explaining non-obvious decisions.", + "customInstructions": "When given a skeletal Python file to implement:\n1. Read the entire file first. Understand the module docstring, all class/function signatures, type annotations, and TODO comments before writing a single line.\n2. 
Identify which design pattern best fits each component:\n - Classes with interchangeable algorithms → Strategy pattern.\n - Object creation with varying subtypes → Factory or Abstract Factory.\n - Data access / persistence → Repository pattern.\n - Event-driven wiring → Observer / event-bus.\n - Cross-cutting concerns (logging, caching, retries) → Decorator pattern.\n3. Implement each `pass` / `# TODO:` body:\n a. Use the type annotations as a contract — honour them exactly.\n b. Prefer stdlib solutions; reach for third-party libraries only when they are already imported or listed in `requirements.txt`.\n c. Handle error cases explicitly: raise typed exceptions (`ValueError`, `TypeError`, custom exceptions) with descriptive messages.\n d. Add `# NOTE:` inline comments for any non-obvious algorithmic choice.\n4. Do NOT alter:\n - Public function/method signatures.\n - Class names or module-level `__all__`.\n - Existing import statements (you may add new ones).\n5. After implementing, do a self-review pass:\n - Ensure no `pass` or `# TODO:` remains (unless intentionally deferred — mark those `# DEFERRED:`).\n - Ensure all type annotations are satisfied.\n - Ensure the `if __name__ == '__main__':` block is runnable.\n6. Output the complete, updated file — never a diff or partial snippet.", + "groups": [ + "read", + "edit", + "command" + ], + "source": "project" + }, + { + "slug": "tester", + "name": "🧪 Tester", + "model": "claude-sonnet-4-6", + "roleDefinition": "You are a rigorous Python QA engineer embedded in this project. Your sole responsibility is to verify the correctness of every source file under `src/`. You write pytest unit tests, run them, and report results. You never modify source files — only test files under `tests/`. For every computation you test, you verify the result using an independent second method (e.g. compute a mean both via pandas and via manual sum/count) so that the test itself is trustworthy. 
You treat a green test suite as the only acceptable definition of 'correct'.", + "customInstructions": "When asked to test a module:\n1. Read the source file(s) under `src/` to understand the public API (function signatures, dataclasses, return types).\n2. Read any existing data fixtures under `data/` or `tests/fixtures/`.\n3. Create or update `tests/test_{module}.py` (one test file per source module under `src/`) using pytest.\n4. For every function under test:\n a. Write at least one happy-path test.\n b. Write at least one edge-case or error-path test (e.g. missing column raises ValueError).\n c. Where a numeric result is computed, verify it with an independent calculation inside the test body (do NOT just call the function twice — use a different expression).\n5. Install any missing test dependencies (e.g. pytest) into `.venv/` with `.venv/bin/pip install`.\n6. Run the full test suite with `.venv/bin/pytest tests/ -v` and capture output.\n7. If any test fails, diagnose the root cause, fix the test (or report the bug clearly), and re-run until the suite is green.\n8. Report a final summary: number of tests, pass/fail counts, and a one-line verdict.", + "groups": [ + "read", + "edit", + "command" + ], + "source": "project" + }, + { + "slug": "orchestrator", + "name": "🪃 Orchestrator", + "model": "claude-sonnet-4-6", + "roleDefinition": "You are a senior technical project manager and workflow orchestrator. You decompose complex, multi-step software tasks into a sequenced plan and delegate each step to the most appropriate specialist agent. You never write implementation code yourself — you coordinate, review hand-offs, and ensure quality gates are met before advancing to the next stage. 
You are the single source of truth for task state and progress.", + "customInstructions": "You manage the canonical four-stage pipeline for this project:\n\n**Stage 1 — Excalidraw → Skeleton** (`excalidraw-to-python` mode)\n- Trigger: user provides a `.excalidraw` file or describes a diagram.\n- Action: delegate to `excalidraw-to-python` agent to produce a skeletal `.py` file.\n- Gate: skeleton file exists, parses without syntax errors (`python -m ast {skeleton_file}`), all public interfaces are typed, every body is `pass` / `# TODO:`.\n\n**Stage 2 — Skeleton → Implementation** (`python-coder` mode)\n- Trigger: skeleton file passes Stage 1 gate.\n- Action: delegate to `python-coder` agent to fill in all method bodies using appropriate design patterns.\n- Gate: no `pass` or `# TODO:` remains; file parses cleanly; all type annotations are satisfied; `if __name__ == '__main__':` block is present and runnable.\n\n**Stage 3 — Implementation → Tests** (`tester` mode)\n- Trigger: implementation file passes Stage 2 gate.\n- Action: delegate to `tester` agent to write and run a full pytest suite.\n- Gate: all tests pass (`pytest` exits 0); at least one happy-path and one error-path test per public function; numeric results verified by independent calculation.\n\n**Stage 4 — Execute & Validate**\n- Trigger: test suite passes Stage 3 gate.\n- Action: run the implemented module end-to-end with representative inputs (from `data/` or `examples/`) and capture stdout/stderr.\n- Gate: process exits 0; output matches expected shape/format described in the original diagram or user spec.\n\n**Orchestration rules:**\n1. Always announce which stage you are entering and which agent you are delegating to.\n2. After each stage, explicitly check the gate criteria before proceeding.\n3. If a gate fails, send the artefact back to the responsible agent with a precise failure description — do not skip ahead.\n4. Maintain a running status table (Stage | Agent | Status | Notes) in your responses.\n5. 
When all four stages are green, deliver a final summary: files produced, test counts, and the exact command to run the solution.", + "groups": [ + "read", + "edit", + "command" + ], + "source": "project" + } + ] +} diff --git a/analysis.txt b/analysis.txt new file mode 100644 index 0000000..e0ae9c6 --- /dev/null +++ b/analysis.txt @@ -0,0 +1,8 @@ +================================================== +csvGrok – Mean Comparison Report +================================================== +Column analysed : age +Mean (file 1) : 30.0000 +Mean (file 2) : 34.8000 +Difference : -4.8000 (file1 − file2) +================================================== diff --git a/data/file1.csv b/data/file1.csv new file mode 100644 index 0000000..5da1060 --- /dev/null +++ b/data/file1.csv @@ -0,0 +1,6 @@ +name,age,score,salary +Alice,30,88.5,72000 +Bob,25,91.0,65000 +Carol,35,76.3,85000 +Dave,28,83.7,70000 +Eve,32,95.2,90000 diff --git a/data/file2.csv b/data/file2.csv new file mode 100644 index 0000000..5b204b7 --- /dev/null +++ b/data/file2.csv @@ -0,0 +1,6 @@ +name,age,score,salary +Frank,40,70.1,95000 +Grace,22,88.9,58000 +Hank,45,65.4,110000 +Iris,29,79.8,68000 +Jack,38,82.3,88000 diff --git a/docs/excalidraw-to-python-agent.md b/docs/excalidraw-to-python-agent.md new file mode 100644 index 0000000..a7e336d --- /dev/null +++ b/docs/excalidraw-to-python-agent.md @@ -0,0 +1,84 @@ +# 🎨 Excalidraw → Python Agent + +## What is this mode? + +The **Excalidraw → Python** mode (`excalidraw-to-python`) is a custom Roo agent that reads +`.excalidraw` diagram files and translates them into **strongly-typed skeletal Python code**. + +It never writes implementation logic — it produces well-structured scaffolding with type +annotations, docstrings, and `# TODO:` markers so a developer can fill in the real logic. + +--- + +## How to activate it + +1. Open the Roo chat panel in VS Code. +2. Click the mode selector (bottom-left of the chat input). +3. Choose **🎨 Excalidraw → Python**. 
+ +Or, if you are already in another mode, ask Roo: + +> "Switch to Excalidraw → Python mode and parse `drawings/design.excalidraw`." + +--- + +## What the agent does — step by step + +| Step | What the agent looks for | What it produces | +|------|--------------------------|-----------------| +| 1 | `rectangle` / `ellipse` / `diamond` shapes + their text labels | Python functions or classes | +| 2 | `arrow` elements with `startBinding` / `endBinding` | Call-chain / data-flow order | +| 3 | Free-floating `text` elements (CLI usage, notes) | `argparse` entry-point | +| 4 | Frame labels | Module / file boundaries | + +--- + +## Rules the agent follows + +- Uses **Python 3.10+** type-hint syntax (`X | Y`, `list[str]`, etc.). +- Imports `from __future__ import annotations` at the top. +- Every function has a **typed signature** and a **docstring**. +- Bodies contain only `pass` and `# TODO:` comments. +- A `if __name__ == "__main__":` block wires up the CLI. +- Uses `argparse` (stdlib) unless the diagram explicitly mentions another CLI library. + +--- + +## Example + +Given the diagram in `drawings/design.excalidraw` the agent produces `src/csv_grok.py` +(see that file for the full output). + +The diagram encodes: + +``` +CSV File 1 ──┐ + ▼ + Combine (union) ──► Analyse (cross-correlate?) + ▲ +CSV File 2 ──┘ + +CLI: csvGrok file1.csv file2.csv -o analysis.txt +``` + +--- + +## Configuration location + +The mode is defined in **`.roomodes`** at the project root. +That file is the single source of truth for all custom Roo agents in this project. + +```jsonc +// .roomodes +{ + "customModes": [ + { + "slug": "excalidraw-to-python", + "name": "🎨 Excalidraw → Python", + ... + } + ] +} +``` + +To add more agents, append additional objects to the `customModes` array. 
diff --git a/drawings/design.excalidraw b/drawings/design.excalidraw new file mode 100644 index 0000000..122ef12 --- /dev/null +++ b/drawings/design.excalidraw @@ -0,0 +1,613 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor", + "elements": [ + { + "id": "oV2KOq4jYO4sHeSU5R8kh", + "type": "rectangle", + "x": 348.578125, + "y": 163.6484375, + "width": 134.08203125, + "height": 50.02734375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "a0", + "roundness": { + "type": 3 + }, + "seed": 397563852, + "version": 146, + "versionNonce": 1693445236, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "KkCbO7TroLR8XAh5tWvgt" + }, + { + "id": "ZCPXQAC_L5Vu14RBmgVO6", + "type": "arrow" + } + ], + "updated": 1775943776933, + "link": null, + "locked": false + }, + { + "id": "KkCbO7TroLR8XAh5tWvgt", + "type": "text", + "x": 368.2491760253906, + "y": 176.162109375, + "width": 94.73992919921875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "a1", + "roundness": null, + "seed": 679962228, + "version": 115, + "versionNonce": 329925836, + "isDeleted": false, + "boundElements": null, + "updated": 1775943776933, + "link": null, + "locked": false, + "text": "CSV File 1", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "oV2KOq4jYO4sHeSU5R8kh", + "originalText": "CSV File 1", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "RXxXHhdoKM3VIEHiNG-zf", + "type": "rectangle", + "x": 513.353515625, + 
"y": 163.794921875, + "width": 134.08203125, + "height": 50.02734375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "a5", + "roundness": { + "type": 3 + }, + "seed": 366181364, + "version": 194, + "versionNonce": 111295988, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "CksPw0e-nSZFhhZCtD5s2" + }, + { + "id": "T-bwLmuOeO20N7BcoiHu5", + "type": "arrow" + } + ], + "updated": 1775943776933, + "link": null, + "locked": false + }, + { + "id": "CksPw0e-nSZFhhZCtD5s2", + "type": "text", + "x": 530.2945709228516, + "y": 176.30859375, + "width": 100.19992065429688, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "a6", + "roundness": null, + "seed": 57138548, + "version": 165, + "versionNonce": 1512033100, + "isDeleted": false, + "boundElements": [], + "updated": 1775943776933, + "link": null, + "locked": false, + "text": "CSV File 2", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "RXxXHhdoKM3VIEHiNG-zf", + "originalText": "CSV File 2", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "VKBVsKP110l_g8EKo59iL", + "type": "rectangle", + "x": 410.57421875, + "y": 362.04296875, + "width": 177.49609375, + "height": 44.21875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "a7", + "roundness": { + "type": 3 + }, + "seed": 1125773684, + "version": 45, + "versionNonce": 
823025524, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "WFs0Vvba0EV1XcG3qdlwO" + }, + { + "id": "ZCPXQAC_L5Vu14RBmgVO6", + "type": "arrow" + }, + { + "id": "T-bwLmuOeO20N7BcoiHu5", + "type": "arrow" + }, + { + "id": "xGMkWmNHSd7ywC11KpcCu", + "type": "arrow" + } + ], + "updated": 1775943776933, + "link": null, + "locked": false + }, + { + "id": "WFs0Vvba0EV1XcG3qdlwO", + "type": "text", + "x": 424.91233825683594, + "y": 371.65234375, + "width": 148.81985473632812, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "a7V", + "roundness": null, + "seed": 463147596, + "version": 21, + "versionNonce": 1965064652, + "isDeleted": false, + "boundElements": null, + "updated": 1775943776933, + "link": null, + "locked": false, + "text": "Combine (union)", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "VKBVsKP110l_g8EKo59iL", + "originalText": "Combine (union)", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "y8V_3VbfnTPnx7-ArwtoN", + "type": "rectangle", + "x": 727.712890625, + "y": 364.5390625, + "width": 282.35156249999994, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "a9", + "roundness": { + "type": 3 + }, + "seed": 1075004620, + "version": 344, + "versionNonce": 197552204, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "7bEUeAY1YUu_ofgrrcd0D" + }, + { + "id": "xGMkWmNHSd7ywC11KpcCu", + "type": "arrow" + } + ], + "updated": 1775943776933, + "link": null, + "locked": false + }, + { + "id": "7bEUeAY1YUu_ofgrrcd0D", + 
"type": "text", + "x": 742.1788024902344, + "y": 382.0390625, + "width": 253.41973876953125, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "aA", + "roundness": null, + "seed": 292080460, + "version": 351, + "versionNonce": 1215042164, + "isDeleted": false, + "boundElements": [], + "updated": 1775943776933, + "link": null, + "locked": false, + "text": "Analyse (cross-correlate?)", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "y8V_3VbfnTPnx7-ArwtoN", + "originalText": "Analyse (cross-correlate?)", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Pd7IY77wmGYpp4i4d11K8", + "type": "text", + "x": 411.51953125, + "y": 512.0703125, + "width": 472.99932861328125, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "aC", + "roundness": null, + "seed": 823786444, + "version": 295, + "versionNonce": 1374387916, + "isDeleted": false, + "boundElements": null, + "updated": 1775943776933, + "link": null, + "locked": false, + "text": "csvGrok file1.csv file2.csv -o analysis.txt", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "csvGrok file1.csv file2.csv -o analysis.txt", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ZCPXQAC_L5Vu14RBmgVO6", + "type": "arrow", + "x": 422.2883608075379, + "y": 218.55580996812628, + "width": 76.93390481746206, + "height": 138.48715878187372, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + 
"strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "aF", + "roundness": null, + "seed": 411677428, + "version": 47, + "versionNonce": 866755572, + "isDeleted": false, + "boundElements": null, + "updated": 1775943776933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 69.30356503187372 + ], + [ + 76.93390481746206, + 69.30356503187372 + ], + [ + 76.93390481746206, + 138.48715878187372 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "oV2KOq4jYO4sHeSU5R8kh", + "fixedPoint": [ + 0.5497398504509746, + 1.097547228222091 + ], + "focus": 0, + "gap": 0 + }, + "endBinding": { + "elementId": "VKBVsKP110l_g8EKo59iL", + "fixedPoint": [ + 0.4994366073197032, + -0.11307420494699646 + ], + "focus": 0, + "gap": 0 + }, + "startArrowhead": null, + "endArrowhead": "triangle", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "T-bwLmuOeO20N7BcoiHu5", + "type": "arrow", + "x": 569.2312447250728, + "y": 218.50245438424201, + "width": 70.00897910007279, + "height": 138.54051436575799, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "aG", + "roundness": null, + "seed": 185453684, + "version": 57, + "versionNonce": 1881785676, + "isDeleted": false, + "boundElements": null, + "updated": 1775943776933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 69.43016280325799 + ], + [ + -70.00897910007279, + 69.43016280325799 + ], + [ + -70.00897910007279, + 138.54051436575799 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "RXxXHhdoKM3VIEHiNG-zf", + "fixedPoint": [ + 0.41674285942079037, + 1.0935526135992781 + ], + "focus": 0, + "gap": 0 
+ }, + "endBinding": { + "elementId": "VKBVsKP110l_g8EKo59iL", + "fixedPoint": [ + 0.4994366073197032, + -0.11307420494699646 + ], + "focus": 0, + "gap": 0 + }, + "startArrowhead": null, + "endArrowhead": "triangle", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "xGMkWmNHSd7ywC11KpcCu", + "type": "arrow", + "x": 499.222265625, + "y": 411.26171875, + "width": 369.56640625, + "height": 121.72265625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "eIjsV5OBQ7LGHZoAVsUnQ", + "index": "aH", + "roundness": null, + "seed": 111612620, + "version": 154, + "versionNonce": 1413023092, + "isDeleted": false, + "boundElements": null, + "updated": 1775943776933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 35 + ], + [ + 158.66933593750002, + 35 + ], + [ + 158.66933593750002, + -86.72265625 + ], + [ + 369.56640625, + -86.72265625 + ], + [ + 369.56640625, + -51.72265625 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "VKBVsKP110l_g8EKo59iL", + "fixedPoint": [ + 0.4994366073197032, + 1.1130742049469964 + ], + "focus": 0, + "gap": 0 + }, + "endBinding": { + "elementId": "y8V_3VbfnTPnx7-ArwtoN", + "fixedPoint": [ + 0.4996458316039955, + -0.08333333333333333 + ], + "focus": 0, + "gap": 0 + }, + "startArrowhead": null, + "endArrowhead": "triangle", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "eIjsV5OBQ7LGHZoAVsUnQ", + "type": "frame", + "x": 324.8125, + "y": 135.859375, + "width": 714.30859375, + "height": 439.265625, + "angle": 0, + "strokeColor": "#bbb", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 0, + "opacity": 100, + "groupIds": [], + "frameId": null, + 
"index": "aI", + "roundness": null, + "seed": 1691802100, + "version": 45, + "versionNonce": 526773836, + "isDeleted": false, + "boundElements": null, + "updated": 1775943776614, + "link": null, + "locked": false, + "name": null + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/drawings/gnarly_csv_files.excalidraw b/drawings/gnarly_csv_files.excalidraw new file mode 100644 index 0000000..7d74e2c --- /dev/null +++ b/drawings/gnarly_csv_files.excalidraw @@ -0,0 +1,713 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor", + "elements": [ + { + "id": "jOTi5onkq_0apG8tE9P_r", + "type": "rectangle", + "x": 407.37841796875, + "y": 113.30700404114185, + "width": 384.9256591796875, + "height": 40.139434814453125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "a0", + "roundness": { + "type": 3 + }, + "seed": 1041882253, + "version": 312, + "versionNonce": 696001667, + "isDeleted": false, + "boundElements": null, + "updated": 1775946092000, + "link": null, + "locked": false + }, + { + "id": "S0KIoBgWYH4hlFPqWiN5m", + "type": "rectangle", + "x": 164.64163208007812, + "y": 353.93336486816406, + "width": 530.0428466796875, + "height": 40.139434814453125, + "angle": 1.5707963267948957, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "a1", + "roundness": { + "type": 3 + }, + "seed": 1856727181, + "version": 170, + "versionNonce": 1462278445, + "isDeleted": false, + 
"boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false + }, + { + "id": "Yz29YwFK4457AJhIxNVWo", + "type": "rectangle", + "x": 207.66936492919922, + "y": 351.6764221191406, + "width": 530.0428466796875, + "height": 40.139434814453125, + "angle": 1.5707963267948957, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "a2", + "roundness": { + "type": 3 + }, + "seed": 821970915, + "version": 156, + "versionNonce": 2792483, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false + }, + { + "id": "gURVhAAieFWJTgJvAPGvG", + "type": "rectangle", + "x": 251.62217712402344, + "y": 353.3826904296875, + "width": 530.0428466796875, + "height": 40.139434814453125, + "angle": 1.5707963267948957, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "a3", + "roundness": { + "type": 3 + }, + "seed": 1631587469, + "version": 167, + "versionNonce": 1651960717, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false + }, + { + "id": "ZgrFWIayo_RmUCwikjlvn", + "type": "rectangle", + "x": 294.9184341430664, + "y": 354.70648193359375, + "width": 530.0428466796875, + "height": 40.139434814453125, + "angle": 1.5707963267948957, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "a4", + "roundness": { + "type": 3 + }, + "seed": 25557507, + "version": 180, + "versionNonce": 1432952771, + "isDeleted": false, + 
"boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false + }, + { + "id": "6nBv-y3o5l-2izqxlzx96", + "type": "rectangle", + "x": 337.8322448730469, + "y": 355.3114013671875, + "width": 530.0428466796875, + "height": 40.139434814453125, + "angle": 1.5707963267948957, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "a5", + "roundness": { + "type": 3 + }, + "seed": 520459373, + "version": 188, + "versionNonce": 1762390509, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false + }, + { + "id": "zejZfmv-SE2hAOVN0U6Xu", + "type": "rectangle", + "x": 379.48470306396484, + "y": 357.6036071777344, + "width": 530.0428466796875, + "height": 40.139434814453125, + "angle": 1.5707963267948957, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "a6", + "roundness": { + "type": 3 + }, + "seed": 542791715, + "version": 200, + "versionNonce": 1935028067, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false + }, + { + "id": "mhj9RG-hbUCNev1helu3l", + "type": "rectangle", + "x": 423.06581115722656, + "y": 360.55499267578125, + "width": 530.0428466796875, + "height": 40.139434814453125, + "angle": 1.5707963267948957, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "a7", + "roundness": { + "type": 3 + }, + "seed": 351940685, + "version": 203, + "versionNonce": 938552397, + "isDeleted": false, + 
"boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false + }, + { + "id": "WTLZ5KBCeDlzg7mRFaIaW", + "type": "rectangle", + "x": 464.4442825317383, + "y": 358.12445068359375, + "width": 530.0428466796875, + "height": 40.139434814453125, + "angle": 1.5707963267948957, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "a8", + "roundness": { + "type": 3 + }, + "seed": 739238467, + "version": 211, + "versionNonce": 1547717379, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false + }, + { + "id": "pXZtckdGWl6UMkkEUebfx", + "type": "rectangle", + "x": 505.73858642578125, + "y": 357.6578674316406, + "width": 530.0428466796875, + "height": 40.139434814453125, + "angle": 1.5707963267948957, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "a9", + "roundness": { + "type": 3 + }, + "seed": 1597071405, + "version": 186, + "versionNonce": 1056971437, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false + }, + { + "id": "NhYnOCzRic8tKs6NwfgP5", + "type": "text", + "x": 426.60589599609375, + "y": 120.87672144836841, + "width": 8.539993286132812, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "aA", + "roundness": null, + "seed": 266920909, + "version": 9, + "versionNonce": 1151726243, + "isDeleted": false, + "boundElements": null, + "updated": 1775946092000, + 
"link": null, + "locked": false, + "text": "1", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "1", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "WCJmDQpp5vZ8xj3w8Uw30", + "type": "text", + "x": 465.7305221557617, + "y": 120.87672144836841, + "width": 13.999984741210938, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "aB", + "roundness": null, + "seed": 1946215949, + "version": 30, + "versionNonce": 920796429, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false, + "text": "2", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "2", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "7FiQvWVD6pTPOKOMu6hEa", + "type": "text", + "x": 505.11583709716797, + "y": 120.87672144836841, + "width": 12.159988403320312, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "aC", + "roundness": null, + "seed": 1109028291, + "version": 13, + "versionNonce": 1295361603, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false, + "text": "3", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "3", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "wEdSLJyKqK9akYx9hl5yk", + "type": "text", + "x": 555.7668685913086, + "y": 120.87672144836841, + "width": 11.699981689453125, + "height": 25, + 
"angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "aD", + "roundness": null, + "seed": 1633037485, + "version": 13, + "versionNonce": 118559597, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false, + "text": "4", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "4", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "qUSy-FXt42OEx7fqjpx-B", + "type": "text", + "x": 597.5420761108398, + "y": 120.87672144836841, + "width": 12.3599853515625, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "aE", + "roundness": null, + "seed": 183399395, + "version": 13, + "versionNonce": 2093868515, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false, + "text": "5", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "5", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "oEWHrXCJc0y1S7bOmu31l", + "type": "text", + "x": 639.5667953491211, + "y": 120.87672144836841, + "width": 12.79998779296875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "aF", + "roundness": null, + "seed": 10723469, + "version": 13, + "versionNonce": 1809194445, + "isDeleted": false, + "boundElements": [], + 
"updated": 1775946092000, + "link": null, + "locked": false, + "text": "6", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "6", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xb85g5A17BkubzxxpDkxd", + "type": "text", + "x": 675.3957748413086, + "y": 120.87672144836841, + "width": 11.159988403320312, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "aG", + "roundness": null, + "seed": 1138402819, + "version": 13, + "versionNonce": 2055025027, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false, + "text": "7", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "7", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "VyBEfGPI8_VWNFQq52B-C", + "type": "text", + "x": 719.6015243530273, + "y": 120.87672144836841, + "width": 12.719985961914062, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "aH", + "roundness": null, + "seed": 1236052077, + "version": 13, + "versionNonce": 1548409901, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false, + "text": "8", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "8", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "bynU81UXwHZyfi3AuSC5X", + "type": "text", + "x": 771.0934677124023, + "y": 120.87672144836841, + "width": 
12.579986572265625, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "aI", + "roundness": null, + "seed": 2084799523, + "version": 16, + "versionNonce": 125962531, + "isDeleted": false, + "boundElements": [], + "updated": 1775946092000, + "link": null, + "locked": false, + "text": "9", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "9", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "XOStuLkvgQS9_sNgtP6sm", + "type": "text", + "x": 817.0083618164062, + "y": 115.95051397217631, + "width": 768.3558349609376, + "height": 275, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": "BfFEV5MrC7N3ElY9SS2y_", + "index": "aJ", + "roundness": null, + "seed": 420209379, + "version": 858, + "versionNonce": 811126413, + "isDeleted": false, + "boundElements": null, + "updated": 1775946092000, + "link": null, + "locked": false, + "text": "* The use case is in the domain of silicon hardware (purposely abstract)\n* Columns 5 through 9 indicate increasing complexity of hardware\n** 9 --> power consumption and battery\n** 5 --> power consumption at rails for a given subsystem\n* Columns 1 through 4 are measured consumption data\n* Rows refer to different parts of the subsystem\n\n* Given two csv files, we should like to show deltas between the two csv files\n** particularly those between 5-9\n** visualize via bar chart (no-nonsense matplotlib)\n", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "* The use case is in the domain of silicon 
hardware (purposely abstract)\n* Columns 5 through 9 indicate increasing complexity of hardware\n** 9 --> power consumption and battery\n** 5 --> power consumption at rails for a given subsystem\n* Columns 1 through 4 are measured consumption data\n* Rows refer to different parts of the subsystem\n\n* Given two csv files, we should like to show deltas between the two csv files\n** particularly those between 5-9\n** visualize via bar chart (no-nonsense matplotlib)\n", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "BfFEV5MrC7N3ElY9SS2y_", + "type": "frame", + "x": 312.54710896809894, + "y": 54.54637985759274, + "width": 1298.5478210449219, + "height": 623.5215759277344, + "angle": 0, + "strokeColor": "#bbb", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 0, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aK", + "roundness": null, + "seed": 182100995, + "version": 39, + "versionNonce": 921139917, + "isDeleted": false, + "boundElements": null, + "updated": 1775946091718, + "link": null, + "locked": false, + "name": null + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/examples/gnarly_csv/data/rev_a.csv b/examples/gnarly_csv/data/rev_a.csv new file mode 100644 index 0000000..d8766f7 --- /dev/null +++ b/examples/gnarly_csv/data/rev_a.csv @@ -0,0 +1,9 @@ +subsystem,meas_current_mA,meas_voltage_mV,meas_power_mW,meas_temp_C,rail_power_mW,subsys_power_mW,cluster_power_mW,soc_power_mW,battery_power_mW +CPU,850,1050,892.5,72.3,910.0,1820.0,3640.0,5460.0,6200.0 +GPU,1200,950,1140.0,81.5,1160.0,2320.0,4640.0,6960.0,7900.0 +DRAM,430,1200,516.0,55.1,530.0,1060.0,2120.0,3180.0,3600.0 +NPU,620,1100,682.0,68.4,700.0,1400.0,2800.0,4200.0,4750.0 +ISP,310,1050,325.5,49.7,340.0,680.0,1360.0,2040.0,2300.0 
+PCIe,180,1800,324.0,44.2,335.0,670.0,1340.0,2010.0,2250.0 +USB,95,1800,171.0,38.6,180.0,360.0,720.0,1080.0,1200.0 +Display,540,1200,648.0,61.8,660.0,1320.0,2640.0,3960.0,4500.0 diff --git a/examples/gnarly_csv/data/rev_b.csv b/examples/gnarly_csv/data/rev_b.csv new file mode 100644 index 0000000..fe5911f --- /dev/null +++ b/examples/gnarly_csv/data/rev_b.csv @@ -0,0 +1,9 @@ +subsystem,meas_current_mA,meas_voltage_mV,meas_power_mW,meas_temp_C,rail_power_mW,subsys_power_mW,cluster_power_mW,soc_power_mW,battery_power_mW +CPU,790,1050,829.5,69.1,845.0,1690.0,3380.0,5070.0,5750.0 +GPU,1380,950,1311.0,88.2,1335.0,2670.0,5340.0,8010.0,9100.0 +DRAM,410,1200,492.0,53.4,505.0,1010.0,2020.0,3030.0,3430.0 +NPU,710,1100,781.0,74.9,800.0,1600.0,3200.0,4800.0,5450.0 +ISP,295,1050,309.75,47.3,322.0,644.0,1288.0,1932.0,2180.0 +PCIe,175,1800,315.0,43.0,326.0,652.0,1304.0,1956.0,2190.0 +USB,88,1800,158.4,37.1,166.0,332.0,664.0,996.0,1110.0 +Display,610,1200,732.0,66.5,748.0,1496.0,2992.0,4488.0,5100.0 diff --git a/examples/gnarly_csv/gnarly_csv_delta.py b/examples/gnarly_csv/gnarly_csv_delta.py new file mode 100644 index 0000000..606df41 --- /dev/null +++ b/examples/gnarly_csv/gnarly_csv_delta.py @@ -0,0 +1,297 @@ +""" +gnarly_csv_delta – Silicon subsystem power-delta visualiser +============================================================ +Derived from ``drawings/gnarly_csv_files.excalidraw``. 
+ +Diagram summary +--------------- +Two CSV files representing two chip revisions (rev_A, rev_B) share an +identical schema: + + Columns 1–4 (measured raw data) + ───────────────────────────────── + meas_current_mA – rail current measurement + meas_voltage_mV – rail voltage measurement + meas_power_mW – directly measured power + meas_temp_C – junction temperature + + Columns 5–9 (derived complexity hierarchy, low → high) + ──────────────────────────────────────────────────────── + col 5 rail_power_mW – power at the rail for a given subsystem + col 6 subsys_power_mW – subsystem-level aggregated power + col 7 cluster_power_mW – cluster-level aggregated power + col 8 soc_power_mW – full SoC power + col 9 battery_power_mW – total battery draw (highest complexity) + + Rows – silicon subsystem parts (CPU, GPU, DRAM, NPU, ISP, …) + +Goal +---- +Load both CSV files, compute the **delta** (rev_B − rev_A) for columns 5–9 +across every subsystem row, then render a grouped bar chart via matplotlib. + +Data-flow (as drawn): + rev_a.csv ──┐ + ▼ + load & align ──► compute_deltas ──► plot_bar_chart + ▲ + rev_b.csv ──┘ + +CLI usage: + python gnarly_csv_delta.py data/rev_a.csv data/rev_b.csv \\ + [--output delta_chart.png] [--show] +""" + +from __future__ import annotations + +import argparse +import sys +from dataclasses import dataclass, field +from pathlib import Path +from typing import Sequence + +import matplotlib.pyplot as plt +import pandas as pd + + +# --------------------------------------------------------------------------- +# Constants – the 9-column schema from the diagram +# --------------------------------------------------------------------------- + +#: Columns 1–4: raw measured data (not used for delta visualisation). +MEASURED_COLS: list[str] = [ + "meas_current_mA", + "meas_voltage_mV", + "meas_power_mW", + "meas_temp_C", +] + +#: Columns 5–9: derived complexity hierarchy – these are the delta targets. 
+DERIVED_COLS: list[str] = [ + "rail_power_mW", # col 5 + "subsys_power_mW", # col 6 + "cluster_power_mW", # col 7 + "soc_power_mW", # col 8 + "battery_power_mW", # col 9 +] + +#: The index column that identifies each subsystem row. +SUBSYSTEM_COL: str = "subsystem" + +#: All expected columns in the CSV files. +EXPECTED_COLS: list[str] = [SUBSYSTEM_COL] + MEASURED_COLS + DERIVED_COLS + + +# --------------------------------------------------------------------------- +# Data structures +# --------------------------------------------------------------------------- + + +@dataclass +class RevisionBundle: + """A single chip-revision CSV loaded into memory.""" + + path: Path + label: str # e.g. "rev_A" + data: pd.DataFrame # indexed by SUBSYSTEM_COL after load + + +@dataclass +class DeltaResult: + """Holds the per-subsystem, per-derived-column deltas (rev_B − rev_A).""" + + delta_df: pd.DataFrame # shape: (n_subsystems, len(DERIVED_COLS)) + rev_a_label: str + rev_b_label: str + derived_cols: list[str] = field(default_factory=lambda: list(DERIVED_COLS)) + + +# --------------------------------------------------------------------------- +# Pipeline steps +# --------------------------------------------------------------------------- + + +def load_revision(path: Path, label: str) -> RevisionBundle: + """Load a single chip-revision CSV and validate its schema. + + Parameters + ---------- + path: + Filesystem path to the CSV file. + label: + Human-readable revision label (e.g. ``"rev_A"``). + + Returns + ------- + RevisionBundle + The loaded and schema-validated bundle. + + Raises + ------ + ValueError + If any expected column is missing from the CSV. 
+ """ + # TODO: read the CSV with pd.read_csv(path) + # TODO: validate that all EXPECTED_COLS are present; raise ValueError if not + # TODO: set SUBSYSTEM_COL as the DataFrame index + # TODO: return RevisionBundle(path=path, label=label, data=df) + raise NotImplementedError + + +def align_revisions( + rev_a: RevisionBundle, + rev_b: RevisionBundle, +) -> tuple[pd.DataFrame, pd.DataFrame]: + """Align both revision DataFrames so they share the same subsystem rows. + + The diagram shows both CSVs feeding into a single "load & align" step. + Rows present in one revision but not the other are dropped with a warning. + + Parameters + ---------- + rev_a: + The first (baseline) revision bundle. + rev_b: + The second (comparison) revision bundle. + + Returns + ------- + tuple[pd.DataFrame, pd.DataFrame] + ``(aligned_a, aligned_b)`` – DataFrames with identical row indices. + """ + # TODO: find the intersection of subsystem indices from both DataFrames + # TODO: warn (via print/logging) about any rows dropped from either side + # TODO: reindex both DataFrames to the common index + # TODO: return (aligned_a, aligned_b) + raise NotImplementedError + + +def compute_deltas( + aligned_a: pd.DataFrame, + aligned_b: pd.DataFrame, + rev_a_label: str, + rev_b_label: str, +) -> DeltaResult: + """Compute per-subsystem deltas for columns 5–9 (rev_B − rev_A). + + Parameters + ---------- + aligned_a: + Aligned baseline DataFrame (rev_A). + aligned_b: + Aligned comparison DataFrame (rev_B). + rev_a_label: + Label string for rev_A (used in the result). + rev_b_label: + Label string for rev_B (used in the result). + + Returns + ------- + DeltaResult + Populated delta result with a DataFrame of shape + ``(n_subsystems, len(DERIVED_COLS))``. 
+ """ + # TODO: subtract aligned_a[DERIVED_COLS] from aligned_b[DERIVED_COLS] + # TODO: store result in a DeltaResult dataclass + # TODO: return the DeltaResult + raise NotImplementedError + + +def plot_bar_chart( + result: DeltaResult, + output_path: Path | None = None, + show: bool = False, +) -> None: + """Render a grouped bar chart of the deltas for columns 5–9. + + One group of bars per subsystem row; one bar per derived column (5–9). + Positive delta = rev_B draws more power; negative = rev_B is more efficient. + Uses plain matplotlib – no seaborn, no fancy theming. + + Parameters + ---------- + result: + The populated :class:`DeltaResult`. + output_path: + If provided, save the figure to this path (PNG/SVG/PDF). + show: + If ``True``, call ``plt.show()`` to open an interactive window. + """ + # TODO: create a figure and axes with plt.subplots() + # TODO: compute bar positions for a grouped layout + # (one group per subsystem, one bar per derived column) + # TODO: iterate over DERIVED_COLS and call ax.bar() for each column's deltas + # TODO: add axis labels, title, legend, and a horizontal zero-line + # TODO: if output_path is not None, call fig.savefig(output_path, bbox_inches="tight") + # TODO: if show is True, call plt.show() + # TODO: call plt.close(fig) to free memory + raise NotImplementedError + + +# --------------------------------------------------------------------------- +# CLI entry-point +# --------------------------------------------------------------------------- + + +def build_parser() -> argparse.ArgumentParser: + """Construct the :mod:`argparse` parser for the ``gnarly_csv_delta`` CLI.""" + parser = argparse.ArgumentParser( + prog="gnarly_csv_delta", + description=( + "Compare two chip-revision CSV files and visualise power deltas " + "(columns 5–9) as a grouped bar chart." 
+ ), + ) + parser.add_argument( + "rev_a", + type=Path, + help="Path to the baseline chip-revision CSV (rev_A).", + ) + parser.add_argument( + "rev_b", + type=Path, + help="Path to the comparison chip-revision CSV (rev_B).", + ) + parser.add_argument( + "--output", + "-o", + type=Path, + default=None, + metavar="FILE", + help=( + "Save the bar chart to this file (e.g. delta_chart.png). " + "Format is inferred from the extension. " + "If omitted, the chart is not saved to disk." + ), + ) + parser.add_argument( + "--show", + action="store_true", + default=False, + help="Open an interactive matplotlib window after rendering.", + ) + return parser + + +def main(argv: Sequence[str] | None = None) -> None: + """Parse CLI arguments and run the full delta-visualisation pipeline. + + Pipeline + -------- + 1. :func:`load_revision` × 2 + 2. :func:`align_revisions` + 3. :func:`compute_deltas` + 4. :func:`plot_bar_chart` + """ + args = build_parser().parse_args(list(argv) if argv is not None else sys.argv[1:]) + + # TODO: call load_revision(args.rev_a, label="rev_A") + # TODO: call load_revision(args.rev_b, label="rev_B") + # TODO: call align_revisions(rev_a_bundle, rev_b_bundle) + # TODO: call compute_deltas(aligned_a, aligned_b, ...) + # TODO: call plot_bar_chart(result, output_path=args.output, show=args.show) + raise NotImplementedError + + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e8a5a9d --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pandas>=2.0 diff --git a/src/csv_grok.py b/src/csv_grok.py new file mode 100644 index 0000000..f66e5a6 --- /dev/null +++ b/src/csv_grok.py @@ -0,0 +1,231 @@ +""" +csvGrok – CSV mean-comparison tool +=================================== +Derived from drawings/design.excalidraw. + +Diagram summary +--------------- +Two CSV files (each with 4 columns, arbitrary rows) are loaded and +combined (union / concatenation). 
A user-specified column is then +analysed: the per-file column mean is computed and the comparison is +written to an output ``analysis.txt`` file. + +Data-flow (as drawn): + CSV File 1 ──┐ + ▼ + Combine (union) ──► Analyse (compare means) + ▲ + CSV File 2 ──┘ + +CLI usage (from diagram free-text): + csvGrok file1.csv file2.csv --column NAME -o analysis.txt +""" + +from __future__ import annotations + +import argparse +import sys +from dataclasses import dataclass +from pathlib import Path + +import pandas as pd + + +# --------------------------------------------------------------------------- +# Data structures +# --------------------------------------------------------------------------- + +@dataclass +class CsvBundle: + """Holds a single loaded CSV together with its source path.""" + + path: Path + data: pd.DataFrame + + +@dataclass +class AnalysisResult: + """Stores the per-file means and the computed difference.""" + + column: str + mean_file1: float + mean_file2: float + difference: float # mean_file1 - mean_file2 + + +# --------------------------------------------------------------------------- +# Pipeline steps (matching diagram boxes) +# --------------------------------------------------------------------------- + +def load_csv(path: Path) -> CsvBundle: + """Load a single CSV file into a :class:`CsvBundle`. + + Parameters + ---------- + path: + Filesystem path to the CSV file. + + Returns + ------- + CsvBundle + The loaded data together with its source path. + """ + df = pd.read_csv(path) + return CsvBundle(path=path, data=df) + + +def load_and_combine(path1: Path, path2: Path) -> tuple[CsvBundle, CsvBundle, pd.DataFrame]: + """Load both CSV files and produce a combined (union) DataFrame. + + Corresponds to the **Combine (union)** box in the diagram. + Both files are loaded independently so that per-file statistics can + still be computed downstream. + + Parameters + ---------- + path1: + Path to the first CSV file. + path2: + Path to the second CSV file. 
+ + Returns + ------- + tuple[CsvBundle, CsvBundle, pd.DataFrame] + ``(bundle1, bundle2, combined_df)`` where ``combined_df`` is the + row-wise concatenation of both DataFrames (union semantics). + """ + bundle1 = load_csv(path1) + bundle2 = load_csv(path2) + combined = pd.concat([bundle1.data, bundle2.data], ignore_index=True) + return bundle1, bundle2, combined + + +def analyse( + bundle1: CsvBundle, + bundle2: CsvBundle, + column: str, +) -> AnalysisResult: + """Compare the column means between the two CSV files. + + Corresponds to the **Analyse (cross-correlate?)** box in the diagram. + + Parameters + ---------- + bundle1: + The first loaded CSV bundle. + bundle2: + The second loaded CSV bundle. + column: + Name of the column whose mean should be compared. + + Returns + ------- + AnalysisResult + Populated result dataclass. + """ + for bundle in (bundle1, bundle2): + if column not in bundle.data.columns: + raise ValueError( + f"Column '{column}' not found in {bundle.path}. " + f"Available columns: {list(bundle.data.columns)}" + ) + + mean1 = float(bundle1.data[column].mean()) + mean2 = float(bundle2.data[column].mean()) + return AnalysisResult( + column=column, + mean_file1=mean1, + mean_file2=mean2, + difference=mean1 - mean2, + ) + + +def write_report(result: AnalysisResult, output_path: Path) -> None: + """Serialise the :class:`AnalysisResult` to a plain-text ``analysis.txt``. + + Parameters + ---------- + result: + The populated analysis result. + output_path: + Destination file path (e.g. ``analysis.txt``). 
+ """ + lines = [ + "=" * 50, + "csvGrok – Mean Comparison Report", + "=" * 50, + f"Column analysed : {result.column}", + f"Mean (file 1) : {result.mean_file1:.4f}", + f"Mean (file 2) : {result.mean_file2:.4f}", + f"Difference : {result.difference:+.4f} (file1 − file2)", + "=" * 50, + ] + report = "\n".join(lines) + "\n" + output_path.write_text(report, encoding="utf-8") # report contains non-ASCII ("–", "−"); do not rely on the locale encoding + print(report, end="") + print(f"Report written to: {output_path}") + + +# --------------------------------------------------------------------------- +# CLI entry-point (from diagram free-text: csvGrok file1.csv file2.csv -o analysis.txt) +# --------------------------------------------------------------------------- + +def build_parser() -> argparse.ArgumentParser: + """Construct the :mod:`argparse` parser for the ``csvGrok`` CLI.""" + parser = argparse.ArgumentParser( + prog="csvGrok", + description="Compare the mean of a column across two CSV files.", + ) + parser.add_argument( + "file1", + type=Path, + help="Path to the first CSV file.", + ) + parser.add_argument( + "file2", + type=Path, + help="Path to the second CSV file.", + ) + parser.add_argument( + "--column", + "-c", + required=False, + default=None, + type=str, + help=( + "Name of the column to compare (must exist in both files). " + "If omitted, the first numeric column is used." + ), + ) + parser.add_argument( + "--output", + "-o", + type=Path, + default=Path("analysis.txt"), + help="Output file path (default: analysis.txt).", + ) + return parser + + +def main(argv: list[str] | None = None) -> None: + """Main entry-point: parse args, run the pipeline, write the report.""" + args = build_parser().parse_args(argv) + + bundle1, bundle2, _combined = load_and_combine(args.file1, args.file2) + + # If no column specified, pick the first numeric column automatically. 
+ column = args.column + if column is None: + numeric_cols = bundle1.data.select_dtypes(include="number").columns.tolist() + if not numeric_cols: + print("Error: no numeric columns found in the CSV files.", file=sys.stderr) + sys.exit(1) + column = numeric_cols[0] + print(f"No --column specified; defaulting to first numeric column: '{column}'") + + result = analyse(bundle1, bundle2, column) + write_report(result, args.output) + + +if __name__ == "__main__": + main() diff --git a/tests/test_csv_grok.py b/tests/test_csv_grok.py new file mode 100644 index 0000000..aa15071 --- /dev/null +++ b/tests/test_csv_grok.py @@ -0,0 +1,304 @@ +""" +Unit tests for src/csv_grok.py +================================ +Every numeric assertion is verified by an independent second method so +the test itself is trustworthy and not just a tautology. + +Independent verification strategy +----------------------------------- +- Means are cross-checked via sum(values) / len(values) computed + directly from the raw fixture lists — no pandas involved in the + reference calculation. +- Combined row count is verified by simple integer addition. +- File content checks use plain string search, not the module's own + formatting helpers. +""" + +from __future__ import annotations + +import math +from pathlib import Path + +import pandas as pd +import pytest + +# Make src/ importable without installing the package. 
import sys

# Put <repo>/src at the front of sys.path *before* importing csv_grok; the
# import below is resolved through this entry (presumably because csv_grok is
# not installed as a package — confirm against the project layout).
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from csv_grok import (
    AnalysisResult,
    CsvBundle,
    analyse,
    load_and_combine,
    load_csv,
    write_report,
)

# ---------------------------------------------------------------------------
# Paths to the shared fixture CSVs
# ---------------------------------------------------------------------------

DATA_DIR = Path(__file__).parent.parent / "data"
FILE1 = DATA_DIR / "file1.csv"
FILE2 = DATA_DIR / "file2.csv"

# ---------------------------------------------------------------------------
# Ground-truth values derived by hand from the fixture files
# (independent of pandas – used as the reference in assertions)
# ---------------------------------------------------------------------------

# file1.csv score column: Alice=88.5, Bob=91.0, Carol=76.3, Dave=83.7, Eve=95.2
FILE1_SCORES = [88.5, 91.0, 76.3, 83.7, 95.2]
FILE1_SCORE_MEAN_REF = sum(FILE1_SCORES) / len(FILE1_SCORES)  # 86.94

# file2.csv score column: Frank=70.1, Grace=88.9, Hank=65.4, Iris=79.8, Jack=82.3
FILE2_SCORES = [70.1, 88.9, 65.4, 79.8, 82.3]
FILE2_SCORE_MEAN_REF = sum(FILE2_SCORES) / len(FILE2_SCORES)  # 77.30

# file1.csv salary column
FILE1_SALARIES = [72000, 65000, 85000, 70000, 90000]
FILE1_SALARY_MEAN_REF = sum(FILE1_SALARIES) / len(FILE1_SALARIES)  # 76400.0

# file2.csv salary column
FILE2_SALARIES = [95000, 58000, 110000, 68000, 88000]
FILE2_SALARY_MEAN_REF = sum(FILE2_SALARIES) / len(FILE2_SALARIES)  # 83800.0


# ===========================================================================
# Tests for load_csv()
# ===========================================================================

class TestLoadCsv:
    """Happy-path and structural checks for load_csv()."""

    def test_returns_csv_bundle(self):
        """load_csv() must return a CsvBundle instance."""
        bundle = load_csv(FILE1)
        assert isinstance(bundle, CsvBundle)

    def test_path_attribute_preserved(self):
        """The bundle's .path must equal the path that was passed in."""
        bundle = load_csv(FILE1)
        assert bundle.path == FILE1

    def test_row_count_file1(self):
        """file1.csv has 5 data rows."""
        bundle = load_csv(FILE1)
        assert len(bundle.data) == 5

    def test_row_count_file2(self):
        """file2.csv has 5 data rows."""
        bundle = load_csv(FILE2)
        assert len(bundle.data) == 5

    def test_column_count(self):
        """Both files have exactly 4 columns."""
        for path in (FILE1, FILE2):
            bundle = load_csv(path)
            assert len(bundle.data.columns) == 4, (
                f"{path.name} should have 4 columns, got {list(bundle.data.columns)}"
            )

    def test_expected_columns_present(self):
        """Columns name, age, score, salary must all be present."""
        expected = {"name", "age", "score", "salary"}
        for path in (FILE1, FILE2):
            bundle = load_csv(path)
            assert expected == set(bundle.data.columns)

    def test_data_is_dataframe(self):
        """bundle.data must be a pandas DataFrame."""
        bundle = load_csv(FILE1)
        assert isinstance(bundle.data, pd.DataFrame)

    def test_missing_file_raises(self, tmp_path):
        """load_csv() must raise when the file does not exist."""
        # NOTE(review): Exception is deliberately broad because this test does
        # not pin which exception type load_csv() raises for a missing path.
        # Narrow it (e.g. to FileNotFoundError) once that contract is confirmed.
        with pytest.raises(Exception):
            load_csv(tmp_path / "nonexistent.csv")


# ===========================================================================
# Tests for load_and_combine()
# ===========================================================================

class TestLoadAndCombine:
    """Tests for the Combine (union) pipeline step."""

    def test_returns_three_tuple(self):
        """load_and_combine() must return a 3-tuple."""
        result = load_and_combine(FILE1, FILE2)
        assert len(result) == 3

    def test_bundles_are_csv_bundles(self):
        """First two elements of the tuple must be CsvBundle instances."""
        b1, b2, _ = load_and_combine(FILE1, FILE2)
        assert isinstance(b1, CsvBundle)
        assert isinstance(b2, CsvBundle)

    def test_combined_is_dataframe(self):
        """Third element must be a pandas DataFrame."""
        _, _, combined = load_and_combine(FILE1, FILE2)
        assert isinstance(combined, pd.DataFrame)

    def test_combined_row_count(self):
        """Combined DataFrame must have len(file1) + len(file2) rows.

        Independent check: load each file separately with pd.read_csv and
        add their lengths — no call to load_and_combine() in the reference.
        """
        b1, b2, combined = load_and_combine(FILE1, FILE2)

        # Reference: independent row counts via direct pd.read_csv
        ref_rows = len(pd.read_csv(FILE1)) + len(pd.read_csv(FILE2))

        assert len(combined) == ref_rows
        assert len(combined) == len(b1.data) + len(b2.data)

    def test_combined_preserves_columns(self):
        """Combined DataFrame must retain all 4 original columns."""
        _, _, combined = load_and_combine(FILE1, FILE2)
        assert set(combined.columns) == {"name", "age", "score", "salary"}

    def test_combined_index_is_reset(self):
        """Combined DataFrame index must be 0-based and contiguous."""
        _, _, combined = load_and_combine(FILE1, FILE2)
        expected_index = list(range(len(combined)))
        assert list(combined.index) == expected_index

    def test_bundle_paths_are_correct(self):
        """Each bundle must carry the path it was loaded from."""
        b1, b2, _ = load_and_combine(FILE1, FILE2)
        assert b1.path == FILE1
        assert b2.path == FILE2


# ===========================================================================
# Tests for analyse()
# ===========================================================================

class TestAnalyse:
    """Tests for the Analyse pipeline step — means and difference."""

    @pytest.fixture(autouse=True)
    def _bundles(self):
        """Load both fixture files before every test and expose them as self.b1 / self.b2."""
        self.b1, self.b2, _ = load_and_combine(FILE1, FILE2)

    # --- happy-path: score column -------------------------------------------

    def test_returns_analysis_result(self):
        """analyse() must return an AnalysisResult instance."""
        result = analyse(self.b1, self.b2, "score")
        assert isinstance(result, AnalysisResult)

    def test_column_attribute(self):
        """result.column must equal the column name passed in."""
        result = analyse(self.b1, self.b2, "score")
        assert result.column == "score"

    def test_mean_file1_score(self):
        """mean_file1 for 'score' must match the hand-computed reference.

        Reference: sum(FILE1_SCORES) / len(FILE1_SCORES) — no pandas.
        """
        result = analyse(self.b1, self.b2, "score")
        assert math.isclose(result.mean_file1, FILE1_SCORE_MEAN_REF, rel_tol=1e-9)

    def test_mean_file2_score(self):
        """mean_file2 for 'score' must match the hand-computed reference."""
        result = analyse(self.b1, self.b2, "score")
        assert math.isclose(result.mean_file2, FILE2_SCORE_MEAN_REF, rel_tol=1e-9)

    def test_difference_score(self):
        """difference must equal mean_file1 − mean_file2 (verified independently)."""
        result = analyse(self.b1, self.b2, "score")
        expected_diff = FILE1_SCORE_MEAN_REF - FILE2_SCORE_MEAN_REF
        assert math.isclose(result.difference, expected_diff, rel_tol=1e-9)
        # Also verify the internal consistency of the dataclass fields
        assert math.isclose(result.difference, result.mean_file1 - result.mean_file2, rel_tol=1e-9)

    # --- happy-path: salary column ------------------------------------------

    def test_mean_file1_salary(self):
        """mean_file1 for 'salary' must match the hand-computed reference."""
        result = analyse(self.b1, self.b2, "salary")
        assert math.isclose(result.mean_file1, FILE1_SALARY_MEAN_REF, rel_tol=1e-9)

    def test_mean_file2_salary(self):
        """mean_file2 for 'salary' must match the hand-computed reference."""
        result = analyse(self.b1, self.b2, "salary")
        assert math.isclose(result.mean_file2, FILE2_SALARY_MEAN_REF, rel_tol=1e-9)

    def test_difference_salary(self):
        """difference for 'salary' must equal mean_file1 − mean_file2."""
        result = analyse(self.b1, self.b2, "salary")
        expected_diff = FILE1_SALARY_MEAN_REF - FILE2_SALARY_MEAN_REF
        assert math.isclose(result.difference, expected_diff, rel_tol=1e-9)

    # --- error path ---------------------------------------------------------

    def test_missing_column_raises_value_error(self):
        """analyse() must raise ValueError for a column that does not exist."""
        with pytest.raises(ValueError, match="not found"):
            analyse(self.b1, self.b2, "nonexistent_column")

    def test_error_message_contains_column_name(self):
        """The ValueError message must name the missing column."""
        bad_col = "ghost_column"
        # match= is applied as a regex search; bad_col contains no regex
        # metacharacters, so it is effectively a literal substring check.
        with pytest.raises(ValueError, match=bad_col):
            analyse(self.b1, self.b2, bad_col)

    def test_non_numeric_column_raises_or_returns_nan(self):
        """Requesting the 'name' (string) column should either raise or return NaN mean."""
        # Only the analyse() call is guarded.  Keeping the NaN check outside
        # the try block ensures a TypeError raised by math.isnan() itself
        # (i.e. analyse() returned a non-numeric mean) fails the test instead
        # of being mistaken for the "raising is acceptable" outcome.
        try:
            result = analyse(self.b1, self.b2, "name")
        except (TypeError, ValueError):
            return  # raising is also acceptable

        # If it doesn't raise, the means must be NaN (pandas behaviour for strings)
        assert math.isnan(result.mean_file1) or math.isnan(result.mean_file2)


# ===========================================================================
# Tests for write_report()
# ===========================================================================

class TestWriteReport:
    """Tests for the report-writing step."""

    @pytest.fixture()
    def result(self):
        """Return the analyse() result for the 'score' column of the two fixture files."""
        b1, b2, _ = load_and_combine(FILE1, FILE2)
        return analyse(b1, b2, "score")

    def test_file_is_created(self, tmp_path, result):
        """write_report() must create the output file."""
        out = tmp_path / "report.txt"
        write_report(result, out)
        assert out.exists()

    def test_file_contains_column_name(self, tmp_path, result):
        """The report must mention the column that was analysed."""
        out = tmp_path / "report.txt"
        write_report(result, out)
        assert "score" in out.read_text()

    def test_file_contains_mean1(self, tmp_path, result):
        """The report must contain the mean of file 1 (to 4 decimal places)."""
        out = tmp_path / "report.txt"
        write_report(result, out)
        # Independent reference: format the hand-computed value the same way
        expected_str = f"{FILE1_SCORE_MEAN_REF:.4f}"
        assert expected_str in out.read_text()

    def test_file_contains_mean2(self, tmp_path, result):
        """The report must contain the mean of file 2 (to 4 decimal places)."""
        out = tmp_path / "report.txt"
        write_report(result, out)
        expected_str = f"{FILE2_SCORE_MEAN_REF:.4f}"
        assert expected_str in out.read_text()

    def test_file_is_non_empty(self, tmp_path, result):
        """The report file must not be empty."""
        out = tmp_path / "report.txt"
        write_report(result, out)
        assert out.stat().st_size > 0