"""
att_preflight.py
-----------------
Pre-flight check before running att_worker.py on the full batch.
Verifies that every path ID listed in the CSV configured as PATH_ID_CSV
has its required input files: the DAT_a, DAT_b and VEN CSVs (present and
not zero bytes) and a matching TIF.

Run this BEFORE starting the multi-day batch so you catch missing
files early rather than discovering errors hours into the run.

Usage:
    python att_preflight.py
"""

import csv
from pathlib import Path

# ─────────────────────────────────────────────
# CONFIG — must match att_worker.py
# ─────────────────────────────────────────────

# CSV file listing the path IDs to check; read by load_path_ids().
PATH_ID_CSV    = r"D:\KinderMorgan\Kinder_Morgan_Network_Definition_v2.csv"
# Header name of the column holding the IDs. load_path_ids() also accepts
# None, in which case the first field of every row is taken as an ID and
# no header row is assumed.
PATH_ID_COLUMN = "Path_ID"
# Root folder with one sub-folder per path ID holding its input CSVs:
# <id>/<id>_dat_a_1.csv, <id>/<id>_dat_b_1.csv and <id>/<id>_ven_1.csv.
DATA_ROOT      = r"D:\KinderMorgan\complete"
# Folder searched (non-recursively) for a "*.tif" whose stem equals the
# path ID, compared case-insensitively.
TIF_DIR        = r"D:\KinderMorgan\clipped_dems"


def load_path_ids(csv_path, column):
    """Read the list of path IDs from a CSV file.

    Args:
        csv_path: Path of the CSV file. It is opened as UTF-8 and a leading
            byte-order mark, if present, is ignored.
        column: Header name of the column holding the IDs. Pass ``None``
            for a header-less file; the first field of every non-empty
            row is then used.

    Returns:
        The IDs in file order, stripped of surrounding whitespace, with
        blank values dropped.

    Raises:
        KeyError: If ``column`` is given and the file has a header row that
            does not contain it.
    """
    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        if column is None:
            raw_values = [row[0] for row in csv.reader(f) if row]
        else:
            reader = csv.DictReader(f)
            header = reader.fieldnames
            # Fail up front with a descriptive message instead of a bare
            # KeyError on the first data row. A completely empty file has
            # no header (None) and simply yields no IDs.
            if header is not None and column not in header:
                raise KeyError(
                    f"column {column!r} not found in {csv_path}; "
                    f"available columns: {list(header)}"
                )
            raw_values = [row[column] for row in reader]

    # DictReader fills the missing fields of a short row with None; treat
    # those like blank cells rather than crashing on None.strip().
    stripped = ((value or "").strip() for value in raw_values)
    return [value for value in stripped if value]


def dat_path(path_id, receiver):
    """Return the expected location of a path's DAT CSV for one receiver.

    The file sits in the path ID's own folder under DATA_ROOT and is named
    ``<path_id>_dat_<receiver>_1.csv``.
    """
    folder = Path(DATA_ROOT).joinpath(path_id)
    return folder.joinpath(f"{path_id}_dat_{receiver}_1.csv")


def ven_path(path_id):
    """Return the expected location of a path's VEN CSV.

    The file sits in the path ID's own folder under DATA_ROOT and is named
    ``<path_id>_ven_1.csv``.
    """
    folder = Path(DATA_ROOT).joinpath(path_id)
    return folder.joinpath(f"{path_id}_ven_1.csv")


def find_tif(path_id):
    """Locate the TIF belonging to a path ID.

    Looks at the ``*.tif`` entries directly inside TIF_DIR and returns the
    first one whose stem equals ``path_id`` ignoring case, or ``None`` when
    there is no such entry.
    """
    matches = (
        candidate
        for candidate in Path(TIF_DIR).glob("*.tif")
        if candidate.stem.lower() == path_id.lower()
    )
    return next(matches, None)


def main():
    """Check the input files of every path ID and print a readiness report.

    For each ID loaded from PATH_ID_CSV the DAT_a, DAT_b and VEN CSVs must
    exist and be larger than zero bytes, and TIF_DIR must contain a
    ``*.tif`` whose stem equals the ID (case-insensitive). A summary and up
    to 20 examples per problem category are printed.

    Returns:
        0 when at least one path ID was loaded and all of them are ready,
        1 otherwise.
    """
    path_ids = load_path_ids(PATH_ID_CSV, PATH_ID_COLUMN)
    print(f"Checking {len(path_ids)} path IDs...\n")

    # An empty ID list would otherwise be reported as "all ready".
    if not path_ids:
        print(f"No path IDs found in {PATH_ID_CSV}; nothing to check.")
        return 1

    # Scan the TIF folder once instead of once per path ID. The matching
    # rule is the same as find_tif(): "*.tif" entries, stem compared
    # case-insensitively.
    tif_stems = {f.stem.lower() for f in Path(TIF_DIR).glob("*.tif")}

    missing_dat_a = []
    missing_dat_b = []
    missing_ven   = []
    missing_tif   = []
    empty_files   = []
    all_ok        = []

    def check_file(path, pid, missing):
        """Record a missing or zero-byte file; return True if it is usable."""
        if not path.exists():
            missing.append(pid)
            return False
        if path.stat().st_size == 0:
            empty_files.append(str(path))
            return False
        return True

    for pid in path_ids:
        # Build a list (not a short-circuiting expression) so that every
        # problem of this path ID is recorded, not just the first one.
        files_ok = [
            check_file(dat_path(pid, "a"), pid, missing_dat_a),
            check_file(dat_path(pid, "b"), pid, missing_dat_b),
            check_file(ven_path(pid), pid, missing_ven),
        ]

        has_tif = pid.lower() in tif_stems
        if not has_tif:
            missing_tif.append(pid)

        if has_tif and all(files_ok):
            all_ok.append(pid)

    print("=" * 60)
    print(f"READY:            {len(all_ok)} / {len(path_ids)}")
    print(f"Missing DAT_a:    {len(missing_dat_a)}")
    print(f"Missing DAT_b:    {len(missing_dat_b)}")
    print(f"Missing VEN:      {len(missing_ven)}")
    print(f"Missing TIF:      {len(missing_tif)}")
    print(f"Empty files:      {len(empty_files)}")
    print("=" * 60)

    def show(label, items, limit=20):
        """Print up to ``limit`` entries of a non-empty problem list."""
        if not items:
            return
        print(f"\n{label} ({len(items)}):")
        for x in items[:limit]:
            print(f"  {x}")
        if len(items) > limit:
            print(f"  ... and {len(items)-limit} more")

    show("Missing DAT_a", missing_dat_a)
    show("Missing DAT_b", missing_dat_b)
    show("Missing VEN",   missing_ven)
    show("Missing TIF",   missing_tif)
    show("Empty files",   empty_files)

    if len(all_ok) == len(path_ids):
        print("\nAll path IDs ready. Safe to run att_worker.py.")
        return 0

    print(f"\n{len(path_ids) - len(all_ok)} path IDs have issues. "
          "Fix these before running the full batch, or they will "
          "fail individually during the run.")
    return 1


if __name__ == "__main__":
    # Pass the result on as the process exit status so a wrapper script
    # can refuse to start the batch when the pre-flight check fails.
    raise SystemExit(main())
