#!/usr/bin/env python3
"""
summarize_missing.py
=====================
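Scans <out_dir>/tables/*.csv (out_dir defaults to "output") and reports, per
table (one table per path), how many sample points have a "source" value of
"none", "nan", or empty (case-insensitive) -- i.e. no elevation data from
USGS 1m or GLO-30.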

Usage:
    python summarize_missing.py output
"""
import csv
import glob
import os
import sys

# "source" values (after stripping whitespace and lower-casing) that mean the
# sample point has no elevation data.
MISSING_SOURCES = frozenset({"none", "", "nan"})

# Maximum number of per-table lines printed in the detail listing.
MAX_LISTED = 30


def count_missing(rows):
    """Count rows and rows whose "source" field marks missing data.

    Args:
        rows: iterable of dict-like rows (e.g. a ``csv.DictReader``).

    Returns:
        ``(total, missing)`` where ``total`` is the number of rows seen and
        ``missing`` is how many have a "source" value in ``MISSING_SOURCES``
        (an absent or ``None`` "source" counts as missing).
    """
    total = 0
    missing = 0
    for row in rows:
        total += 1
        # csv.DictReader fills fields absent from a short row with None
        # (its restval), so the .get() default alone does not protect the
        # .strip() call; coerce None to "" explicitly.
        source = row.get("source") or ""
        if source.strip().lower() in MISSING_SOURCES:
            missing += 1
    return total, missing


def summarize_table(path):
    """Return ``(total_points, missing_points)`` for the CSV table at *path*."""
    with open(path, newline="") as f:
        return count_missing(csv.DictReader(f))


def main(argv=None):
    """Print a missing-data summary for every table under ``<out_dir>/tables``.

    Args:
        argv: command-line arguments without the program name. The first
            element, if present, is the output directory; it defaults to
            "output". When ``None``, ``sys.argv[1:]`` is used.
    """
    if argv is None:
        argv = sys.argv[1:]
    out_dir = argv[0] if argv else "output"
    tables = sorted(glob.glob(os.path.join(out_dir, "tables", "*.csv")))

    print(f"{len(tables)} table(s) found.\n")

    total_paths = 0
    paths_with_missing = 0
    fully_missing = 0
    rows_out = []

    for table in tables:
        n, missing = summarize_table(table)
        total_paths += 1
        if missing > 0:
            paths_with_missing += 1
            if missing == n:
                fully_missing += 1
            rows_out.append((os.path.basename(table), n, missing))

    print(f"Total paths:                {total_paths}")
    print(f"Paths with ANY missing pts: {paths_with_missing}")
    print(f"Paths FULLY missing:        {fully_missing}")
    print()

    if rows_out:
        print("path, total_points, missing_points")
        # Worst offenders first; the sort is stable, so ties stay in
        # filename order.
        worst_first = sorted(rows_out, key=lambda item: -item[2])
        for name, n, m in worst_first[:MAX_LISTED]:
            print(f"  {name}, {n}, {m}")
        if len(rows_out) > MAX_LISTED:
            print(f"  ... and {len(rows_out) - MAX_LISTED} more")


if __name__ == "__main__":
    main()
