"""
Scans all DAT_A and DAT_B CSV files for all paths and reports
blank bldg_meanelevation entries.

Reads path list from ATT_Mobility_Network_Definition_v8.csv

Set TEST_OUTPUT_DIR to scan a test folder instead of production paths.
Set TEST_ONLY to True to only scan paths that exist in that folder.
"""

import os
import pandas as pd

# --- Configuration ---
# Folder containing test output laid out as
# <TEST_OUTPUT_DIR>/<Path_ID>/<Path_ID>_dat_a_1.csv (and ..._dat_b_1.csv).
# Leave as None to scan the production files named in the definition CSV.
TEST_OUTPUT_DIR = None
# If True (and TEST_OUTPUT_DIR is set), only scan paths that have a DAT_A
# file in TEST_OUTPUT_DIR.
TEST_ONLY = False
# ---------------------

DEFINITION_CSV = "ATT_Mobility_Network_Definition_v8.csv"
REPORT_CSV = "blank_meanelev_report.csv"
ELEVATION_COLUMN = "bldg_meanelevation"
# (report label, file-name suffix / definition-column prefix) per scanned file.
FILE_KINDS = (("DAT_A", "dat_a"), ("DAT_B", "dat_b"))


def resolve_data_path(row, suffix, test_output_dir=None):
    """Return the CSV path to scan for one path and file kind.

    Args:
        row: Mapping-like definition row (dict or pandas Series) holding
            "Path_ID" and, for production scans, "<suffix>_output".
        suffix: File kind suffix, "dat_a" or "dat_b".
        test_output_dir: If set, build the path inside this folder instead
            of reading it from the definition row.

    Returns:
        The path as a string, or None when the row does not provide one
        (missing column, or an empty cell that pandas loaded as NaN).
    """
    if test_output_dir:
        path_id = row["Path_ID"]
        if not isinstance(path_id, str):
            # Empty Path_ID cells are NaN floats; os.path.join would raise.
            return None
        return os.path.join(test_output_dir, path_id, f"{path_id}_{suffix}_1.csv")

    fpath = row.get(f"{suffix}_output")
    if isinstance(fpath, str) and fpath.strip():
        return fpath
    return None


def count_blanks(fpath):
    """Return (row_count, blank_count) for the elevation column of one CSV.

    "Blank" means whatever pandas' default NA parsing turns into NaN (empty
    fields and its standard NA markers). Raises whatever pandas.read_csv
    raises, e.g. when the column is absent or the file is unreadable.
    """
    data = pd.read_csv(fpath, usecols=[ELEVATION_COLUMN])
    # int() keeps numpy integer types out of the totals and the report rows.
    return len(data), int(data[ELEVATION_COLUMN].isna().sum())


def scan_paths(df, test_output_dir=None):
    """Scan the DAT_A and DAT_B file of every row in *df*.

    Missing or unreadable files are reported on stdout and skipped so one bad
    file does not abort the whole scan.

    Returns:
        (total_rows, total_blank, blank_detail) where blank_detail has one
        dict per file that contains at least one blank value.
    """
    total_rows = 0
    total_blank = 0
    blank_detail = []

    for _, row in df.iterrows():
        path_id = row["Path_ID"]
        for label, suffix in FILE_KINDS:
            fpath = resolve_data_path(row, suffix, test_output_dir)
            if fpath is None:
                print(f"  MISSING: no {label} file path for {path_id}")
                continue
            if not os.path.exists(fpath):
                print(f"  MISSING: {fpath}")
                continue

            try:
                n_rows, n_blank = count_blanks(fpath)
            except Exception as e:
                # Deliberately broad: best-effort scan, log and move on.
                print(f"  ERROR reading {fpath}: {e}")
                continue

            total_rows += n_rows
            total_blank += n_blank
            if n_blank > 0:
                # n_blank > 0 implies n_rows > 0, so the division is safe.
                blank_detail.append({
                    "path_id": path_id,
                    "file": label,
                    "total_rows": n_rows,
                    "blank_count": n_blank,
                    "blank_pct": round(100 * n_blank / n_rows, 2),
                })

    return total_rows, total_blank, blank_detail


def format_summary(total_rows, total_blank, blank_detail):
    """Return the summary lines for a finished scan.

    blank_detail holds one entry per file, so paths are de-duplicated before
    being counted; the per-file count is reported separately.
    """
    if total_rows > 0:
        rate = f"{100 * total_blank / total_rows:.3f}%"
    else:
        rate = "N/A"
    paths_with_blanks = {entry["path_id"] for entry in blank_detail}

    return [
        "=" * 60,
        f"Total rows scanned:      {total_rows:,}",
        f"Total blank meanelev:    {total_blank:,}",
        f"Overall blank rate:      {rate}",
        f"Paths with any blanks:   {len(paths_with_blanks)}",
        f"Files with any blanks:   {len(blank_detail)}",
    ]


def main():
    """Load the path definitions, scan every file, then print and save the report."""
    df = pd.read_csv(DEFINITION_CSV, dtype=str)

    # Fail once, up front, instead of on the first row of the scan loop.
    required = ["Path_ID"]
    if not TEST_OUTPUT_DIR:
        required += [f"{suffix}_output" for _, suffix in FILE_KINDS]
    missing_cols = [col for col in required if col not in df.columns]
    if missing_cols:
        raise KeyError(
            f"{DEFINITION_CSV} is missing required column(s): {', '.join(missing_cols)}"
        )

    if TEST_OUTPUT_DIR and TEST_ONLY:
        # Only include paths that have output in the test folder
        def has_test_output(path_id):
            fpath = resolve_data_path({"Path_ID": path_id}, "dat_a", TEST_OUTPUT_DIR)
            return fpath is not None and os.path.exists(fpath)

        keep = df["Path_ID"].map(has_test_output).astype(bool)
        df = df[keep].copy()
        print(f"Scanning {len(df)} paths in test folder: {TEST_OUTPUT_DIR}\n")

    total_rows, total_blank, blank_detail = scan_paths(df, TEST_OUTPUT_DIR)

    print("\n" + "\n".join(format_summary(total_rows, total_blank, blank_detail)))

    if blank_detail:
        detail_df = pd.DataFrame(blank_detail)
        detail_df = detail_df.sort_values("blank_count", ascending=False)
        print("\nTop 20 worst paths:")
        print(detail_df.head(20).to_string(index=False))
        detail_df.to_csv(REPORT_CSV, index=False)
        print(f"\nFull detail saved to {REPORT_CSV}")


if __name__ == "__main__":
    main()
