"""
build_km_network_def_final.py
Converts Kinder_Morgan_Network_Definition_v1.csv into the standard
64-column network definition format matching Oncor_Network_Definition_v1.csv.
Pulls bandwidth and modulation from EM.dat. Samples site elevations from
clipped DEM TIFs with USGS 3DEP API fallback.

Output: D:\KinderMorgan\Kinder_Morgan_Network_Definition_v2.csv

Run: python build_km_network_def_final.py
"""
import os, re, math, time
import pandas as pd
import numpy as np

# Hard-coded Windows locations used by this script.
# ULS_DIR: folder that holds the FCC ULS extract; EM.dat is read from here.
ULS_DIR  = r"D:\FCC_ULS"
# DEM_DIR: folder searched for one "<Path_ID>.tif" raster per path.
DEM_DIR  = r"D:\KinderMorgan\clipped_dems"
# IN_CSV: the v1 network definition that is read as input.
IN_CSV   = r"D:\KinderMorgan\Kinder_Morgan_Network_Definition_v1.csv"
# OUT_CSV: the v2 network definition written at the end of the run.
OUT_CSV  = r"D:\KinderMorgan\Kinder_Morgan_Network_Definition_v2.csv"

# rasterio is an optional dependency. HAS_RASTERIO records whether the import
# succeeded so that raster sampling can be skipped instead of crashing when
# the package is not installed.
try:
    import rasterio
    HAS_RASTERIO = True
except ImportError:
    HAS_RASTERIO = False
    print("WARNING: rasterio not found, elevation sampling disabled")

import urllib.request, json

# ── Helpers ───────────────────────────────────────────────────────────────────
def sample_tif(tif_path, lon, lat):
    """Sample band 1 of a raster at one coordinate.

    Args:
        tif_path: Path of the raster file to open with rasterio.
        lon: X coordinate handed to ``src.sample``.
        lat: Y coordinate handed to ``src.sample``.
            NOTE(review): the pair is passed to rasterio unchanged, so this
            presumably expects a raster in a lon/lat CRS -- confirm.

    Returns:
        The sampled value rounded to one decimal place, or ``None`` when
        rasterio is unavailable, the file cannot be opened or sampled, or the
        value is NaN, equals the raster's nodata value, or lies outside the
        accepted -500..9000 range.
    """
    if not HAS_RASTERIO:
        return None
    try:
        with rasterio.open(tif_path) as src:
            samples = list(src.sample([(lon, lat)]))
            value = float(samples[0][0])
            nodata = src.nodata
    except Exception:
        # Best effort: any read/sampling failure means "no value from this DEM".
        return None

    # NaN never compares equal to anything (including a NaN nodata value) and
    # fails both range comparisons below, so it must be rejected explicitly;
    # otherwise NaN would be reported as a valid elevation.
    if math.isnan(value):
        return None
    if nodata is not None and value == nodata:
        return None
    if value < -500 or value > 9000:
        return None
    return round(value, 1)

def usgs_elevation(lat, lon):
    """Look up a point elevation from the USGS EPQS web service.

    Args:
        lat: Latitude sent as the ``y`` query parameter (wkid 4326).
        lon: Longitude sent as the ``x`` query parameter (wkid 4326).

    Returns:
        The elevation in metres (the request asks for ``units=Meters``)
        rounded to one decimal place, or ``None`` when the request fails, the
        response carries no usable number, or the value is not strictly
        between -500 and 9000.
    """
    url = (f"https://epqs.nationalmap.gov/v1/json?"
           f"x={lon}&y={lat}&wkid=4326&units=Meters&includeDate=false")
    try:
        req = urllib.request.Request(url, headers={"User-Agent":"Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=15) as r:
            data = json.loads(r.read())
        # Pick the first key that is actually present. A truthiness test
        # (`a or b`) would discard a legitimate elevation of 0.
        raw = data.get("value")
        if raw is None:
            raw = data.get("elevation")
        if raw is None:
            return None
        v = float(raw)
    except Exception:
        # Best effort: network, JSON and conversion errors all mean "no value".
        return None
    # The chained comparison is False for NaN, so NaN is rejected here too.
    return round(v, 1) if -500 < v < 9000 else None

def get_elev(path_id, lon, lat):
    """Return a site elevation, preferring the path's DEM over the USGS API.

    Args:
        path_id: Path identifier; the DEM looked up is ``<path_id>.tif``
            inside ``DEM_DIR``.
        lon: Site longitude.
        lat: Site latitude.

    Returns:
        The elevation from ``sample_tif`` when the DEM file exists and yields
        a value, otherwise whatever ``usgs_elevation`` returns (possibly
        ``None``). ``None`` is returned immediately if either coordinate is
        missing.
    """
    if pd.isna(lon) or pd.isna(lat):
        return None

    dem_path = os.path.join(DEM_DIR, f"{path_id}.tif")
    elevation = None
    if os.path.exists(dem_path):
        elevation = sample_tif(dem_path, lon, lat)

    if elevation is None:
        # No DEM value available: fall back to the USGS API.
        elevation = usgs_elevation(lat, lon)
        if elevation is not None:
            print(f"    USGS fallback for {path_id}: {elevation}m")
    return elevation

def parse_bandwidth_mhz(designator):
    """Convert the bandwidth prefix of an emission designator to MHz.

    Only the first four characters are examined. They must consist of three
    digits and one unit letter (H, K, M or G, case-insensitive), with the
    letter standing where the decimal point would be and at least one digit
    before it:

        10M0 -> 10.0    3M50 -> 3.5    7M00 -> 7.0
        28M0 -> 28.0    500K -> 0.5    12K5 -> 0.0125

    Args:
        designator: Emission designator string such as ``"10M0D7W"``.

    Returns:
        The bandwidth in MHz as a float, or ``None`` when the input is not a
        string, is shorter than four characters, or does not match the
        expected layout.
    """
    if not isinstance(designator, str) or len(designator) < 4:
        return None

    # One pattern covers every letter position after the first character,
    # including a trailing unit letter such as "500K" (no fractional digits).
    match = re.fullmatch(r'(\d{1,3})([HKMG])(\d{0,2})', designator[:4],
                         re.IGNORECASE)
    if match is None:
        return None

    whole, unit, fraction = match.groups()
    number = float(f"{whole}.{fraction or '0'}")
    unit = unit.upper()
    if unit == 'H':
        return round(number / 1e6, 6)
    if unit == 'K':
        return round(number / 1000, 4)
    if unit == 'M':
        return round(number, 4)
    return round(number * 1000, 4)  # unit == 'G'

# Lookup from the fifth character of an emission designator to a short label.
# NOTE(review): the labels are informal shorthand and may not match the formal
# emission-symbol definitions (e.g. 'D', 'B', 'V', 'W') -- confirm before
# relying on them downstream.
MODULATION_MAP = {
    'A': 'AM',
    'B': 'SSB',
    'C': 'VSB',
    'D': 'Digital',
    'F': 'FM',
    'G': 'Phase',
    'H': 'SSB full carrier',
    'J': 'SSB suppressed carrier',
    'N': 'Unmodulated',
    'P': 'Pulse',
    'R': 'SSB reduced carrier',
    'V': 'Analog combination',
    'W': 'Digital+Analog',
    'X': 'Other',
}


def parse_modulation(designator):
    """Return the label for the fifth character of an emission designator.

    Args:
        designator: Emission designator string such as ``"10M0D7W"``.

    Returns:
        The ``MODULATION_MAP`` label for the upper-cased fifth character, the
        upper-cased character itself when it has no entry, or ``None`` when
        the input is not a string of at least five characters.
    """
    if isinstance(designator, str) and len(designator) >= 5:
        symbol = designator[4].upper()
        return MODULATION_MAP.get(symbol, symbol)
    return None

# ── Load EM.dat for bandwidth and modulation ─────────────────────────────────
# EM.dat col[4]=call_sign col[5]=loc_num col[6]=ant_num col[7]=frequency
# col[9]=emission_designator (e.g. 10M0D7W) col[11]=modulation (e.g. QAM/64)
print("Loading EM.dat for bandwidth and modulation...")
em = pd.read_csv(
    os.path.join(ULS_DIR, "EM.dat"),
    sep="|",
    header=None,
    dtype=str,
    on_bad_lines="skip",
    encoding="latin-1",
)

# Pull the three columns of interest by position; the designator and
# modulation columns fall back to blank strings when the file is too narrow.
em_col_count = em.shape[1]
em_cs = em.iloc[:, 4].str.strip()
if em_col_count > 9:
    em_des = em.iloc[:, 9].str.strip()
else:
    em_des = pd.Series([""] * len(em))
if em_col_count > 11:
    em_mod = em.iloc[:, 11].str.strip()
else:
    em_mod = pd.Series([""] * len(em))

em_df = pd.DataFrame({"call_sign": em_cs, "designator": em_des, "modulation": em_mod})
em_df["bw_mhz"] = em_df["designator"].apply(parse_bandwidth_mhz)

# Rows whose designator produced a positive bandwidth.
has_bandwidth = em_df["bw_mhz"].notna() & (em_df["bw_mhz"] > 0)
em_valid = em_df[has_bandwidth]
valid_by_call_sign = em_valid.groupby("call_sign")

# Per call sign: largest bandwidth, first non-empty modulation string, and
# first designator among the rows with a positive bandwidth.
cs_to_bw = valid_by_call_sign["bw_mhz"].max().to_dict()
has_modulation = em_df["modulation"].str.len() > 0
cs_to_mod = em_df[has_modulation].groupby("call_sign")["modulation"].first().to_dict()
cs_to_des = valid_by_call_sign["designator"].first().to_dict()
fr_cs_bw2 = {}  # no fallback needed
print(f"  Call signs with EM bandwidth: {len(cs_to_bw)}")
print(f"  Call signs with EM modulation: {len(cs_to_mod)}")

# ── Load v1 CSV ───────────────────────────────────────────────────────────────
print("Loading v1...")
df = pd.read_csv(IN_CSV, dtype=str)
print(f"  Paths: {len(df)}")

# Everything is read as text; these columns are converted to numbers, with
# unparseable entries becoming NaN.
numeric_columns = [
    "A_Latitude", "A_Longitude", "B_Latitude", "B_Longitude",
    "Path_Length_Miles", "A_Bearing", "B_Bearing",
    "A_Min_Freq_MHz", "B_Min_Freq_MHz", "a_raat", "b_raat",
]
df = df.assign(**{
    name: pd.to_numeric(df[name], errors="coerce") for name in numeric_columns
})

# ── Site elevations ───────────────────────────────────────────────────────────
print("Sampling site elevations...")
a_elevs, b_elevs = [], []
usgs_calls = 0
path_total = len(df)
site_rows = zip(
    df.index, df["Path_ID"],
    df["A_Longitude"], df["A_Latitude"],
    df["B_Longitude"], df["B_Latitude"],
)
for i, pid, a_lon, a_lat, b_lon, b_lat in site_rows:
    a_e = get_elev(pid, a_lon, a_lat)
    b_e = get_elev(pid, b_lon, b_lat)
    # NOTE(review): the counter and the pause only fire when an elevation came
    # back missing, not after every USGS request -- confirm this is the
    # intended throttle.
    if a_e is None or b_e is None:
        usgs_calls += 1
        time.sleep(0.3)
    a_elevs.append(a_e)
    b_elevs.append(b_e)
    # Progress line every 20 paths.
    done = i + 1
    if done % 20 == 0:
        print(f"  {done}/{path_total}...")

df["a_meanelev"] = a_elevs
df["b_meanelev"] = b_elevs

# ── Derived fields ────────────────────────────────────────────────────────────
# Path length: miles converted to kilometres.
df["Path_Length_KM"] = df["Path_Length_Miles"].mul(1.60934).round(4)

# Antenna heights scaled by 0.3048 (presumably feet to metres -- confirm the
# unit of a_raat / b_raat in the v1 file).
for raat_col, metres_col in (("a_raat", "a_raat_m"), ("b_raat", "b_raat_m")):
    df[metres_col] = df[raat_col].mul(0.3048).round(1)

# Antenna height above mean sea level = site elevation + antenna height.
for elev_col, metres_col, amsl_col in (
    ("a_meanelev", "a_raat_m", "a_amsl"),
    ("b_meanelev", "b_raat_m", "b_amsl"),
):
    ground = pd.to_numeric(df[elev_col], errors="coerce")
    height = pd.to_numeric(df[metres_col], errors="coerce")
    df[amsl_col] = ground.add(height).round(1)

# State: use site A's state, falling back to site B's when A is missing/blank.
a_state = df["a_state"]
a_state_present = a_state.notna() & a_state.ne("")
df["State"] = a_state.where(a_state_present, df["b_state"])

# Bandwidth and modulation, looked up by call sign in the EM.dat tables.
# Bandwidth uses site A's call sign (fr_cs_bw2 is the secondary lookup);
# modulation uses site A's call sign and falls back to site B's.
df["bw_mhz"] = df["A_Call_Sign"].map(cs_to_bw).fillna(df["A_Call_Sign"].map(fr_cs_bw2))
df["mod"]    = df["A_Call_Sign"].map(cs_to_mod).fillna(df["B_Call_Sign"].map(cs_to_mod))
df["des_a"]  = df["A_Call_Sign"].map(cs_to_des)
df["des_b"]  = df["B_Call_Sign"].map(cs_to_des)

# freq_note reads "A:<designator> B:<designator>", listing only the sides that
# have a designator, and is blank when neither has one.
# Series.str.strip(chars) removes a *set of characters*, not a prefix, so
# stripping "AB: " would also drop the leading "A:" label and eat designators
# ending in 'A' or 'B'. Each labelled part is built conditionally instead.
_des_a = df["des_a"].fillna("").astype(str)
_des_b = df["des_b"].fillna("").astype(str)
_note_a = ("A:" + _des_a).where(_des_a != "", "")
_note_b = ("B:" + _des_b).where(_des_b != "", "")
df["freq_note"] = (_note_a + " " + _note_b).str.strip()

# Per-path file locations, each derived from Path_ID.
path_ids = df["Path_ID"]
df["ven_file"]     = [rf"D:\KinderMorgan\complete\{p}\{p}_ven_1.csv" for p in path_ids]
df["kml_file"]     = [rf"D:\KinderMorgan\kml_files\{p}.kml" for p in path_ids]
df["dat_a_output"] = [rf"D:\KinderMorgan\complete\{p}\{p}_dat_a_1.csv" for p in path_ids]
df["dat_b_output"] = [rf"D:\KinderMorgan\complete\{p}\{p}_dat_b_1.csv" for p in path_ids]

# ── Build output in exact Oncor 64-column order ───────────────────────────────
# Output layout: (output column name, values) in the exact order the columns
# are written. Values are either a column taken from df or a constant that is
# repeated on every row; columns with no data available are left blank.
_entity = "Kinder Morgan Inc."
_blank = ""
_output_layout = [
    ("path_num",                              df["path_num"]),
    ("entity_name",                           _entity),
    ("receiver_entity_name",                  _entity),
    ("unique_system_identifier",              _blank),
    ("Path_ID",                               df["Path_ID"]),
    ("ven_file",                              df["ven_file"]),
    ("kml_file",                              df["kml_file"]),
    # Site A identity and location
    ("A_Call_Sign",                           df["A_Call_Sign"]),
    ("A_Latitude",                            df["A_Latitude"]),
    ("A_Longitude",                           df["A_Longitude"]),
    ("a_meanelev",                            df["a_meanelev"]),
    ("a_raat",                                df["a_raat_m"]),
    ("a_desc",                                df["a_site_name"]),
    # Site B identity and location
    ("b_Call_Sign",                           df["B_Call_Sign"]),
    ("b_Latitude",                            df["B_Latitude"]),
    ("b_Longitude",                           df["B_Longitude"]),
    ("b_meanelev",                            df["b_meanelev"]),
    ("b_raat",                                df["b_raat_m"]),
    ("b_desc",                                df["b_site_name"]),
    ("dat_a_output",                          df["dat_a_output"]),
    ("dat_b_output",                          df["dat_b_output"]),
    ("Path_Length (KM)",                      df["Path_Length_KM"]),
    ("State",                                 df["State"]),
    ("Path_Overall_Status",                   df["Path_Overall_Status"]),
    # Site A heights and antennas
    ("A Site Elev AMSL (M)",                  df["a_meanelev"]),
    ("A Site Main Ant Height AGL (M)",        df["a_raat_m"]),
    ("A Site Diversity Ant Height AGL (M)",   _blank),
    ("A Site Main Ant Height AMSL (M)",       df["a_amsl"]),
    ("A Site Diversity Ant Height AMSL (M)",  _blank),
    ("A Main Antenna Make",                   df["A_Main_Antenna_Make"]),
    ("A Main Antenna Model",                  df["A_Main_Antenna_Model"]),
    ("A Diversity Antenna Make",              _blank),
    ("A Diversity Antenna Model",             _blank),
    # Site B heights and antennas
    ("B Site Elev AMSL (M)",                  df["b_meanelev"]),
    ("B Site Main Ant Height AGL (M)",        df["b_raat_m"]),
    ("B Site Diversity Ant Height AGL (M)",   _blank),
    ("B Site Main Ant Height AMSL (M)",       df["b_amsl"]),
    ("B Site Diversity Ant Height AMSL (M)",  _blank),
    ("B Main Antenna Make",                   df["B_Main_Antenna_Make"]),
    ("B Main Antenna Model",                  df["B_Main_Antenna_Model"]),
    ("B Diversity Antenna Make",              _blank),
    ("B Diversity Antenna Model",             _blank),
    ("Path Length (Miles)",                   df["Path_Length_Miles"]),
    # Columns left blank by this script
    ("USGS 1M Points",                        _blank),
    ("GLO 30 Points",                         _blank),
    ("Total Points",                          _blank),
    ("A Main Antenna WC File",                _blank),
    ("A Diversity  Antenna WC",               _blank),
    ("B Main Antenna WC",                     _blank),
    ("B Diversity Antenna WC File",           _blank),
    ("AIntFunction",                          _blank),
    ("ATerrFunction",                         _blank),
    ("channels",                              _blank),
    # Radio parameters
    ("bandwidth (MHz)",                       df["bw_mhz"]),
    ("Site_A _xmt_fr_frequency_assigned_MHz", df["A_Min_Freq_MHz"]),
    ("Site_B _xmt_fr_frequency_assigned_MHz", df["B_Min_Freq_MHz"]),
    ("Site_A_rx_ant_gain_dBi",                _blank),
    ("Site_A_Rx_tilt_deg",                    _blank),
    ("Site_B_rx_ant_gain_dBi",                _blank),
    ("Site_B_Rx_tilt_deg",                    _blank),
    ("A_to_B_Bearing_deg",                    df["A_Bearing"]),
    ("B_to_A_Bearing_deg",                    df["B_Bearing"]),
    ("bandwidth_modulation",                  df["mod"]),
    ("freq_note",                             df["freq_note"]),
]

# The first entry is a Series, so it establishes the row index; later
# constants are then repeated across every row.
out = pd.DataFrame()
for _column_name, _column_values in _output_layout:
    out[_column_name] = _column_values

# Write the result without the row index, then report its shape.
out.to_csv(OUT_CSV, index=False)
print(f"\nSaved: {OUT_CSV}")
row_count, column_count = out.shape
print(f"Columns: {column_count}  Rows: {row_count}")

# Summary: how many sites got an elevation and how many paths got
# bandwidth / modulation values.
a_found = sum(v is not None for v in a_elevs)
b_found = sum(v is not None for v in b_elevs)
print(f"\nElevations: A={a_found}/{len(df)}  "
      f"B={b_found}/{len(df)}")
bw_filled = out["bandwidth (MHz)"].count()
mod_filled = out["bandwidth_modulation"].count()
print(f"Bandwidth: {bw_filled}/{len(out)}  Modulation: {mod_filled}/{len(out)}")

# Preview of the first ten rows for a quick visual check.
print("\nSample:")
preview_columns = [
    "path_num", "A_Call_Sign", "b_Call_Sign", "State", "Path Length (Miles)",
    "bandwidth (MHz)", "bandwidth_modulation",
    "Site_A _xmt_fr_frequency_assigned_MHz",
]
print(out.head(10)[preview_columns].to_string())
