"""
build_kinder_morgan_network_definition.py

Extracts Kinder Morgan 6 GHz microwave network from FCC ULS bulk DAT files.
Output: D:\KinderMorgan\Kinder_Morgan_Network_Definition_v1.csv
ULS files: D:\FCC_ULS\  (l_micro.zip extract)

Run: python build_kinder_morgan_network_definition.py
"""

import os, math
import pandas as pd
import numpy as np

ULS_DIR    = r"D:\FCC_ULS"
OUTPUT_DIR = r"D:\KinderMorgan"
OUTPUT_CSV = os.path.join(OUTPUT_DIR, "Kinder_Morgan_Network_Definition_v1.csv")
os.makedirs(OUTPUT_DIR, exist_ok=True)

FREQ_LOW    = 5925.0
FREQ_HIGH   = 7125.0
MW_SERVICES = {"MG","MW","CF","TP","TI","WU","YD","GB"}

def haversine_miles(lat1, lon1, lat2, lon2):
    R = 3958.8
    lat1,lon1,lat2,lon2 = map(math.radians,[lat1,lon1,lat2,lon2])
    dlat=lat2-lat1; dlon=lon2-lon1
    a=math.sin(dlat/2)**2+math.cos(lat1)*math.cos(lat2)*math.sin(dlon/2)**2
    return R*2*math.asin(math.sqrt(a))

def calc_bearing(lat1,lon1,lat2,lon2):
    lat1,lon1,lat2,lon2=map(math.radians,[lat1,lon1,lat2,lon2])
    dlon=lon2-lon1
    x=math.sin(dlon)*math.cos(lat2)
    y=math.cos(lat1)*math.sin(lat2)-math.sin(lat1)*math.cos(lat2)*math.cos(dlon)
    return (math.degrees(math.atan2(x,y))+360)%360

def vec_dms(deg_col, min_col, sec_col, dir_col):
    """Vectorized DMS to decimal degrees. Returns NaN where deg is null."""
    deg = pd.to_numeric(deg_col, errors="coerce")
    mn  = pd.to_numeric(min_col, errors="coerce").fillna(0)
    sec = pd.to_numeric(sec_col, errors="coerce").fillna(0)
    val = deg + mn/60 + sec/3600
    sw  = dir_col.astype(str).str.strip().isin(["S","W"])
    val = val.where(~sw, -val)
    return val.where(deg.notna(), other=np.nan)

# ── EN.dat -- KM call signs ───────────────────────────────────────────────────
print("Loading EN.dat...")
en = pd.read_csv(os.path.join(ULS_DIR,"EN.dat"), sep="|", header=None,
                 dtype=str, on_bad_lines="skip", encoding="latin-1")
km_mask = en[7].str.strip().str.lower().fillna("").str.contains("kinder morgan", na=False)
km_en = en[km_mask]
km_callsigns_all = set(km_en[4].str.strip().dropna().unique())
print(f"  KM call signs from EN: {len(km_callsigns_all)}")

# ── HD.dat -- active KM microwave licenses ────────────────────────────────────
print("Loading HD.dat...")
hd = pd.read_csv(os.path.join(ULS_DIR,"HD.dat"), sep="|", header=None,
                 dtype=str, on_bad_lines="skip", encoding="latin-1")
hd_cs   = hd.iloc[:,4].str.strip()
hd_stat = hd.iloc[:,5].str.strip()
hd_svc  = hd.iloc[:,6].str.strip()

km_hd = hd[hd_svc.isin(MW_SERVICES) & (hd_stat=="A") & hd_cs.isin(km_callsigns_all)]
print(f"  Active KM microwave licenses: {len(km_hd)}")
if len(km_hd) == 0:
    raise SystemExit("No active KM microwave licenses found.")

active_km_cs    = set(km_hd.iloc[:,4].str.strip().unique())
all_active_mw_cs = set(hd[hd_svc.isin(MW_SERVICES) & (hd_stat=="A")].iloc[:,4].str.strip().unique())
print(f"  Active KM call signs: {len(active_km_cs)}")

# ── FR.dat -- frequencies ─────────────────────────────────────────────────────
print("Loading FR.dat...")
fr = pd.read_csv(os.path.join(ULS_DIR,"FR.dat"), sep="|", header=None,
                 dtype=str, on_bad_lines="skip", encoding="latin-1")
fr_cs  = fr.iloc[:,4].str.strip()
fr_loc = fr.iloc[:,6].str.strip()
fr_ant = fr.iloc[:,7].str.strip()
fr_freq= pd.to_numeric(fr.iloc[:,10], errors="coerce")

fr_df = pd.DataFrame({"call_sign":fr_cs,"loc":fr_loc,"ant":fr_ant,"freq":fr_freq})
fr_ant_min = (fr_df.groupby(["call_sign","loc","ant"])["freq"]
              .min().reset_index().rename(columns={"freq":"min_freq"}))
fr_cs_min = fr_df.groupby("call_sign")["freq"].min().to_dict()

def get_freq(cs, loc, ant):
    m = fr_ant_min[
        (fr_ant_min["call_sign"]==cs) &
        (fr_ant_min["loc"]==str(loc)) &
        (fr_ant_min["ant"]==str(ant))
    ]
    return float(m.iloc[0]["min_freq"]) if not m.empty else fr_cs_min.get(cs)

# ── LO.dat -- site locations (fully vectorized) ───────────────────────────────
print("Loading LO.dat...")
lo = pd.read_csv(os.path.join(ULS_DIR,"LO.dat"), sep="|", header=None,
                 dtype=str, on_bad_lines="skip", encoding="latin-1")

lo_cs      = lo.iloc[:,4].str.strip()
lo_type    = lo.iloc[:,7].str.strip()   # T=transmit R=receive
lo_locnum  = lo.iloc[:,8].str.strip()
lo_city    = lo.iloc[:,12].str.strip() if lo.shape[1]>12 else pd.Series([""] * len(lo))
lo_state   = lo.iloc[:,14].str.strip() if lo.shape[1]>14 else pd.Series([""] * len(lo))
lo_name    = lo.iloc[:,43].str.strip() if lo.shape[1]>43 else pd.Series([""] * len(lo))
lo_desc    = lo.iloc[:,44].str.strip() if lo.shape[1]>44 else pd.Series([""] * len(lo))

lat = vec_dms(lo.iloc[:,19], lo.iloc[:,20], lo.iloc[:,21], lo.iloc[:,22])
lon = vec_dms(lo.iloc[:,23], lo.iloc[:,24], lo.iloc[:,25], lo.iloc[:,26])

# Precision: coords with fractional degrees (not integer-only) are reliable
lat_frac = lat % 1
lon_frac = lon % 1
precise = (lat_frac != 0) | (lon_frac != 0)

# Site name: location_name -> location_description -> city+state
site_name = lo_name.where(lo_name.str.len() > 1,
            lo_desc.where(lo_desc.str.len() > 1,
            (lo_city + ", " + lo_state).str.strip(", ")))

lo_df = pd.DataFrame({
    "call_sign":    lo_cs,
    "loc_num":      lo_locnum,
    "loc_type":     lo_type,
    "latitude":     lat,
    "longitude":    lon,
    "precise":      precise,
    "location_state": lo_state,
    "site_name":    site_name,
})

# Best location per call_sign: precise T first, then any precise, then T, then anything
lo_df["_p"] = (~lo_df["precise"]).astype(int)   # 0=precise, 1=not
lo_df["_t"] = (lo_df["loc_type"] != "T").astype(int)  # 0=T, 1=other

lo_sorted = lo_df.sort_values(["call_sign","_p","_t"])

# Index 1: (call_sign, loc_num) for PA-specified exact lookups
lo_exact = (lo_sorted.groupby(["call_sign","loc_num"]).first()
            .reset_index().set_index(["call_sign","loc_num"]))

# Index 2: best location per call_sign (for fallback)
lo_best = lo_sorted.groupby("call_sign").first()

def get_location(cs, loc_num):
    """Return best location dict. Uses PA-specified loc_num if precise, else best overall."""
    key = (cs, str(loc_num))
    if key in lo_exact.index:
        row = lo_exact.loc[key]
        if row["precise"]:
            return row.to_dict()
    if cs in lo_best.index:
        return lo_best.loc[cs].to_dict()
    return {}

# ── AN.dat -- antennas ────────────────────────────────────────────────────────
print("Loading AN.dat...")
an = pd.read_csv(os.path.join(ULS_DIR,"AN.dat"), sep="|", header=None,
                 dtype=str, on_bad_lines="skip", encoding="latin-1")
# col[4]=call_sign col[6]=loc_num col[7]=ant_num
# col[11]=raat col[12]=make col[13]=model
an_cs  = an.iloc[:,4].str.strip()
an_loc = an.iloc[:,6].str.strip()
an_ant = an.iloc[:,7].str.strip()
an_raat= an.iloc[:,11].str.strip() if an.shape[1]>11 else pd.Series([""] * len(an))
an_make= an.iloc[:,12].str.strip() if an.shape[1]>12 else pd.Series([""] * len(an))
an_model=an.iloc[:,13].str.strip() if an.shape[1]>13 else pd.Series([""] * len(an))

an_df = pd.DataFrame({
    "call_sign": an_cs, "loc_num": an_loc, "ant_num": an_ant,
    "raat": an_raat, "make": an_make, "model": an_model,
})
an_idx = (an_df.groupby(["call_sign","loc_num","ant_num"]).first()
          .reset_index().set_index(["call_sign","loc_num","ant_num"]))
an_cs_fb = an_df.groupby("call_sign").first()

def get_antenna(cs, loc, ant):
    key = (cs, str(loc), str(ant))
    if key in an_idx.index:
        return an_idx.loc[key].to_dict()
    if cs in an_cs_fb.index:
        return an_cs_fb.loc[cs].to_dict()
    return {}

# ── PA.dat -- paths ───────────────────────────────────────────────────────────
print("Loading PA.dat...")
pa = pd.read_csv(os.path.join(ULS_DIR,"PA.dat"), sep="|", header=None,
                 dtype=str, on_bad_lines="skip", encoding="latin-1")
# col[4]=tx_cs col[6]=tx_loc col[7]=tx_ant
# col[8]=rx_loc col[9]=rx_ant col[16]=rx_cs
pa_df = pd.DataFrame({
    "call_sign":  pa.iloc[:,4].str.strip(),
    "tx_loc":     pa.iloc[:,6].str.strip(),
    "tx_ant":     pa.iloc[:,7].str.strip(),
    "rx_loc":     pa.iloc[:,8].str.strip(),
    "rx_ant":     pa.iloc[:,9].str.strip(),
    "rx_cs":      pa.iloc[:,16].str.strip() if pa.shape[1]>16 else "",
})
km_pa = pa_df[pa_df["call_sign"].isin(active_km_cs)].copy()
print(f"  KM PA records: {len(km_pa)}")

# Build a tx-coord lookup from ALL PA records (not just KM):
# For every call sign that appears as a transmitter, find its precise coordinates
# from LO using the transmit_location_number. This solves the problem where
# a call sign only has integer-degree coords as a receiver but has precise
# coords when it files as a transmitter on its own PA records.
print("  Building precise tx-coord index from all PA records...")
all_tx = pa_df[["call_sign","tx_loc"]].drop_duplicates()
tx_precise = {}
for _, r in all_tx.iterrows():
    cs = r["call_sign"]; loc = r["tx_loc"]
    key = (cs, str(loc))
    if key in lo_exact.index:
        row = lo_exact.loc[key]
        if row["precise"]:
            tx_precise[cs] = row.to_dict()
            continue
    # Not found by exact key -- check lo_best
    if cs not in tx_precise and cs in lo_best.index:
        row = lo_best.loc[cs]
        if row["precise"]:
            tx_precise[cs] = row.to_dict()
print(f"  Call signs with precise tx coords: {len(tx_precise)}")

def get_location_best(cs, loc_num):
    """Location lookup with tx-coord fallback as final option."""
    key = (cs, str(loc_num))
    if key in lo_exact.index:
        row = lo_exact.loc[key].to_dict()
        if row.get("precise"):
            return row
    if cs in lo_best.index:
        row = lo_best.loc[cs].to_dict()
        if row.get("precise"):
            return row
    # Last resort: use the precise tx coordinates from this call sign's own PA records
    if cs in tx_precise:
        return tx_precise[cs]
    return {}

# ── Build network definition ──────────────────────────────────────────────────
print("\nBuilding network definition...")
rows = []
seen_pairs = set()

for _, rec in km_pa.iterrows():
    a_cs = rec["call_sign"]
    b_cs = str(rec["rx_cs"]).strip()
    if not b_cs or b_cs in ("nan","","0"):
        continue

    pair_key = tuple(sorted([a_cs, b_cs]))
    if pair_key in seen_pairs:
        continue
    seen_pairs.add(pair_key)

    a_loc_n = rec["tx_loc"]; a_ant_n = rec["tx_ant"]
    b_loc_n = rec["rx_loc"]; b_ant_n = rec["rx_ant"]

    # A-side
    a_loc = get_location_best(a_cs, a_loc_n)
    a_an  = get_antenna(a_cs, a_loc_n, a_ant_n)
    a_lat = a_loc.get("latitude")
    a_lon = a_loc.get("longitude")

    a_freq = get_freq(a_cs, a_loc_n, a_ant_n)
    if a_freq is None or not (FREQ_LOW <= a_freq <= FREQ_HIGH):
        continue

    # B-side
    b_has = b_loc_n not in ("nan","","0")
    b_loc = get_location_best(b_cs, b_loc_n) if b_has else get_location_best(b_cs, "1")
    b_an  = get_antenna(b_cs, b_loc_n, b_ant_n) if b_has else get_antenna(b_cs, "1", "1")
    b_lat = b_loc.get("latitude")
    b_lon = b_loc.get("longitude")

    b_freq = get_freq(b_cs, b_loc_n, b_ant_n) if b_has else fr_cs_min.get(b_cs)
    if b_freq is None: b_freq = fr_cs_min.get(b_cs)
    b_in_6ghz = b_freq is not None and FREQ_LOW <= b_freq <= FREQ_HIGH

    # Geometry
    path_mi = a_brg = b_brg = None
    try:
        if all(v is not None and not (isinstance(v, float) and math.isnan(v))
               for v in [a_lat, a_lon, b_lat, b_lon]):
            path_mi = round(haversine_miles(a_lat,a_lon,b_lat,b_lon), 3)
            a_brg   = round(calc_bearing(a_lat,a_lon,b_lat,b_lon), 2)
            b_brg   = round(calc_bearing(b_lat,b_lon,a_lat,a_lon), 2)
    except Exception:
        pass

    a_make  = str(a_an.get("make","")  or "").strip()
    a_model = str(a_an.get("model","") or "").strip()
    b_make  = str(b_an.get("make","")  or "").strip() or a_make
    b_model = str(b_an.get("model","") or "").strip() or a_model
    a_raat  = str(a_an.get("raat","")  or "").strip()
    b_raat  = str(b_an.get("raat","")  or "").strip()

    rows.append({
        "A_Call_Sign":          a_cs,
        "B_Call_Sign":          b_cs,
        "Path_Overall_Status":  f"A=Active, B={'Active' if b_cs in all_active_mw_cs else 'Unknown'}",
        "A_Tx_Loc_Num":         a_loc_n,
        "A_Tx_Ant_Num":         a_ant_n,
        "B_Rx_Loc_Num":         b_loc_n,
        "B_Rx_Ant_Num":         b_ant_n,
        "A_Latitude":           a_lat,
        "A_Longitude":          a_lon,
        "B_Latitude":           b_lat,
        "B_Longitude":          b_lon,
        "a_site_name":          a_loc.get("site_name",""),
        "b_site_name":          b_loc.get("site_name",""),
        "a_state":              str(a_loc.get("location_state","") or "").strip(),
        "b_state":              str(b_loc.get("location_state","") or "").strip(),
        "Path_Length_Miles":    path_mi,
        "A_Bearing":            a_brg,
        "B_Bearing":            b_brg,
        "A_Min_Freq_MHz":       a_freq,
        "B_Min_Freq_MHz":       b_freq,
        "B_In_6GHz_Band":       b_in_6ghz,
        "A_Main_Antenna_Make":  a_make,
        "A_Main_Antenna_Model": a_model,
        "B_Main_Antenna_Make":  b_make,
        "B_Main_Antenna_Model": b_model,
        "a_raat":               a_raat,
        "b_raat":               b_raat,
        "a_meanelev":           "",
        "b_meanelev":           "",
    })

print(f"  Paths built: {len(rows)}")
if len(rows) == 0:
    raise SystemExit("No paths found.")

df = pd.DataFrame(rows)
df = df.sort_values(["a_state","A_Call_Sign","B_Call_Sign"]).reset_index(drop=True)
df.insert(0, "path_num", [f"KMI_{i+1:04d}" for i in range(len(df))])
df.insert(1, "Path_ID",  df["A_Call_Sign"] + "_" + df["B_Call_Sign"])
df.to_csv(OUTPUT_CSV, index=False)

print(f"\n{'='*60}")
print(f"OUTPUT: {OUTPUT_CSV}")
print(f"Total 6 GHz paths: {len(df)}")
print(f"\nPath length distribution:")
pl = pd.to_numeric(df["Path_Length_Miles"], errors="coerce")
print(pl.describe())
print(f"\nState breakdown:")
print(df["a_state"].value_counts().to_string())

missing = df[df["A_Latitude"].isna() | df["B_Latitude"].isna()]
if not missing.empty:
    print(f"\nWARNING: {len(missing)} paths missing coordinates")
    print(missing[["path_num","A_Call_Sign","B_Call_Sign","A_Latitude","B_Latitude"]].to_string())
