"""
km_fetch_coords_web.py
Fetches precise coordinates for problem call signs directly from FCC ULS
location detail pages using the license key from HD.dat.
Patches the network definition CSV.

Run: python km_fetch_coords_web.py
"""
import os, re, time, math
import pandas as pd
import urllib.request

# Directory that holds the FCC ULS data files; HD.dat is read from here.
ULS_DIR = r"D:\FCC_ULS"
# Network definition CSV: read at startup and saved back to the same path.
CSV     = r"D:\KinderMorgan\Kinder_Morgan_Network_Definition_v1.csv"
# Paths whose Path_Length_Miles exceeds this value mark both of their endpoint
# call signs as "problem" call signs to be re-fetched.
THRESHOLD = 55.0

def haversine_miles(lat1,lon1,lat2,lon2):
    """Return the great-circle distance in miles between two points.

    All four arguments are latitudes/longitudes in decimal degrees.
    The distance is computed with the haversine formula on a sphere of
    radius 3958.8 miles.
    """
    R = 3958.8  # sphere radius in miles used for the distance
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat/2)**2 + math.cos(lat1)*math.cos(lat2)*math.sin(dlon/2)**2
    # Floating-point rounding can leave `a` a hair outside [0, 1] for
    # (near-)antipodal points; sqrt/asin would then raise ValueError.
    a = min(1.0, max(0.0, a))
    return R*2*math.asin(math.sqrt(a))

def calc_bearing(lat1,lon1,lat2,lon2):
    """Return the initial bearing from point 1 to point 2, in degrees.

    Inputs are decimal-degree latitudes/longitudes. The result is
    normalized into the range [0, 360).
    """
    phi1, lam1, phi2, lam2 = map(math.radians, [lat1, lon1, lat2, lon2])
    delta_lam = lam2 - lam1
    # First and second arguments handed to atan2, in that order.
    east = math.sin(delta_lam) * math.cos(phi2)
    north = (
        math.cos(phi1) * math.sin(phi2)
        - math.sin(phi1) * math.cos(phi2) * math.cos(delta_lam)
    )
    heading = math.degrees(math.atan2(east, north))
    # atan2 yields (-180, 180]; shift and wrap into [0, 360).
    return (heading + 360) % 360

# ── Get problem call signs ────────────────────────────────────────────────────
# Every column is loaded as text; only the path length is converted to a
# number here (unparseable values become NaN and never exceed the threshold).
df = pd.read_csv(CSV, dtype=str)
df["Path_Length_Miles"] = pd.to_numeric(df["Path_Length_Miles"], errors="coerce")
long_paths = df.loc[df["Path_Length_Miles"] > THRESHOLD]

# Collect both endpoints of every over-long path, dropping missing values
# (non-strings) and the placeholder strings "nan", "" and "NEW".
problem_cs = {
    cs
    for endpoint_col in ("B_Call_Sign", "A_Call_Sign")
    for cs in long_paths[endpoint_col].unique()
    if isinstance(cs, str) and cs not in ("nan", "", "NEW")
}
print(f"Problem call signs: {sorted(problem_cs)}")

# ── Load HD to get license keys (usid = licKey on FCC website) ────────────────
print("\nLoading HD.dat for license keys...")
# HD.dat is parsed as headerless, pipe-delimited text: every field stays a
# string and rows pandas cannot parse are skipped.
hd = pd.read_csv(
    os.path.join(ULS_DIR, "HD.dat"),
    sep="|",
    header=None,
    dtype=str,
    encoding="latin-1",
    on_bad_lines="skip",
)
# Positional column 1 is used as the license key, column 4 as the call sign.
hd_usid = hd.iloc[:, 1].str.strip()
hd_cs = hd.iloc[:, 4].str.strip()
# When a call sign occurs on several rows, the last row's key is kept.
cs_to_lickey = dict(zip(hd_cs, hd_usid))
print(f"  HD rows: {len(hd)}")

# ── Fetch location page for each problem call sign ────────────────────────────
# Browser-like request headers sent with every page fetch.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Accept": "text/html,application/xhtml+xml",
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://wireless2.fcc.gov/UlsApp/UlsSearch/searchLicense.jsp",
}

# Coordinates are scraped as degrees / minutes / seconds followed by a
# hemisphere letter, e.g.  40° 51' 47.0" N  108° 28' 59.3" W
# Compiled once here instead of on every loop iteration.
lat_dms_re = re.compile(r'(\d{1,2})[°\s]+(\d{1,2})[\'′\s]+(\d+\.?\d*)[\"″\s]*([NS])')
lon_dms_re = re.compile(r'(\d{2,3})[°\s]+(\d{1,2})[\'′\s]+(\d+\.?\d*)[\"″\s]*([EW])')

# call sign -> (lat, lon) in signed decimal degrees (south / west negative)
coord_cache = {}

for cs in sorted(problem_cs):
    lickey = cs_to_lickey.get(cs)
    # A missing HD value loads as NaN, which is a *truthy* float, so a plain
    # `not lickey` would let it through and build a "licKey=nan" URL.
    if not isinstance(lickey, str) or not lickey:
        print(f"  {cs}: no licKey found in HD")
        continue

    url = f"https://wireless2.fcc.gov/UlsApp/UlsSearch/licenseLocSum.jsp?licKey={lickey}"
    print(f"  {cs} (licKey={lickey}): {url}")

    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=20) as r:
            raw = r.read()
            # Honor the charset the server declares. Forcing latin-1 would turn
            # a UTF-8 "°" into "Â°" and could never yield the ′ / ″ characters
            # the patterns above accept. latin-1 remains the fallback.
            charset = r.headers.get_content_charset() or "latin-1"
        try:
            html = raw.decode(charset, errors="replace")
        except LookupError:
            # Server named a codec Python does not know.
            html = raw.decode("latin-1")

        lat_matches = lat_dms_re.findall(html)
        lon_matches = lon_dms_re.findall(html)

        if lat_matches and lon_matches:
            # Pair the i-th latitude match with the i-th longitude match and
            # keep the first pair that is not on whole degrees.
            # NOTE(review): assumes the page lists latitudes and longitudes in
            # the same order with none missing -- confirm against a real page.
            for (ld,lm,ls,ldir),(od,om,os_,odir) in zip(lat_matches, lon_matches):
                lat = float(ld)+float(lm)/60+float(ls)/3600
                if ldir=='S': lat=-lat
                lon = float(od)+float(om)/60+float(os_)/3600
                if odir=='W': lon=-lon
                if lat % 1 != 0 or lon % 1 != 0:  # precise
                    coord_cache[cs] = (lat, lon)
                    print(f"    -> lat={lat:.6f} lon={lon:.6f}")
                    break
            else:
                # Loop finished without a break: every pair was whole degrees.
                print("    -> only integer coords found")
        else:
            print(f"    -> no coords found in HTML (len={len(html)})")

    except Exception as e:
        # Deliberately broad: one failed fetch/parse must not abort the run.
        print(f"    -> ERROR: {e}")

    # Pause between requests to the FCC site.
    time.sleep(1.0)

# ── Patch CSV ─────────────────────────────────────────────────────────────────
print(f"\nCoords fetched for {len(coord_cache)} call signs")
patched = 0

# The CSV was loaded as text; convert the coordinate columns so they can feed
# the distance / bearing math. Non-numeric text becomes NaN.
df["A_Latitude"]  = pd.to_numeric(df["A_Latitude"],  errors="coerce")
df["A_Longitude"] = pd.to_numeric(df["A_Longitude"], errors="coerce")
df["B_Latitude"]  = pd.to_numeric(df["B_Latitude"],  errors="coerce")
df["B_Longitude"] = pd.to_numeric(df["B_Longitude"], errors="coerce")

for idx, row in df.iterrows():
    changed = False
    a_lat = row["A_Latitude"]
    a_lon = row["A_Longitude"]
    b_lat = row["B_Latitude"]
    b_lon = row["B_Longitude"]
    # NaN path lengths compare False, so they only qualify via a missing latitude.
    too_long = row["Path_Length_Miles"] > THRESHOLD

    # Replace an endpoint when fetched coordinates exist for its call sign and
    # the stored latitude is missing or the path is over the threshold.
    if row["A_Call_Sign"] in coord_cache and (pd.isna(a_lat) or too_long):
        a_lat, a_lon = coord_cache[row["A_Call_Sign"]]
        df.at[idx, "A_Latitude"] = a_lat
        df.at[idx, "A_Longitude"] = a_lon
        changed = True

    if row["B_Call_Sign"] in coord_cache and (pd.isna(b_lat) or too_long):
        b_lat, b_lon = coord_cache[row["B_Call_Sign"]]
        df.at[idx, "B_Latitude"] = b_lat
        df.at[idx, "B_Longitude"] = b_lon
        changed = True

    if not changed:
        continue

    if all(not pd.isna(v) for v in [a_lat, a_lon, b_lat, b_lon]):
        # Both endpoints are known: recompute length and both bearings.
        old_mi = row["Path_Length_Miles"]
        new_mi = round(haversine_miles(a_lat,a_lon,b_lat,b_lon),3)
        df.at[idx,"Path_Length_Miles"] = new_mi
        df.at[idx,"A_Bearing"] = round(calc_bearing(a_lat,a_lon,b_lat,b_lon),2)
        df.at[idx,"B_Bearing"] = round(calc_bearing(b_lat,b_lon,a_lat,a_lon),2)
        print(f"  Patched {row['path_num']} {row['A_Call_Sign']}->{row['B_Call_Sign']}: {old_mi:.1f} -> {new_mi:.1f} mi")
        patched += 1
    else:
        # Coordinates were replaced but the other endpoint is still missing, so
        # length and bearings stay as they were; report it instead of changing
        # the row silently.
        print(f"  Updated coords for {row['path_num']} {row['A_Call_Sign']}->{row['B_Call_Sign']} (length not recomputed: missing coordinate)")

print(f"\nPatched {patched} paths")
# Write to a sibling temp file and swap it in, so a failure part-way through
# the write cannot leave the only copy of the network definition truncated.
tmp_csv = CSV + ".tmp"
df.to_csv(tmp_csv, index=False)
os.replace(tmp_csv, CSV)
print(f"Saved -> {CSV}")
print(f"\nMax path length: {df['Path_Length_Miles'].max():.1f} miles")
