r"""
diagnose_missing_callsigns.py
------------------------------
Identifies entity names for ALL B-side call signs that are not appearing
as transmitters in the Oncor Network Definition output.

Run this, paste output back to Claude to update ENTITY_SEARCH terms.

Input:  D:\FCC_ULS\EN.dat, D:\FCC_ULS\HD.dat
Output: console + D:\oncor\diagnose_missing_callsigns.csv
"""

import pandas as pd
from pathlib import Path

ULS_DIR    = Path(r"D:\FCC_ULS")
OUTPUT_CSV = Path(r"D:\oncor\diagnose_missing_callsigns.csv")

MISSING_CS = {
    'KBG63','KBJ25','KBM56','KFG40','KFG44','KHU62','KJL79','KNH46','KOE99',
    'KOK44','KON38','KQL42','KRX52','KTF53','KVG93','KYL27','WAN228','WAP273',
    'WAY496','WAZ440','WAZ508','WBD289','WBH606','WBH617','WCP38','WEG705',
    'WEH229','WHH555','WHH904','WHH911','WHI276','WHI448','WHJ253','WHJ254',
    'WHJ616','WLB942','WLN600','WNEG758','WNEY543','WNEY544','WNEY546',
    'WNTF518','WNTS361','WNTU923','WNTU927','WNTV762','WNTZ334','WPNE294',
    'WPNN284','WPNN815','WPOP817','WPQQ322','WPQS399','WPSG893','WPTR233',
    'WPVB918','WPVP749','WPVP752','WPVQ588','WPVQ718','WPVQ719','WPVT230',
    'WPVT785','WPVT787','WPVT789','WPVU627','WPVU630','WPVV225','WPVV345',
    'WPVW608','WPVW612','WPVX246','WPVX660','WPVX661','WPVX662','WPVY281',
    'WPVY502','WPWZ415','WPWZ420','WPWZ421','WPWZ425','WPWZ428','WPWZ430',
    'WPWZ431','WPWZ445','WPWZ446','WPWZ453','WPWZ939','WPWZ941','WPWZ942',
    'WPXA427','WPXA448','WPXA451','WPXA455','WPXA458','WPXA459','WPXA462',
    'WPXA468','WPXA591','WPXB861','WPXC416','WPXC417','WPXC420','WPXC421',
    'WPXC423','WPXC424','WPXC427','WPXC573','WPYW984','WQAS805','WQBI588',
    'WQBQ359','WQBQ361','WQBS967','WQFA915','WQFP213','WQFP237','WQFU233',
    'WQGB579','WQGL495','WQGP525','WQGV329','WQHD537','WQHG512','WQHT853',
    'WQIF496','WQIF898','WQIJ236','WQIJ238','WQIL708','WQIW328','WQIW860',
    'WQIW928','WQIX814','WQJF596','WQKA556','WQKA962','WQKB543','WQKF561',
    'WQKF573','WQKH631','WQKP716','WQKQ780','WQKT527','WQLA286','WQLB714',
    'WQLG510','WQLP970','WQLS873','WQLX285','WQLX287','WQMI768','WQMI774',
    'WQMI777','WQMK876','WQMN491','WQMR215','WQMY915','WQNE687','WQNU903',
    'WQNW580','WQNZ352','WQOM388','WQOV509','WQOV886','WQOW461','WQOX557',
    'WQOY865','WQPJ686','WQPM485','WQPM784','WQRC541','WQRG381','WQRK250',
    'WQRM583','WQRN350','WQRP282','WQRQ582','WQRY949','WQSK473','WQSL499',
    'WQST655','WQST925','WQSY203','WQTE603','WQTE607','WQTF413','WQTG348',
    'WQTN763','WQTQ374','WQTU662','WQTW277','WQTW401','WQTW408','WQTY220',
    'WQUA557','WQUJ326','WQUK256','WQUQ316','WQUQ660','WQUR637','WQUS577',
    'WQUW232','WQUW533','WQUW699','WQUW786','WQUW863','WQUW866','WQUY414',
    'WQVC495','WQVE680','WQVG892','WQVH303','WQVH308','WQVH490','WQVJ543',
    'WQVK883','WQVX924','WQVX952','WQVX973','WQVZ817','WQWC953','WQWE611',
    'WQWZ774','WQXA376','WQXA470','WQXA471','WQXA477','WQXJ607','WQXM299',
    'WQXN336','WQXP435','WQXS340','WQXW299','WQXW950','WQXW955','WQXX981',
    'WQYC726','WQYF938','WQYH720','WQYT709','WQYU803','WQYW383','WQYX619',
    'WQYZ875','WQZB527','WQZB538','WQZB751','WQZB752','WQZD584','WQZE953',
    'WQZI731','WQZJ887','WQZJ900','WQZP350','WQZP356','WQZP675','WQZP903',
    'WQZT505','WQZU808','WQZU974','WQZV874','WQZW450','WRAD222','WRAE692',
    'WRAF370','WRAH396','WRAP772','WRAR736','WRAV548','WRBN347','WRBT525',
    'WRBW859','WRBW861','WRBX510','WRCA644','WRCB230','WRCD697','WRCJ647',
    'WRCJ819','WRCL333','WRCM846','WRCN590','WRCP501','WRCQ505','WRCT436',
    'WRCT818','WRCW969','WRDF848','WRDH299','WRDK439','WRDL220','WRDM304',
    'WRDN453','WRHS837','WRHS840','WRHS843','WRHZ753','WRJH245','WRJK668',
    'WRJK811','WRJT643','WRJX621','WRJX655','WRJY220','WRKC345','WRKQ476',
    'WRMC467','WRMV335','WRNE229','WRNT620','WRNW587','WRON499','WROP821',
    'WROQ578','WRPA596','WRPZ407','WRTE989','WRTE990','WRUH215','WRUH460',
    'WRUL883','WRUN217','WRUS807','WRWN379','WRXG652','WRYD402','WRYS587',
    'WRZE765','WSAY229','WSDK427','WSEM858','WSFP288','WSFX929','WSFX930',
    'WSGB290','WSGE639','WSHR433','WSHV821','WSIR663',
}

print(f"\n{'='*60}")
print("  Diagnose Missing B-Side Call Signs")
print(f"{'='*60}\n")
print(f"  Checking {len(MISSING_CS)} missing call signs...\n")

# ── Load EN ───────────────────────────────────────────────────────────────────
print("Loading EN.dat...")
en = pd.read_csv(
    ULS_DIR / "EN.dat", sep="|", header=None, dtype=str,
    encoding="latin-1", on_bad_lines="skip", low_memory=False,
)
en_hits = en[en[4].str.strip().str.upper().isin(MISSING_CS)][[1, 4, 7]].copy()
en_hits.columns = ["unique_system_identifier", "call_sign", "entity_name"]
en_hits["call_sign"]   = en_hits["call_sign"].str.strip().str.upper()
en_hits["entity_name"] = en_hits["entity_name"].str.strip()

# ── Load HD for license status + service ──────────────────────────────────────
print("Loading HD.dat...")
hd = pd.read_csv(
    ULS_DIR / "HD.dat", sep="|", header=None, dtype=str,
    encoding="latin-1", on_bad_lines="skip", low_memory=False,
)
hd_sub = hd[[1, 4, 5, 6]].copy()
hd_sub.columns = ["unique_system_identifier", "call_sign", "license_status", "radio_service"]
hd_sub["call_sign"] = hd_sub["call_sign"].str.strip().str.upper()

result = en_hits.merge(
    hd_sub[["unique_system_identifier", "license_status", "radio_service"]],
    on="unique_system_identifier", how="left"
).drop_duplicates().sort_values("entity_name")

# ── Print summary ─────────────────────────────────────────────────────────────
print(f"\n  {result['call_sign'].nunique()} of {len(MISSING_CS)} call signs found in EN.dat\n")

print(f"  Unique entity names and counts:")
print(f"  {'Count':>6}  {'Status mix':<20}  Entity Name")
print(f"  {'-'*60}")
for name, grp in result.groupby("entity_name"):
    statuses = grp["license_status"].value_counts().to_dict()
    status_str = ", ".join(f"{k}:{v}" for k, v in statuses.items())
    print(f"  {len(grp):>6}  {status_str:<20}  {name}")

print(f"\n  Call signs NOT found in EN.dat:")
found_cs = set(result["call_sign"].unique())
not_found = MISSING_CS - found_cs
for cs in sorted(not_found):
    print(f"    {cs}")

# ── Write CSV ─────────────────────────────────────────────────────────────────
OUTPUT_CSV.parent.mkdir(parents=True, exist_ok=True)
result.to_csv(OUTPUT_CSV, index=False)
print(f"\n  Full results: {OUTPUT_CSV}")
print(f"{'='*60}\n")
