"""
fix_oob_frequencies.py
Fixes 196 out-of-band frequency rows using local FCC ULS data.
Run from D:\ATT\ with geo_env:  python fix_oob_frequencies.py
"""

import pandas as pd
import os

BASE       = r'D:\ATT'
FCC_DIR    = r'D:\ATT\fcc_data'
INPUT_CSV  = os.path.join(BASE, 'ATT_Mobility_Network_Definition_v13.csv')
OUTPUT_CSV = os.path.join(BASE, 'ATT_Mobility_Network_Definition_v14.csv')

# ── Load network definition ───────────────────────────────────────────────────
print('Loading network definition...')
df = pd.read_csv(INPUT_CSV, low_memory=False)
oob_mask = df['freq_note'] == 'FREQ_OOB_NEEDS_FCC_LOOKUP'
print(f'  Total rows: {len(df)}')
print(f'  Out-of-band rows to fix: {oob_mask.sum()}')

oob_calls = set(df.loc[oob_mask, 'A_Call_Sign'].str.strip().str.upper().unique())
print(f'  Unique call signs to look up: {len(oob_calls)}')

# ── Helper: sniff a pipe-delimited dat file ───────────────────────────────────
def sniff_dat(path, nlines=5):
    lines = []
    with open(path, 'r', encoding='latin-1') as f:
        for _ in range(nlines):
            line = f.readline()
            if not line:
                break
            lines.append(line.rstrip('\n'))
    ncols = len(lines[0].split('|')) if lines else 0
    return ncols, lines

def read_dat(path, ncols):
    return pd.read_csv(
        path, sep='|', header=None,
        names=range(ncols), dtype=str,
        low_memory=False, on_bad_lines='skip',
        encoding='latin-1'
    )

# ── Load FR.dat ───────────────────────────────────────────────────────────────
fr_path = os.path.join(FCC_DIR, 'FR.dat')
print(f'\nInspecting FR.dat...')
ncols_fr, fr_samples = sniff_dat(fr_path)
print(f'  Columns: {ncols_fr}')
for i, s in enumerate(fr_samples):
    print(f'  row {i}: {s[:120]}')

# Find which column has the call sign by looking for W-prefix strings
fr_sample_row = fr_samples[0].split('|')
print('\n  Column index -> value:')
for i, v in enumerate(fr_sample_row):
    print(f'    [{i:2d}] {v}')

# Load full table and auto-detect call_sign and frequency columns
fr_full = read_dat(fr_path, ncols_fr)

# Find call_sign column: contains values matching FCC call sign pattern (W/K + letters/digits)
import re
cs_pattern = re.compile(r'^[WK][A-Z0-9]{3,9}$')
call_col = None
freq_col = None
status_col = None

for col in range(ncols_fr):
    col_vals = fr_full[col].dropna().str.strip()
    # call sign column: majority match W/K pattern
    if call_col is None:
        matches = col_vals.str.match(r'^[WK][A-Z0-9]{3,9}$', na=False).sum()
        if matches > len(fr_full) * 0.3:
            call_col = col
            print(f'\n  Auto-detected call_sign column: {col}  (sample: {col_vals.iloc[0] if len(col_vals) else "?"})')
    # frequency column: numeric, 4-6 digits with decimals, reasonable MW range
    if freq_col is None:
        num_vals = pd.to_numeric(col_vals, errors='coerce')
        in_range = ((num_vals >= 1000) & (num_vals <= 90000)).sum()
        if in_range > len(fr_full) * 0.2:
            freq_col = col
            print(f'  Auto-detected frequency column: {col}  (sample: {col_vals.iloc[0] if len(col_vals) else "?"})')

# Status column: look for column with mostly single uppercase letters A/L/C/T
for col in range(ncols_fr):
    col_vals = fr_full[col].dropna().str.strip()
    status_matches = col_vals.isin(['A','L','C','T','X']).sum()
    if status_matches > len(fr_full) * 0.3:
        status_col = col
        print(f'  Auto-detected status column: {col}  (sample: {col_vals.value_counts().head(3).to_dict()})')
        break

print(f'\n  Using: call_col={call_col}, freq_col={freq_col}, status_col={status_col}')

if call_col is None or freq_col is None:
    raise ValueError('Could not auto-detect call_sign or frequency columns in FR.dat. Check sample above.')

fr_work = fr_full[[call_col, freq_col] + ([status_col] if status_col else [])].copy()
fr_work.columns = ['call_sign', 'frequency_assigned'] + (['status'] if status_col else [])
fr_work['call_sign']          = fr_work['call_sign'].str.strip().str.upper()
fr_work['frequency_assigned'] = pd.to_numeric(fr_work['frequency_assigned'], errors='coerce')
if status_col:
    active_statuses = ['A', 'L']
    fr_active = fr_work[
        fr_work['status'].str.strip().str.upper().isin(active_statuses) &
        fr_work['call_sign'].isin(oob_calls) &
        fr_work['frequency_assigned'].notna()
    ].copy()
else:
    fr_active = fr_work[
        fr_work['call_sign'].isin(oob_calls) &
        fr_work['frequency_assigned'].notna()
    ].copy()

print(f'  Active FR records for OOB call signs: {len(fr_active)}')
if len(fr_active) == 0:
    print('  WARNING: No matching records. Sample of call signs in FR.dat:')
    print(' ', fr_work['call_sign'].dropna().unique()[:20])
    print('  Sample of OOB call signs we need:')
    print(' ', sorted(oob_calls)[:20])

# ── Load F2.dat if present ────────────────────────────────────────────────────
f2_path = os.path.join(FCC_DIR, 'F2.dat')
bw_lookup = {}
if os.path.exists(f2_path):
    print(f'\nInspecting F2.dat...')
    ncols_f2, f2_samples = sniff_dat(f2_path)
    print(f'  Columns: {ncols_f2}')
    f2_sample_row = f2_samples[0].split('|')
    print('  Column index -> value:')
    for i, v in enumerate(f2_sample_row):
        print(f'    [{i:2d}] {v}')

    f2_full = read_dat(f2_path, ncols_f2)

    # Auto-detect columns same way
    f2_call_col = f2_freq_col = f2_bw_col = f2_status_col = None
    for col in range(ncols_f2):
        col_vals = f2_full[col].dropna().str.strip()
        if f2_call_col is None:
            if col_vals.str.match(r'^[WK][A-Z0-9]{3,9}$', na=False).sum() > len(f2_full) * 0.3:
                f2_call_col = col
        if f2_freq_col is None:
            num = pd.to_numeric(col_vals, errors='coerce')
            if ((num >= 1000) & (num <= 90000)).sum() > len(f2_full) * 0.2:
                f2_freq_col = col
        if f2_bw_col is None and col != f2_freq_col:
            num = pd.to_numeric(col_vals, errors='coerce')
            # BW column: values mostly 5-200 (MHz)
            if ((num >= 1) & (num <= 500)).sum() > len(f2_full) * 0.2:
                f2_bw_col = col
        if f2_status_col is None:
            if col_vals.isin(['A','L','C','T','X']).sum() > len(f2_full) * 0.3:
                f2_status_col = col

    print(f'  Using: call={f2_call_col}, freq={f2_freq_col}, bw={f2_bw_col}, status={f2_status_col}')

    if f2_call_col and f2_freq_col and f2_bw_col:
        cols_to_use = [f2_call_col, f2_freq_col, f2_bw_col] + ([f2_status_col] if f2_status_col else [])
        f2_work = f2_full[cols_to_use].copy()
        f2_work.columns = ['call_sign','frequency_assigned','bandwidth'] + (['status'] if f2_status_col else [])
        f2_work['call_sign']          = f2_work['call_sign'].str.strip().str.upper()
        f2_work['frequency_assigned'] = pd.to_numeric(f2_work['frequency_assigned'], errors='coerce')
        f2_work['bandwidth']          = pd.to_numeric(f2_work['bandwidth'], errors='coerce')
        if f2_status_col:
            f2_active = f2_work[
                f2_work['status'].str.strip().str.upper().isin(['A','L']) &
                f2_work['call_sign'].isin(oob_calls) &
                f2_work['bandwidth'].notna()
            ]
        else:
            f2_active = f2_work[
                f2_work['call_sign'].isin(oob_calls) &
                f2_work['bandwidth'].notna()
            ]
        bw_lookup = (
            f2_active
            .groupby(['call_sign','frequency_assigned'])['bandwidth']
            .first().to_dict()
        )
        print(f'  BW records loaded: {len(f2_active)}')
    else:
        print('  Could not auto-detect F2 columns — will infer BW from band')

# ── BW fallback by band ───────────────────────────────────────────────────────
def infer_bw(freq):
    if   5925  <= freq <= 6425:  return 30
    elif 6425  < freq <= 6875:   return 40
    elif 10700 <= freq <= 11700: return 30
    elif 17700 <= freq <= 19700: return 28
    elif 21200 <= freq <= 23600: return 28
    return None

# ── Build call_sign -> best frequency ─────────────────────────────────────────
def best_freq(grp):
    in_6g = grp[(grp['frequency_assigned'] >= 5925) & (grp['frequency_assigned'] <= 6875)]
    return in_6g.iloc[0]['frequency_assigned'] if len(in_6g) else grp.iloc[0]['frequency_assigned']

freq_lookup = (
    fr_active.groupby('call_sign')
    .apply(best_freq, include_groups=False)
    .to_dict()
)
print(f'\nCall signs resolved: {len(freq_lookup)} / {len(oob_calls)}')

# ── Apply fixes ───────────────────────────────────────────────────────────────
fixed = not_found = 0
fix_log = []

for idx in df[oob_mask].index:
    call = str(df.at[idx, 'A_Call_Sign']).strip().upper()
    if call in freq_lookup:
        new_freq = freq_lookup[call]
        bw = bw_lookup.get((call, new_freq)) or infer_bw(new_freq)
        df.at[idx, 'fr_frequency_assigned_MHz'] = new_freq
        df.at[idx, 'bandwidth']  = int(bw) if bw is not None else ''
        df.at[idx, 'freq_note']  = 'FREQ_CORRECTED_FROM_FCC'
        fixed += 1
        fix_log.append((df.at[idx, 'path_num'], call, new_freq, bw))
    else:
        df.at[idx, 'freq_note'] = 'FREQ_OOB_CALLSIGN_NOT_IN_FR_TABLE'
        not_found += 1

print(f'\nResults:  corrected={fixed}  unresolved={not_found}')
if fix_log:
    print('\nSample corrections:')
    for p, c, f, b in fix_log[:20]:
        print(f'  path {p:>6}  {c:<12}  {f:>10.3f} MHz  bw={b}')

df.to_csv(OUTPUT_CSV, index=False)
print(f'\nSaved: {OUTPUT_CSV}')

if not_found:
    still = df[df['freq_note'] == 'FREQ_OOB_CALLSIGN_NOT_IN_FR_TABLE']
    print(f'\nUnresolved ({not_found}):')
    print(still[['path_num','A_Call_Sign','fr_frequency_assigned_MHz']].to_string(index=False))