import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.metrics import mean_squared_error, r2_score, classification_report, confusion_matrix
import warnings

# Silence every Python warning (not only pandas ones) so the terminal report stays clean.
warnings.filterwarnings(action='ignore')

# Tag printed in the console banners and stamped onto each plot.
username = "LJRW20"

def load_ltspice_data(file_path):
    """Read a whitespace-delimited LTspice text export into a DataFrame.

    The first line of the file is used as the header. Known LTspice trace
    names are renamed to short aliases; if that does not produce a ``Vout``
    column, the columns are named purely by position according to how many
    there are (3 or 6).

    Args:
        file_path: Path of the text export to read.

    Returns:
        pandas.DataFrame containing a ``Vout`` column. Other columns depend
        on the file: the positional layouts yield
        ``time, Vout, Lr_val`` (3 columns) or
        ``time, C1_val, R_val, Vin_val, Vout, Lr_val`` (6 columns).

    Raises:
        ValueError: No ``Vout`` column after renaming and the column count is
            neither 3 nor 6.
    """
    # LTspice trace name -> short alias used by the rest of the script.
    header_aliases = {
        'V(n009)': 'Vout',
        'V(c_measure)': 'C1_val',
        'V(r_measure)': 'R_val',
        'V(v_in)': 'Vin_val',
    }
    # Column count -> names assigned by position when the header is not recognised.
    positional_layouts = {
        3: ['time', 'Vout', 'Lr_val'],
        6: ['time', 'C1_val', 'R_val', 'Vin_val', 'Vout', 'Lr_val'],
    }

    frame = pd.read_csv(
        file_path,
        sep=r'\s+',
        # pandas drops everything from an 'S' to the end of the line, so lines
        # that start with 'S' vanish entirely (presumably LTspice's
        # "Step Information" separators -- confirm against the export).
        comment='S',
        header=0,
        encoding='latin-1',
        # Rows with too many fields are dropped instead of aborting the load.
        on_bad_lines='skip',
    )
    frame = frame.rename(columns=header_aliases)

    # Header already identified the output-voltage trace: nothing more to do.
    if 'Vout' in frame.columns:
        return frame

    layout = positional_layouts.get(frame.shape[1])
    if layout is None:
        raise ValueError(f"Unexpected column layout in {file_path}: {frame.columns.tolist()}")

    frame.columns = layout
    return frame

# ---------------------------------------------------------
# 1. DATA PREPROCESSING (Bulletproof Version)
# ---------------------------------------------------------
# Loads the training export, drops incomplete rows and reduces every run
# (one distinct Lr_val) to a single steady-state sample. Produces:
#   df_steady  - one row per run: Lr_val, Vout_Steady, C1_Actual
#   X          - Lr_val scaled by 1e6, shape (n_runs, 1)
#   y_reg      - steady-state Vout per run
#   C1_colors  - C1_Actual scaled by 1e9, used to colour the regression plot
# Any failure in here is reported and terminates the script with status 1.
print(f"--- INIT PORTFOLIO AUTH: {username} ---")
print("Loading and cleaning LTspice data... (This may take a moment for 100MB)")

try:
    # Use a unified loader to handle both 6-column and 3-column LTspice exports
    df_raw = load_ltspice_data('bigdata.txt')  # ---> DOUBLE CHECK THIS MATCHES YOUR 100MB FILENAME!

    print(f"Raw rows loaded into memory: {len(df_raw)}")
    df_raw = df_raw.dropna()
    print(f"Clean rows after dropping NaNs: {len(df_raw)}")

    if df_raw.empty:
        raise ValueError("The dataframe is empty! LTspice data did not load correctly.")

    # Extract the STEADY STATE voltage AND the actual Capacitance for each run:
    # average the last 10% of each run's rows. At least one row is always
    # kept, otherwise a run shorter than 10 rows would average an empty slice
    # and inject NaN into the model inputs. A missing C1_val column raises
    # KeyError here and is reported by the handler below.
    steady_rows = []
    for lr_value, run in df_raw.groupby('Lr_val'):
        settled = run.tail(max(1, int(len(run) * 0.1)))
        steady_rows.append({
            'Lr_val': lr_value,
            'Vout_Steady': settled['Vout'].mean(),
            'C1_Actual': settled['C1_val'].mean(),
        })
    df_steady = pd.DataFrame(steady_rows, columns=['Lr_val', 'Vout_Steady', 'C1_Actual'])

    X = (df_steady[['Lr_val']] * 1e6).values
    y_reg = df_steady['Vout_Steady'].values
    C1_colors = df_steady['C1_Actual'].values * 1e9  # Convert C1 to nF

    print(f"Successfully extracted {len(X)} steady-state data points!\n")

except Exception as e:
    print(f"\nCRITICAL ERROR during data loading: {e}")
    print("Please double-check your filename and text file contents.")
    # exit() is a convenience injected by the `site` module and reports
    # success (status 0); raise SystemExit directly with a failure status.
    raise SystemExit(1)
# ---------------------------------------------------------
# 2. REGRESSION TRAINING & METRICS
# ---------------------------------------------------------
# Quadratic model: expand the single input into degree-2 polynomial terms,
# then fit ordinary least squares on the expanded matrix.
poly = PolynomialFeatures(degree=2)
poly.fit(X)
X_poly = poly.transform(X)
reg_model = LinearRegression()
reg_model.fit(X_poly, y_reg)

# Score the fit on the same data it was trained on (no hold-out set is used here).
y_train_pred_reg = reg_model.predict(X_poly)
mse = mean_squared_error(y_reg, y_train_pred_reg)
r2 = r2_score(y_reg, y_train_pred_reg)

# coef_[0] belongs to the constant column PolynomialFeatures prepends by
# default, so the x and x^2 weights sit at positions 1 and 2.
theta_1 = reg_model.coef_[1]
theta_2 = reg_model.coef_[2]
regression_report = [
    f"=== REGRESSION TRAINING PROCESS ({username}) ===",
    "Equation: h_theta(x) = theta_0 + theta_1*x + theta_2*x^2",
    f"Theta 0 (Intercept): {reg_model.intercept_:.6f}",
    f"Theta 1 (Coef x):    {theta_1:.6f}",
    f"Theta 2 (Coef x^2):  {theta_2:.6f}",
    "--- Error Metrics ---",
    f"Training MSE:        {mse:.6f}",
    f"Training R-Squared:  {r2:.6f}",
    "=================================================\n",
]
print("\n".join(regression_report))

# One figure, three side-by-side panels: regression / classification / clustering.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(18, 5))
reg_ax = axs[0]

# Panel 0: the steady-state points, coloured by capacitance, with a colour bar.
scatter_reg = reg_ax.scatter(
    X,
    y_reg,
    c=C1_colors,
    cmap='coolwarm',
    edgecolor='k',
    alpha=0.8,
    label='Monte Carlo Data',
)
cbar = fig.colorbar(scatter_reg, ax=reg_ax)
cbar.set_label('Capacitance C1 (nF)')

# Evaluate the fitted curve on 100 evenly spaced inputs across the training range.
X_smooth = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
trend_values = reg_model.predict(poly.transform(X_smooth))
reg_ax.plot(
    X_smooth,
    trend_values,
    color='black',
    linestyle='--',
    linewidth=2,
    label='Poly Fit Trendline',
)
reg_ax.set_title('Regression: Predicting Output Voltage')
reg_ax.set_xlabel('Resonant Inductance (uH)')
reg_ax.set_ylabel('Output Voltage (V)')
reg_ax.legend()

# ---------------------------------------------------------
# 3. CLASSIFICATION TRAINING & METRICS
# ---------------------------------------------------------
# A run is labelled 1 (pass) when its steady-state output lies inside the
# window below (inclusive on both ends), otherwise 0 (fail). The same bounds
# drive the shaded band on the plot, so they are named once here.
VOUT_PASS_MIN, VOUT_PASS_MAX = 11.4, 12.6
y_class = np.where((y_reg >= VOUT_PASS_MIN) & (y_reg <= VOUT_PASS_MAX), 1, 0)
clf_model = DecisionTreeClassifier(max_depth=3).fit(X, y_class)

# Calculate Classification Metrics (on the training data itself)
y_train_pred_cls = clf_model.predict(X)

# Pin the label set explicitly: if every run passes (or every run fails) only
# one class is present, and classification_report would otherwise raise
# because two target_names were supplied for a single detected class. It also
# keeps the confusion matrix 2x2 in that situation.
class_labels = [0, 1]

print(f"=== CLASSIFICATION TRAINING PROCESS ({username}) ===")
print("Parameters: max_depth=3")
print("--- Classification Report ---")
print(classification_report(
    y_class,
    y_train_pred_cls,
    labels=class_labels,
    target_names=['Fail (0)', 'Pass (1)'],
))
print("--- Confusion Matrix ---")
print(confusion_matrix(y_class, y_train_pred_cls, labels=class_labels))
print("=====================================================\n")

# Panel 1: same points as the regression panel, coloured by pass/fail label.
colors = {1: 'green', 0: 'red'}
point_colors = [colors[val] for val in y_class]
axs[1].scatter(X, y_reg, c=point_colors, edgecolor='k', alpha=0.8)
axs[1].axhspan(VOUT_PASS_MIN, VOUT_PASS_MAX, color='green', alpha=0.2, label='12V Target Window')
axs[1].set_title('Classification: Operational Boundary')
axs[1].set_xlabel('Resonant Inductance (uH)')
axs[1].set_ylabel('Output Voltage (V)')  # matches the regression panel's axis label
axs[1].legend()

# ---------------------------------------------------------
# 4. UNSUPERVISED LEARNING TRAINING & METRICS
# ---------------------------------------------------------
# Cluster in the same coordinates the panel displays: inductance scaled to uH
# (X) against steady-state output voltage (y_reg). Feeding the raw Lr_val
# column instead puts a henry-scale quantity next to volts, so inductance
# contributes essentially nothing to the distances and its centroid
# coordinate prints as 0.00 under the two-decimal format below.
# NOTE(review): the two features are still not standardised; if their spreads
# differ a lot, consider scaling both before clustering.
cluster_features = np.column_stack((X[:, 0], y_reg))
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10).fit(cluster_features)
clusters = kmeans.labels_

print(f"=== UNSUPERVISED TRAINING PROCESS ({username}) ===")
print("Parameters: k=3, n_init=10")
print("--- Convergence Metrics ---")
print(f"Final Inertia (WCSS): {kmeans.inertia_:.2f}")
print(f"Iterations to converge: {kmeans.n_iter_}")
# First coordinate of each centroid is the inductance in uH, second is volts.
print("--- Cluster Centroids [Lr_val, Vout_Steady] ---")
for i, center in enumerate(kmeans.cluster_centers_):
    print(f"Cluster {i}: [{center[0]:.2f}, {center[1]:.2f}]")
print("=================================================\n")

# Panel 2: same points, coloured by assigned cluster index.
axs[2].scatter(X, y_reg, c=clusters, cmap='viridis', edgecolor='k', alpha=0.8)
axs[2].set_title('Unsupervised: K-Means Operating Islands')
axs[2].set_xlabel('Resonant Inductance (uH)')
axs[2].set_ylabel('Output Voltage (V)')  # matches the regression panel's axis label

# ---------------------------------------------------------
# 5. PORTFOLIO AUTHENTICATION
# ---------------------------------------------------------
# Stamp the author tag onto each of the three panels, horizontally centred and
# just above the bottom edge (positions are in axes-fraction coordinates).
stamp_text = f'Auth: {username}'
stamp_style = {
    'fontsize': 12,
    'color': 'gray',
    'alpha': 0.7,
    'ha': 'center',
}
stamp_box = {'facecolor': 'white', 'alpha': 0.8, 'edgecolor': 'none', 'pad': 2}

for ax in axs:
    # Each panel gets its own copy of the box settings.
    ax.text(0.5, 0.05, stamp_text, transform=ax.transAxes, bbox=dict(stamp_box), **stamp_style)

plt.tight_layout()
# Non-blocking so the script carries on to the extrapolation section.
plt.show(block=False)

# ---------------------------------------------------------
# 6. EXTRAPOLATION TEST (Comparing New Data)
# ---------------------------------------------------------
# Loads a second export, reduces it to one steady-state Vout per run, and
# compares those values with what the trained regression predicts. Produces
# X_new, y_new_actual, y_new_pred and error_pct for the comparison plot.
print("--- EXTRAPOLATION TEST PROCESS ---")
# The comparison data is read from its own export; point this filename at
# whichever file holds the runs you want to check the model against.
df_new_raw = load_ltspice_data('llc_new_raw.txt')
df_new_raw = df_new_raw.dropna()

# Fail with a clear message instead of an obscure shape error further down.
if df_new_raw.empty:
    raise ValueError("The extrapolation dataframe is empty! LTspice data did not load correctly.")

# Mean Vout over the last 10% of each run's rows. At least one row is always
# kept so a run shorter than 10 rows does not average an empty slice (NaN).
steady_rows = [
    (lr_value, run['Vout'].tail(max(1, int(len(run) * 0.1))).mean())
    for lr_value, run in df_new_raw.groupby('Lr_val')
]
df_new_steady = pd.DataFrame(steady_rows, columns=['Lr_val', 'Actual_Vout'])

X_new = (df_new_steady[['Lr_val']] * 1e6).values
y_new_actual = df_new_steady['Actual_Vout'].values

# Reuse the already-fitted polynomial expansion and regression model.
X_new_poly = poly.transform(X_new)
y_new_pred = reg_model.predict(X_new_poly)

# Absolute error relative to the actual value, in percent.
error_pct = np.abs((y_new_pred - y_new_actual) / y_new_actual) * 100

print(f"{'Inductor (uH)':<15} | {'Actual Vout (V)':<15} | {'Predicted Vout (V)':<20} | {'Error (%)':<10}")
print("-" * 68)
# Only print the first 20 to avoid blowing up the terminal if you have 300 points
for lr_uh, actual, predicted, err in zip(X_new[:20, 0], y_new_actual, y_new_pred, error_pct):
    print(f"{lr_uh:<15.2f} | {actual:<15.2f} | {predicted:<20.2f} | {err:<10.2f}")

# ---------------------------------------------------------
# VISUALIZING THE COMPARISON
# ---------------------------------------------------------
# Separate figure overlaying the fitted curve, the training points, and the
# actual vs predicted values for the new data set.
plt.figure(figsize=(10, 6))
cmp_ax = plt.gca()

# Draw the model curve across the combined span of training and new inputs.
x_low = min(X.min(), X_new.min())
x_high = max(X.max(), X_new.max())
X_smooth_extended = np.linspace(x_low, x_high, 200).reshape(-1, 1)
curve_values = reg_model.predict(poly.transform(X_smooth_extended))
cmp_ax.plot(
    X_smooth_extended,
    curve_values,
    color='blue',
    linestyle='--',
    linewidth=2,
    label='ML Prediction Curve (Extrapolated)',
)

cmp_ax.scatter(X, y_reg, color='black', alpha=0.3, label='Training Data Cloud')
cmp_ax.scatter(X_new, y_new_actual, color='green', marker='s', s=40, label='Actual Data')
cmp_ax.scatter(X_new, y_new_pred, color='red', marker='x', s=40, label='Predicted Data')

cmp_ax.set_title('Extrapolation Test: ML Model vs Hardware Data Variance', fontsize=14)
cmp_ax.set_xlabel('Resonant Inductance (uH)')
cmp_ax.set_ylabel('Output Voltage (V)')
cmp_ax.legend()
cmp_ax.grid(True, alpha=0.3)

# Author tag, centred near the bottom edge in axes-fraction coordinates.
cmp_ax.text(
    0.5,
    0.02,
    f'Auth: {username}',
    transform=cmp_ax.transAxes,
    fontsize=12,
    color='gray',
    alpha=0.7,
    ha='center',
    bbox={'facecolor': 'white', 'alpha': 0.8, 'edgecolor': 'none', 'pad': 2},
)

plt.tight_layout()
# Blocking call: keeps the windows open until the user closes them.
plt.show()