"""Fit regression, classification and clustering models to LTspice sweep data.

The script loads a whitespace-delimited LTspice export, reduces every run
(one run per distinct ``Lr_val``) to its steady-state output voltage, trains
three small scikit-learn models on the result, plots them, and finally
compares the regression model against a second export.
"""

import sys
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    mean_squared_error,
    r2_score,
)
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeClassifier

# Suppress annoying pandas warnings for clean terminal output
warnings.filterwarnings('ignore')

username = "LJRW20"

# ---> DOUBLE CHECK THIS MATCHES YOUR 100MB FILENAME!
TRAINING_FILE = 'bigdata.txt'
# Separate export used for the extrapolation comparison in section 6.
EXTRAPOLATION_FILE = 'llc_new_raw.txt'

# Fraction of each run (taken from its end) that is averaged as steady state.
STEADY_STATE_FRACTION = 0.1
# Output-voltage window that counts as a "pass" for the classifier.
VOUT_PASS_MIN = 11.4
VOUT_PASS_MAX = 12.6


def load_ltspice_data(file_path):
    """Read a whitespace-delimited LTspice export into a DataFrame.

    Known LTspice trace names are renamed to short column names. If no
    ``Vout`` column results from the rename, the columns are named
    positionally for the 3-column and 6-column layouts.

    Args:
        file_path: Path of the text export to read.

    Returns:
        A DataFrame with at least ``Vout`` and ``Lr_val`` columns for the
        recognised layouts.

    Raises:
        ValueError: If there is no ``Vout`` column and the column count is
            neither 3 nor 6.
    """
    # comment='S' makes pandas discard everything from the first 'S' on a
    # line -- presumably to drop LTspice "Step Information" separator lines.
    # NOTE(review): confirm no header/data token contains an uppercase 'S'.
    df = pd.read_csv(
        file_path,
        sep=r'\s+',
        comment='S',
        header=0,
        encoding='latin-1',
        on_bad_lines='skip',
    )

    df = df.rename(columns={
        'V(n009)': 'Vout',
        'V(c_measure)': 'C1_val',
        'V(r_measure)': 'R_val',
        'V(v_in)': 'Vin_val',
    })

    if 'Vout' not in df.columns:
        if df.shape[1] == 3:
            df.columns = ['time', 'Vout', 'Lr_val']
        elif df.shape[1] == 6:
            df.columns = ['time', 'C1_val', 'R_val', 'Vin_val', 'Vout', 'Lr_val']
        else:
            raise ValueError(f"Unexpected column layout in {file_path}: {df.columns.tolist()}")

    return df


def _steady_state_tail(run):
    """Return the trailing STEADY_STATE_FRACTION of *run*, never fewer than one row."""
    # Without the clamp, runs shorter than 10 rows select zero rows and the
    # mean becomes NaN, which later breaks model fitting.
    n_rows = max(1, int(len(run) * STEADY_STATE_FRACTION))
    return run.tail(n_rows)


def extract_steady_state(df, value_columns):
    """Average the tail of every ``Lr_val`` run.

    Args:
        df: Raw data containing ``Lr_val`` and every source column named in
            *value_columns*.
        value_columns: Mapping of output column name -> source column name.

    Returns:
        A DataFrame with one row per distinct ``Lr_val`` and one column per
        key of *value_columns*.

    Raises:
        KeyError: If ``Lr_val`` or a requested source column is missing.
    """
    def summarize(run):
        tail = _steady_state_tail(run)
        return pd.Series({
            out_name: tail[src_name].mean()
            for out_name, src_name in value_columns.items()
        })

    return df.groupby('Lr_val').apply(summarize).reset_index()


def stamp_auth(ax, y_pos):
    """Draw the 'Auth: <username>' watermark centred at height *y_pos* (axes coords)."""
    ax.text(
        0.5, y_pos, f'Auth: {username}',
        transform=ax.transAxes, fontsize=12, color='gray', alpha=0.7, ha='center',
        bbox=dict(facecolor='white', alpha=0.8, edgecolor='none', pad=2),
    )


# ---------------------------------------------------------
# 1. DATA PREPROCESSING (Bulletproof Version)
# ---------------------------------------------------------
print(f"--- INIT PORTFOLIO AUTH: {username} ---")
print("Loading and cleaning LTspice data... (This may take a moment for 100MB)")

try:
    # Use a unified loader to handle both 6-column and 3-column LTspice exports
    df_raw = load_ltspice_data(TRAINING_FILE)
    print(f"Raw rows loaded into memory: {len(df_raw)}")

    df_raw = df_raw.dropna()
    print(f"Clean rows after dropping NaNs: {len(df_raw)}")

    if df_raw.empty:
        raise ValueError("The dataframe is empty! LTspice data did not load correctly.")

    # Extract the STEADY STATE voltage AND the actual Capacitance for each run
    df_steady = extract_steady_state(
        df_raw, {'Vout_Steady': 'Vout', 'C1_Actual': 'C1_val'}
    )

    X = (df_steady[['Lr_val']] * 1e6).values  # scaled by 1e6; plotted as uH
    y_reg = df_steady['Vout_Steady'].values
    C1_colors = df_steady['C1_Actual'].values * 1e9  # Convert C1 to nF
    print(f"Successfully extracted {len(X)} steady-state data points!\n")

except Exception as e:
    # Top-level boundary: report the problem and stop with a failing status.
    print(f"\nCRITICAL ERROR during data loading: {e}")
    print("Please double-check your filename and text file contents.")
    sys.exit(1)

# ---------------------------------------------------------
# 2. REGRESSION TRAINING & METRICS
# ---------------------------------------------------------
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)
reg_model = LinearRegression().fit(X_poly, y_reg)

# Calculate Training Error
y_train_pred_reg = reg_model.predict(X_poly)
mse = mean_squared_error(y_reg, y_train_pred_reg)
r2 = r2_score(y_reg, y_train_pred_reg)

print(f"=== REGRESSION TRAINING PROCESS ({username}) ===")
print("Equation: h_theta(x) = theta_0 + theta_1*x + theta_2*x^2")
print(f"Theta 0 (Intercept): {reg_model.intercept_:.6f}")
# coef_[0] belongs to the constant column added by PolynomialFeatures.
print(f"Theta 1 (Coef x): {reg_model.coef_[1]:.6f}")
print(f"Theta 2 (Coef x^2): {reg_model.coef_[2]:.6f}")
print("--- Error Metrics ---")
print(f"Training MSE: {mse:.6f}")
print(f"Training R-Squared: {r2:.6f}")
print("=================================================\n")

# Set up the 3-panel figure for regression/classification/clustering plots
fig, axs = plt.subplots(1, 3, figsize=(18, 5))

# UPDATE: Scatter plot now uses C1_colors and a colormap!
scatter_reg = axs[0].scatter(
    X, y_reg, c=C1_colors, cmap='coolwarm', edgecolor='k', alpha=0.8,
    label='Monte Carlo Data',
)
cbar = fig.colorbar(scatter_reg, ax=axs[0])
cbar.set_label('Capacitance C1 (nF)')

X_smooth = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
axs[0].plot(
    X_smooth, reg_model.predict(poly.transform(X_smooth)),
    color='black', linestyle='--', linewidth=2, label='Poly Fit Trendline',
)
axs[0].set_title('Regression: Predicting Output Voltage')
axs[0].set_xlabel('Resonant Inductance (uH)')
axs[0].set_ylabel('Output Voltage (V)')
axs[0].legend()

# ---------------------------------------------------------
# 3. CLASSIFICATION TRAINING & METRICS
# ---------------------------------------------------------
y_class = np.where((y_reg >= VOUT_PASS_MIN) & (y_reg <= VOUT_PASS_MAX), 1, 0)
clf_model = DecisionTreeClassifier(max_depth=3).fit(X, y_class)

# Calculate Classification Metrics
y_train_pred_cls = clf_model.predict(X)

print(f"=== CLASSIFICATION TRAINING PROCESS ({username}) ===")
print("Parameters: max_depth=3")
print("--- Classification Report ---")
# labels=[0, 1] keeps the report valid when every run lands in one class;
# without it the two target_names no longer match the single observed label.
print(classification_report(
    y_class, y_train_pred_cls,
    labels=[0, 1], target_names=['Fail (0)', 'Pass (1)'], zero_division=0,
))
print("--- Confusion Matrix ---")
print(confusion_matrix(y_class, y_train_pred_cls, labels=[0, 1]))
print("=====================================================\n")

colors = {1: 'green', 0: 'red'}
point_colors = [colors[val] for val in y_class]
axs[1].scatter(X, y_reg, c=point_colors, edgecolor='k', alpha=0.8)
axs[1].axhspan(VOUT_PASS_MIN, VOUT_PASS_MAX, color='green', alpha=0.2,
               label='12V Target Window')
axs[1].set_title('Classification: Operational Boundary')
axs[1].set_xlabel('Resonant Inductance (uH)')
axs[1].legend()

# ---------------------------------------------------------
# 4. UNSUPERVISED LEARNING TRAINING & METRICS
# ---------------------------------------------------------
# NOTE(review): the features are unscaled and Lr_val is fitted in its raw
# units (the ones multiplied by 1e6 for the uH plots), so Vout_Steady likely
# dominates the distance metric -- confirm this is intended.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10).fit(
    df_steady[['Lr_val', 'Vout_Steady']]
)
clusters = kmeans.labels_

print(f"=== UNSUPERVISED TRAINING PROCESS ({username}) ===")
print("Parameters: k=3, n_init=10")
print("--- Convergence Metrics ---")
print(f"Final Inertia (WCSS): {kmeans.inertia_:.2f}")
print(f"Iterations to converge: {kmeans.n_iter_}")
# Lr centroids are shown in uH (same 1e6 scaling as the plots); in raw units
# a two-decimal format would print every centroid as 0.00.
print("--- Cluster Centroids [Lr_val (uH), Vout_Steady (V)] ---")
for i, center in enumerate(kmeans.cluster_centers_):
    print(f"Cluster {i}: [{center[0] * 1e6:.2f}, {center[1]:.2f}]")
print("=================================================\n")

axs[2].scatter(X, y_reg, c=clusters, cmap='viridis', edgecolor='k', alpha=0.8)
axs[2].set_title('Unsupervised: K-Means Operating Islands')
axs[2].set_xlabel('Resonant Inductance (uH)')

# ---------------------------------------------------------
# 5. PORTFOLIO AUTHENTICATION
# ---------------------------------------------------------
for ax in axs:
    stamp_auth(ax, 0.05)

plt.tight_layout()
plt.show(block=False)

# ---------------------------------------------------------
# 6. EXTRAPOLATION TEST (Comparing New Data)
# ---------------------------------------------------------
print("--- EXTRAPOLATION TEST PROCESS ---")

# The comparison data comes from EXTRAPOLATION_FILE. If you have no separate
# extrapolation export, point that constant at the training file instead.
try:
    df_new_raw = load_ltspice_data(EXTRAPOLATION_FILE)
    df_new_raw = df_new_raw.dropna()

    if df_new_raw.empty:
        raise ValueError("The extrapolation dataframe is empty!")

    df_new_steady = extract_steady_state(df_new_raw, {'Actual_Vout': 'Vout'})
except Exception as e:
    # Same report-and-exit handling as the training load above.
    print(f"\nCRITICAL ERROR during extrapolation data loading: {e}")
    print("Please double-check your filename and text file contents.")
    sys.exit(1)

X_new = (df_new_steady[['Lr_val']] * 1e6).values
y_new_actual = df_new_steady['Actual_Vout'].values

X_new_poly = poly.transform(X_new)
y_new_pred = reg_model.predict(X_new_poly)

error_pct = np.abs((y_new_pred - y_new_actual) / y_new_actual) * 100

print(f"{'Inductor (uH)':<15} | {'Actual Vout (V)':<15} | {'Predicted Vout (V)':<20} | {'Error (%)':<10}")
print("-" * 68)

# Only print the first 20 to avoid blowing up the terminal if you have 300 points
for lr_uh, actual, predicted, err in zip(
    X_new[:20, 0], y_new_actual[:20], y_new_pred[:20], error_pct[:20]
):
    print(f"{lr_uh:<15.2f} | {actual:<15.2f} | {predicted:<20.2f} | {err:<10.2f}")

# ---------------------------------------------------------
# VISUALIZING THE COMPARISON
# ---------------------------------------------------------
plt.figure(figsize=(10, 6))

# Span both data sets so the fitted curve is drawn over the new points too.
X_smooth_extended = np.linspace(
    min(X.min(), X_new.min()), max(X.max(), X_new.max()), 200
).reshape(-1, 1)
plt.plot(
    X_smooth_extended, reg_model.predict(poly.transform(X_smooth_extended)),
    color='blue', linestyle='--', linewidth=2,
    label='ML Prediction Curve (Extrapolated)',
)

plt.scatter(X, y_reg, color='black', alpha=0.3, label='Training Data Cloud')
plt.scatter(X_new, y_new_actual, color='green', marker='s', s=40, label='Actual Data')
plt.scatter(X_new, y_new_pred, color='red', marker='x', s=40, label='Predicted Data')

plt.title('Extrapolation Test: ML Model vs Hardware Data Variance', fontsize=14)
plt.xlabel('Resonant Inductance (uH)')
plt.ylabel('Output Voltage (V)')
plt.legend()
plt.grid(True, alpha=0.3)

stamp_auth(plt.gca(), 0.02)

plt.tight_layout()
plt.show()