"""Train a simple feed-forward regressor on OS.time and save diagnostics.

Reads data.csv from the script's directory, standardizes the features,
trains a two-hidden-layer MLP with dropout, tracks losses/gradients/R^2
per epoch, and checkpoints the model with the lowest test loss.
"""

import copy
import os

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from scipy.stats import pearsonr
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

# Run from the script's own directory so relative paths resolve.
script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(script_dir)

# Load the data; OS.time (overall survival time) is the regression target.
data = pd.read_csv('data.csv')
X = data.drop(columns=['OS.time']).values
y = data['OS.time'].values

# Sanity check: report NaN/inf counts before scaling.
print(np.isnan(X).sum(), np.isnan(y).sum())
print(np.isinf(X).sum(), np.isinf(y).sum())

# Standardize features to zero mean and unit variance.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


class SimpleNN(nn.Module):
    """Two hidden layers of 100 units with ReLU activations and dropout."""

    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 100)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(100, 100)
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(100, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        return self.fc3(x)


def weights_init(m):
    """Kaiming initialization for linear layers (suited to ReLU)."""
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight)
        nn.init.zeros_(m.bias)


model = SimpleNN(X_train.shape[1])
model.apply(weights_init)

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

best_test_loss = float('inf')
best_model_state = None
num_epochs = 10000

train_losses = []
test_losses = []
all_predictions = []
gradients = []
r2_scores = []

for epoch in range(num_epochs):
    # Training pass.
    model.train()
    train_loss = 0.0
    epoch_gradients = []
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        # Record per-parameter gradient magnitudes (before clipping).
        for param in model.parameters():
            epoch_gradients.append(param.grad.abs().mean().item())
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    train_losses.append(train_loss)
    gradients.append(epoch_gradients)
    print(f'Epoch {epoch+1}, Train Loss: {train_loss}')

    # Evaluation pass (eval() disables dropout).
    model.eval()
    test_loss = 0.0
    predictions = []
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            predictions.append(outputs.numpy())
    test_loss /= len(test_loader)
    test_losses.append(test_loss)
    all_predictions.append(predictions)

    predictions_flat = np.concatenate(predictions).flatten()
    r2 = r2_score(y_test, predictions_flat)
    r2_scores.append(r2)
    print(f'Epoch {epoch+1}, R^2: {r2}')

    # Checkpoint the model with the lowest test loss so far. Deep-copy the
    # state dict so later optimizer steps don't mutate the stored reference.
    if test_loss < best_test_loss:
        best_test_loss = test_loss
        best_model_state = copy.deepcopy(model.state_dict())
        torch.save(best_model_state, 'best_model.pth')
        print(f'Saved new best model at epoch {epoch+1} with test loss {test_loss}')
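# Optional diagnostic (a minimal sketch): the per-parameter gradient
# magnitudes collected above are otherwise unused; averaging them per epoch
# gives a rough view of gradient flow during training. The output filename
# 'gradient_magnitude.png' is an assumption, chosen to match the other plots.
mean_grads = [float(np.mean(g)) for g in gradients]
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs + 1), mean_grads, label='Mean |grad| (pre-clipping)')
plt.xlabel('Epoch')
plt.ylabel('Mean absolute gradient')
plt.title('Gradient Magnitude over Epochs')
plt.legend()
plt.savefig('gradient_magnitude.png')
plt.close()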
# Train/test loss curves with a 50-epoch moving-average overlay.
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs + 1), train_losses, label='Train Loss')
plt.plot(range(1, num_epochs + 1), test_losses, label='Test Loss')
window_size = 50
train_losses_ma = pd.Series(train_losses).rolling(window=window_size).mean()
test_losses_ma = pd.Series(test_losses).rolling(window=window_size).mean()
plt.plot(range(1, num_epochs + 1), train_losses_ma, label='Train Loss (MA)', linestyle='--')
plt.plot(range(1, num_epochs + 1), test_losses_ma, label='Test Loss (MA)', linestyle='--')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Train and Test Loss with Moving Average')
plt.legend()
plt.savefig('train_test_loss.png')
plt.close()

# Predictions vs actuals at the final epoch, with Pearson correlation.
# Use np.concatenate (not np.array) since the per-batch arrays can have
# unequal lengths when the test set size is not a multiple of the batch size.
final_predictions = np.concatenate(all_predictions[-1]).flatten()
actuals = y_test_tensor.numpy().flatten()
correlation, p_value = pearsonr(actuals, final_predictions)
print(f'Pearson Correlation: {correlation}')
print(f'P-value: {p_value}')

plt.figure(figsize=(10, 5))
plt.scatter(actuals, final_predictions, color='blue',
            label=f'Predictions vs Actuals (r={correlation:.2f}, p={p_value:.2g})')
plt.plot([min(actuals), max(actuals)], [min(actuals), max(actuals)],
         color='red', linestyle='--', label='Ideal Fit')
plt.xlabel('Actual OS.time')
plt.ylabel('Predicted OS.time')
plt.title('Predictions vs Actuals')
plt.legend()
plt.savefig('predictions_vs_actuals.png')
plt.close()

# Distribution of prediction errors at the final epoch.
errors = final_predictions - actuals
plt.figure(figsize=(10, 5))
plt.hist(errors, bins=30, color='purple', alpha=0.7)
plt.xlabel('Prediction Error')
plt.ylabel('Frequency')
plt.title('Error Distribution')
plt.savefig('error_distribution.png')
plt.close()

# Overlay predictions from roughly every 1% of epochs against the actuals.
colors = cm.viridis(np.linspace(0, 1, num_epochs))
plt.figure(figsize=(10, 5))
plt.plot(actuals, label='Actual Values', color='b', marker='o', linestyle='-')
for i in range(0, num_epochs, max(1, num_epochs // 100)):
    predictions = np.concatenate(all_predictions[i]).flatten()
    plt.plot(predictions, label=f'Epoch {i+1}', color=colors[i], linestyle='--')
plt.xlabel('Sample Index')
plt.ylabel('OS.time')
plt.title('Actual vs Predicted Values Over Time')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.savefig('actual_vs_predicted_over_time.png')
plt.close()

# Weight histograms per fully connected layer. model.children() also yields
# the dropout modules, so enumerate only the linear layers to keep the
# "Layer N" labels and filenames consecutive.
linear_layers = [m for m in model.children() if isinstance(m, nn.Linear)]
for i, layer in enumerate(linear_layers):
    plt.figure(figsize=(10, 5))
    plt.hist(layer.weight.detach().numpy().flatten(), bins=30, alpha=0.6, color='blue')
    plt.xlabel(f'Layer {i+1} Weights')
    plt.ylabel('Frequency')
    plt.title(f'Weight Distribution of Layer {i+1}')
    plt.savefig(f'layer_{i+1}_weight_distribution.png')
    plt.close()

# Crude feature importance: column-wise sum of |weights| in the first layer
# (fc1.weight has shape [hidden, input], so axis=0 sums over hidden units).
importances = np.abs(model.fc1.weight.detach().numpy()).sum(axis=0)
indices = np.argsort(importances)
plt.figure(figsize=(10, 5))
plt.barh(range(X_train.shape[1]), importances[indices], align='center')
plt.yticks(range(X_train.shape[1]), indices)  # label sorted bars with their feature indices
plt.xlabel('Importance')
plt.ylabel('Feature Index')
plt.title('Feature Importances in the First Layer')
plt.savefig('feature_importances.png')
plt.close()

# Weight heatmaps per fully connected layer.
for i, layer in enumerate(linear_layers):
    plt.figure(figsize=(10, 5))
    plt.imshow(layer.weight.detach().numpy(), aspect='auto', cmap='viridis')
    plt.colorbar()
    plt.title(f'Weight Heatmap of Layer {i+1}')
    plt.xlabel('Input Features')
    plt.ylabel('Neurons')
    plt.savefig(f'layer_{i+1}_weight_heatmap.png')
    plt.close()

# Test-set R^2 over epochs, with the same moving-average overlay.
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs + 1), r2_scores, label='R^2 Score')
r2_scores_ma = pd.Series(r2_scores).rolling(window=window_size).mean()
plt.plot(range(1, num_epochs + 1), r2_scores_ma, label='R^2 Score (MA)', linestyle='--')
plt.xlabel('Epoch')
plt.ylabel('R^2 Score')
plt.title('R^2 Score over Epochs')
plt.legend()
plt.savefig('r2_over_epochs.png')
plt.close()
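# Optional (a sketch, assuming 'best_model.pth' was written during training):
# the plots above all use the last-epoch weights; reloading the best
# checkpoint reports metrics for the model with the lowest test loss instead.
best_model = SimpleNN(X_train.shape[1])
best_model.load_state_dict(torch.load('best_model.pth'))
best_model.eval()
with torch.no_grad():
    best_preds = best_model(X_test_tensor).numpy().flatten()
print(f'Best-checkpoint R^2: {r2_score(y_test, best_preds)}')
print(f'Best-checkpoint MSE: {float(np.mean((best_preds - y_test) ** 2))}')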