import os
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

from PIL import Image
from torchvision import transforms
from thop import profile
from transformers import T5ForConditionalGeneration, T5Tokenizer
from huggingface_hub import hf_hub_download, snapshot_download

# Hugging Face Hub repository id from which the classifier checkpoint is
# downloaded (passed as ``repo_id`` to ``hf_hub_download``).
CLASSIFICATION_MODEL_REPO = "Neurazum/Vbai-DPA-2.3"
# Checkpoint file names inside that repository; one per supported
# ``model_type`` value ("f", "c" and "q" respectively).
CLASSIFICATION_MODEL_FILENAME_F = "Vbai-DPA 2.3f.pt"
CLASSIFICATION_MODEL_FILENAME_C = "Vbai-DPA 2.3c.pt"
CLASSIFICATION_MODEL_FILENAME_Q = "Vbai-DPA 2.3q.pt"

# Hub repository that is snapshot-downloaded for the T5 model, and the
# subfolder of that snapshot holding the tokenizer and model files.
T5_MODEL_REPO = "Neurazum/Tbai-DPA-1.0"
T5_MODEL_SUBFOLDER = "Tbai-1.0-Od-300m-turkish-BETA"


class SimpleCNN(nn.Module):
    """Plain convolutional image classifier available in three sizes.

    Each variant is a stack of 3x3 convolutions (stride 1, padding 1), each
    followed by ReLU and 2x2 max pooling, then two fully connected layers
    with dropout in between.

    * ``"f"``: 3 conv layers (16/32/64 channels), 256 hidden units.
    * ``"c"``: 3 conv layers (32/64/128 channels), 512 hidden units.
    * ``"q"``: 4 conv layers (64/128/256/512 channels), 1024 hidden units.

    The input size of ``fc1`` is fixed for 224x224 images: three poolings
    leave a 28x28 feature map, four leave 14x14.

    Args:
        model_type: One of ``"f"``, ``"c"`` or ``"q"``.
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``model_type`` is not one of the supported variants.
    """

    def __init__(self, model_type="f", num_classes=6):
        super(SimpleCNN, self).__init__()
        self.num_classes = num_classes

        if model_type == "f":
            self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
            self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
            self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
            self.fc1 = nn.Linear(64 * 28 * 28, 256)
        elif model_type == "c":
            self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
            self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
            self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
            self.fc1 = nn.Linear(128 * 28 * 28, 512)
        elif model_type == "q":
            self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
            self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
            self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
            self.conv4 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
            self.fc1 = nn.Linear(512 * 14 * 14, 1024)
        else:
            # Without this branch no layers are created and the fc2 line
            # below fails with an opaque AttributeError on ``self.fc1``.
            raise ValueError(f"model_type hatalı: {model_type}")

        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(self.fc1.out_features, num_classes)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        # Only the "q" variant defines a fourth convolution.
        if hasattr(self, "conv4"):
            x = self.pool(self.relu(self.conv4(x)))
        # flatten() also accepts non-contiguous activations, unlike view().
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

def load_classification_model(device, model_type="f", num_classes=6):
    """Download a classifier checkpoint from the Hub and return it ready for inference.

    The file is first read with ``torch.load``. A state dict is loaded into
    a freshly built ``SimpleCNN``; an already-instantiated module is used
    as-is. If that path raises ``RuntimeError`` the file is retried as a
    TorchScript archive via ``torch.jit.load``.

    Args:
        device: Target device, used for ``map_location`` and ``Module.to``.
        model_type: ``"f"``, ``"c"`` or ``"q"``; selects both the checkpoint
            file and the ``SimpleCNN`` variant.
        num_classes: Number of outputs of the ``SimpleCNN`` built for
            state-dict checkpoints.

    Returns:
        The loaded model, switched to eval mode.

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
        RuntimeError: If the file loads neither as a state dict nor as
            TorchScript; the message carries both failures.
    """
    filenames = {
        "f": CLASSIFICATION_MODEL_FILENAME_F,
        "c": CLASSIFICATION_MODEL_FILENAME_C,
        "q": CLASSIFICATION_MODEL_FILENAME_Q,
    }
    try:
        filename = filenames[model_type]
    except (KeyError, TypeError):
        raise ValueError(f"model_type hatalı: {model_type}") from None

    local_pt = hf_hub_download(
        repo_id=CLASSIFICATION_MODEL_REPO,
        filename=filename,
        use_auth_token=False,
    )

    try:
        # NOTE(security): torch.load unpickles the downloaded file; only
        # point CLASSIFICATION_MODEL_REPO at a repository you trust.
        checkpoint = torch.load(local_pt, map_location=device)
        if isinstance(checkpoint, nn.Module):
            # Some PyTorch versions hand back a ready module (for example a
            # TorchScript archive dispatched by torch.load itself).
            model = checkpoint.to(device)
        else:
            model = SimpleCNN(model_type=model_type, num_classes=num_classes).to(device)
            model.load_state_dict(checkpoint)
    except RuntimeError as state_dict_error:
        try:
            model = torch.jit.load(local_pt, map_location=device)
        except RuntimeError as script_error:
            # Report both attempts; otherwise a state-dict shape/key mismatch
            # is hidden behind an unrelated TorchScript parsing error.
            raise RuntimeError(
                f"Could not load {filename!r} as a state dict ({state_dict_error}) "
                f"or as a TorchScript archive ({script_error})"
            ) from state_dict_error

    model.eval()
    return model


def load_t5_model(device):
    """Download the T5 repository snapshot and load its tokenizer and model.

    Args:
        device: Device the generation model is moved to.

    Returns:
        A ``(tokenizer, model)`` tuple; the model is in eval mode.
    """
    snapshot_root = snapshot_download(repo_id=T5_MODEL_REPO)
    checkpoint_dir = os.path.join(snapshot_root, T5_MODEL_SUBFOLDER)

    # The files are already on disk after the snapshot download, so both
    # loaders are told to stay offline.
    load_options = {"local_files_only": True}
    tokenizer = T5Tokenizer.from_pretrained(checkpoint_dir, **load_options)
    seq2seq = T5ForConditionalGeneration.from_pretrained(checkpoint_dir, **load_options)
    seq2seq = seq2seq.to(device)

    seq2seq.eval()
    return tokenizer, seq2seq


# Preprocessing applied to every input image: resize to 224x224, convert to
# a float tensor, then normalise each of the three channels with the
# mean/std values commonly used for ImageNet-pretrained torchvision models.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

def predict_image(model, image: Image.Image, device):
    """Classify a single PIL image.

    Args:
        model: Callable mapping a ``(1, 3, 224, 224)`` tensor to logits of
            shape ``(1, num_classes)``.
        image: Input image in any PIL mode; it is converted to RGB first.
        device: Device the input tensor is moved to before inference.

    Returns:
        A tuple ``(class_index, confidence_percent, input_tensor,
        probabilities)``: the arg-max class as ``int``, its softmax
        probability scaled to 0-100, the preprocessed batch tensor on
        ``device``, and all class probabilities as a NumPy array.
    """
    # The normalisation step uses three-channel statistics, so grayscale,
    # palette or RGBA images would fail there unless converted.
    if image.mode != "RGB":
        image = image.convert("RGB")

    img_tensor = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(img_tensor)
        probs = F.softmax(logits, dim=1)[0]
        conf, idx = torch.max(probs, dim=0)
    return idx.item(), conf.item() * 100, img_tensor, probs.cpu().numpy()

def generate_comment_turkce(tokenizer, model, sinif_adi: str, device, max_length=64):
    """Generate a sampled text comment for the given class name.

    Args:
        tokenizer: Callable tokenizer that also provides ``decode``.
        model: Object exposing ``generate`` (the T5 generation model).
        sinif_adi: Class name inserted into the prompt.
        device: Device the encoded prompt is moved to.
        max_length: Maximum length passed to ``model.generate``.

    Returns:
        The first generated sequence decoded without special tokens.
    """
    prompt = f"Sınıf: {sinif_adi}"

    # Encode the prompt (truncated to 32 tokens) and move it to the device.
    tokenizer_options = {
        "return_tensors": "pt",
        "padding": "longest",
        "truncation": True,
        "max_length": 32,
    }
    encoded = tokenizer(prompt, **tokenizer_options)
    encoded = encoded.to(device)

    # Top-k / nucleus sampling without repeated bigrams.
    sampling_options = {
        "max_length": max_length,
        "do_sample": True,
        "top_k": 50,
        "top_p": 0.95,
        "no_repeat_ngram_size": 2,
        "early_stopping": True,
    }
    generated = model.generate(**encoded, **sampling_options)

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

def calculate_performance_metrics(model, device):
    """Profile ``model`` on one random 224x224 RGB input.

    Args:
        model: Module to profile; it is moved to ``device``.
        device: Device on which profiling and timing are run.

    Returns:
        A dict with:

        * ``size_pixels``: the fixed input edge length (224).
        * ``speed_cpu_b1``: measured latency in milliseconds of a single
          forward pass on ``device`` (despite the key name, this is not
          necessarily a CPU).
        * ``speed_cpu_b32``: ``speed_cpu_b1 / 10`` - a fixed-ratio estimate,
          not a measurement.
        * ``speed_v100_b1``: ``speed_cpu_b1 / 2`` - a fixed-ratio estimate,
          not a measurement.
        * ``params_million`` / ``flops_billion``: parameter and operation
          counts reported by ``thop.profile``, scaled by 1e6 / 1e9.
    """
    model = model.to(device)
    test_input = torch.randn((1, 3, 224, 224)).to(device)
    flops, params = profile(model, inputs=(test_input,), verbose=False)

    on_cuda = test_input.is_cuda
    # Time inference only: no autograd bookkeeping, and a monotonic
    # high-resolution clock instead of wall-clock time.
    with torch.no_grad():
        if on_cuda:
            # CUDA kernels run asynchronously; drain pending work first.
            torch.cuda.synchronize(test_input.device)
        start = time.perf_counter()
        _ = model(test_input)
        if on_cuda:
            # Wait for the forward pass to actually finish before stopping.
            torch.cuda.synchronize(test_input.device)
        cpu_time = (time.perf_counter() - start) * 1000

    return {
        "size_pixels": 224,
        "speed_cpu_b1": cpu_time,
        "speed_cpu_b32": cpu_time / 10,
        "speed_v100_b1": cpu_time / 2,
        "params_million": params / 1e6,
        "flops_billion": flops / 1e9,
    }