Spaces:

Neurazum
/

Vbai-DPA-2.3-and-Tbai-1.0-TR

Running

File size: 5,268 Bytes

0cb86dd
8fd258b
e2c4c7d
f2bace7
 
8fd258b
549beca
f2bace7
 
549beca
8c4b40b
549beca
 
f2bace7
 
 
 
12d89e8
 
 
f2bace7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
549beca
f2bace7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fd258b
f2bace7
 
 
 
 
 
 
 
 
8fd258b
f2bace7
 
 
8fd258b
f2bace7
 
 
b50d41e
f2bace7
76d8cd8
 
 
 
 
c8a4d76
f2bace7
 
549beca
12d89e8
76d8cd8
549beca
 
f2bace7
 
 
549beca
 
f2bace7
 
549beca
f2bace7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fd258b
f2bace7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8a4d76

import os
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

from PIL import Image
from torchvision import transforms
from thop import profile
from transformers import T5ForConditionalGeneration, T5Tokenizer
from huggingface_hub import hf_hub_download, snapshot_download

# Hugging Face Hub repository that hosts the image-classifier checkpoints,
# and the checkpoint filename for each model variant ("f", "c" and "q").
CLASSIFICATION_MODEL_REPO = "Neurazum/Vbai-DPA-2.3"
CLASSIFICATION_MODEL_FILENAME_F = "Vbai-DPA 2.3f.pt"
CLASSIFICATION_MODEL_FILENAME_C = "Vbai-DPA 2.3c.pt"
CLASSIFICATION_MODEL_FILENAME_Q = "Vbai-DPA 2.3q.pt"

# Hugging Face Hub repository holding the T5 text-generation model, and the
# sub-directory inside the downloaded snapshot that contains its files.
T5_MODEL_REPO = "Neurazum/Tbai-DPA-1.0"
T5_MODEL_SUBFOLDER = "Tbai-1.0-Od-300m-turkish-BETA"


class SimpleCNN(nn.Module):
    """Small convolutional image classifier available in three sizes.

    Every stage is ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool(2x2)``, so each
    stage halves the spatial resolution. The flattened feature map feeds a
    two-layer fully connected head with dropout in between.

    The size of ``fc1`` is fixed for 224x224 inputs (224 / 2**3 = 28 for the
    three-stage variants, 224 / 2**4 = 14 for the four-stage variant).

    Args:
        model_type: Variant key: ``"f"`` (3 stages, 16/32/64 channels),
            ``"c"`` (3 stages, 32/64/128 channels) or ``"q"`` (4 stages,
            64/128/256/512 channels).
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``model_type`` is not one of the known variants.
    """

    # model_type -> (output channels of each conv stage, width of fc1)
    _VARIANTS = {
        "f": ((16, 32, 64), 256),
        "c": ((32, 64, 128), 512),
        "q": ((64, 128, 256, 512), 1024),
    }
    # Input side length the fully connected head is dimensioned for.
    _INPUT_SIDE = 224

    def __init__(self, model_type="f", num_classes=6):
        super().__init__()
        if model_type not in self._VARIANTS:
            # Fail here with a clear message instead of later with an
            # AttributeError on the never-created ``fc1``.
            raise ValueError(f"model_type hatalı: {model_type}")

        self.num_classes = num_classes
        stage_channels, hidden_units = self._VARIANTS[model_type]

        # Layers are registered as conv1..convN so that state_dict keys stay
        # compatible with existing checkpoints.
        in_channels = 3
        for stage, out_channels in enumerate(stage_channels, start=1):
            conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
            setattr(self, f"conv{stage}", conv)
            in_channels = out_channels

        # Each stage ends in a 2x2 max-pool, halving height and width.
        side = self._INPUT_SIDE // (2 ** len(stage_channels))
        self.fc1 = nn.Linear(in_channels * side * side, hidden_units)

        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(self.fc1.out_features, num_classes)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        # Only the "q" variant defines a fourth convolution stage.
        if hasattr(self, "conv4"):
            x = self.pool(self.relu(self.conv4(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

def load_classification_model(device, model_type="f", num_classes=6):
    """Download a classifier checkpoint from the Hub and return it ready for inference.

    The checkpoint may be either a plain ``state_dict`` (loaded into a fresh
    ``SimpleCNN``) or a TorchScript archive (used as-is).

    Args:
        device: Device the weights are mapped to and the model is placed on.
        model_type: Variant key, one of ``"f"``, ``"c"`` or ``"q"``.
        num_classes: Number of output classes for the ``SimpleCNN`` head.

    Returns:
        The loaded model, switched to evaluation mode.

    Raises:
        ValueError: If ``model_type`` is not a known variant.
        RuntimeError: If the state_dict does not match the ``SimpleCNN``
            architecture (propagated unchanged from ``load_state_dict``).
    """
    if model_type == "f":
        filename = CLASSIFICATION_MODEL_FILENAME_F
    elif model_type == "c":
        filename = CLASSIFICATION_MODEL_FILENAME_C
    elif model_type == "q":
        filename = CLASSIFICATION_MODEL_FILENAME_Q
    else:
        raise ValueError(f"model_type hatalı: {model_type}")

    # NOTE(review): newer huggingface_hub releases appear to prefer ``token``
    # over ``use_auth_token`` - confirm against the pinned version.
    local_pt = hf_hub_download(
        repo_id=CLASSIFICATION_MODEL_REPO,
        filename=filename,
        use_auth_token=False
    )

    # Only the deserialisation is guarded: a RuntimeError raised later by
    # load_state_dict (key/shape mismatch) must reach the caller instead of
    # being masked by an unrelated TorchScript loading error.
    try:
        loaded = torch.load(local_pt, map_location=device)
    except RuntimeError:
        loaded = torch.jit.load(local_pt, map_location=device)

    if isinstance(loaded, torch.jit.ScriptModule):
        # Some torch versions hand TorchScript archives back directly from
        # torch.load; the scripted module already carries its own weights.
        model = loaded
    else:
        model = SimpleCNN(model_type=model_type, num_classes=num_classes).to(device)
        model.load_state_dict(loaded)

    model.eval()
    return model


def load_t5_model(device):
    """Fetch the T5 repository snapshot and load its tokenizer and model.

    The files are read from the ``T5_MODEL_SUBFOLDER`` directory of the
    downloaded snapshot, without any further network access.

    Args:
        device: Device the model is moved to.

    Returns:
        A ``(tokenizer, model)`` tuple; the model is in evaluation mode.
    """
    snapshot_root = snapshot_download(repo_id=T5_MODEL_REPO)
    checkpoint_dir = os.path.join(snapshot_root, T5_MODEL_SUBFOLDER)

    # Everything needed is already on disk after the snapshot download.
    offline = {"local_files_only": True}
    tokenizer = T5Tokenizer.from_pretrained(checkpoint_dir, **offline)
    t5 = T5ForConditionalGeneration.from_pretrained(checkpoint_dir, **offline)
    t5 = t5.to(device)

    t5.eval()
    return tokenizer, t5



# Preprocessing pipeline for classifier inputs: resize to 224x224, convert to
# a tensor, then normalise each of the three channels with the statistics below.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(_NORMALIZE_MEAN, _NORMALIZE_STD),
    ]
)

def predict_image(model, image: Image.Image, device):
    """Classify a single PIL image.

    Args:
        model: Classifier returning logits of shape ``(1, num_classes)``.
        image: Input image; any PIL mode is accepted and converted to RGB.
        device: Device on which inference is run.

    Returns:
        A tuple ``(class_index, confidence_percent, input_tensor, probabilities)``
        where ``input_tensor`` is the preprocessed batch of one image on
        ``device`` and ``probabilities`` is a NumPy array with one entry per
        class.
    """
    # The preprocessing pipeline and the network both expect exactly three
    # channels; grayscale, palette or RGBA inputs would otherwise fail.
    if image.mode != "RGB":
        image = image.convert("RGB")

    img_tensor = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(img_tensor)
        probs = F.softmax(logits, dim=1)[0]
        conf, idx = torch.max(probs, dim=0)
    return idx.item(), conf.item() * 100, img_tensor, probs.cpu().numpy()

def generate_comment_turkce(tokenizer, model, sinif_adi: str, device, max_length=64):
    """Generate a text comment for a class name with the seq2seq model.

    The prompt ``"Sınıf: <sinif_adi>"`` is tokenised (truncated to 32 tokens),
    moved to ``device`` and passed to ``model.generate`` using top-k / top-p
    sampling, so repeated calls may return different text.

    Args:
        tokenizer: Tokenizer matching ``model``.
        model: Generation model exposing ``generate``.
        sinif_adi: Class name inserted into the prompt.
        device: Device the encoded inputs are moved to.
        max_length: Maximum length passed to ``generate``.

    Returns:
        The decoded first generated sequence, without special tokens.
    """
    prompt = f"Sınıf: {sinif_adi}"

    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=32,
    )
    encoded = encoded.to(device)

    sampling_options = {
        "max_length": max_length,
        "do_sample": True,
        "top_k": 50,
        "top_p": 0.95,
        "no_repeat_ngram_size": 2,
        "early_stopping": True,
    }
    generated = model.generate(**encoded, **sampling_options)

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

def calculate_performance_metrics(model, device):
    """Profile a model on a single random 224x224 RGB input.

    FLOPs and parameter count come from ``thop.profile``; latency is the
    wall-clock time of one forward pass on ``device``.

    Args:
        model: Model to profile; it is moved to ``device``.
        device: Device used for profiling and timing.

    Returns:
        A dict with the keys ``size_pixels``, ``speed_cpu_b1``,
        ``speed_cpu_b32``, ``speed_v100_b1``, ``params_million`` and
        ``flops_billion``. Times are in milliseconds. Only ``speed_cpu_b1`` is
        measured (on whatever ``device`` is); ``speed_cpu_b32`` and
        ``speed_v100_b1`` are rough estimates obtained by dividing the
        measured time by 10 and 2 respectively.
    """
    model = model.to(device)
    test_input = torch.randn((1, 3, 224, 224)).to(device)
    flops, params = profile(model, inputs=(test_input,), verbose=False)

    on_cuda = test_input.is_cuda
    # Time inference only: no autograd bookkeeping, and on CUDA wait for
    # queued kernels before and after so the interval covers the real work.
    with torch.no_grad():
        if on_cuda:
            torch.cuda.synchronize()
        start = time.perf_counter()
        model(test_input)
        if on_cuda:
            torch.cuda.synchronize()
        cpu_time = (time.perf_counter() - start) * 1000

    return {
        "size_pixels": 224,
        "speed_cpu_b1": cpu_time,
        "speed_cpu_b32": cpu_time / 10,
        "speed_v100_b1": cpu_time / 2,
        "params_million": params / 1e6,
        "flops_billion": flops / 1e9
    }