File size: 5,268 Bytes
0cb86dd
8fd258b
e2c4c7d
f2bace7
 
8fd258b
549beca
f2bace7
 
549beca
8c4b40b
549beca
 
f2bace7
 
 
 
12d89e8
 
 
f2bace7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
549beca
f2bace7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fd258b
f2bace7
 
 
 
 
 
 
 
 
8fd258b
f2bace7
 
 
8fd258b
f2bace7
 
 
b50d41e
f2bace7
76d8cd8
 
 
 
 
c8a4d76
f2bace7
 
549beca
12d89e8
76d8cd8
549beca
 
f2bace7
 
 
549beca
 
f2bace7
 
549beca
f2bace7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fd258b
f2bace7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8a4d76
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

from PIL import Image
from torchvision import transforms
from thop import profile
from transformers import T5ForConditionalGeneration, T5Tokenizer
from huggingface_hub import hf_hub_download, snapshot_download

# Hub repository that hosts the image-classifier checkpoints; passed as
# ``repo_id`` to ``hf_hub_download`` in ``load_classification_model``.
CLASSIFICATION_MODEL_REPO = "Neurazum/Vbai-DPA-2.3"
# Checkpoint file names inside that repository, one per ``model_type``
# variant ("f", "c" and "q" respectively).
CLASSIFICATION_MODEL_FILENAME_F = "Vbai-DPA 2.3f.pt"
CLASSIFICATION_MODEL_FILENAME_C = "Vbai-DPA 2.3c.pt"
CLASSIFICATION_MODEL_FILENAME_Q = "Vbai-DPA 2.3q.pt"

# Hub repository downloaded as a whole snapshot by ``load_t5_model``, and the
# sub-directory of that snapshot holding the T5 tokenizer and model files.
T5_MODEL_REPO = "Neurazum/Tbai-DPA-1.0"
T5_MODEL_SUBFOLDER = "Tbai-1.0-Od-300m-turkish-BETA"


class SimpleCNN(nn.Module):
    """Small convolutional image classifier available in three sizes.

    Each variant stacks 3x3 convolutions (stride 1, padding 1), every one
    followed by ReLU and 2x2 max-pooling, then a hidden fully connected layer,
    dropout (p=0.5) and the output layer.  The ``fc1`` input sizes are
    hard-coded for 3x224x224 input: 224 halved three times is 28, and halved
    four times is 14.

    Attribute names (``conv1`` ... ``conv4``, ``fc1``, ``fc2``) are the
    ``state_dict`` keys, so they must stay stable for checkpoints to load.

    Args:
        model_type: Variant selector. ``"f"`` uses 16/32/64 conv channels and
            a 256-unit hidden layer; ``"c"`` uses 32/64/128 channels and 512
            units; ``"q"`` uses four conv layers with 64/128/256/512 channels
            and 1024 units.
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``model_type`` is not ``"f"``, ``"c"`` or ``"q"``.
    """

    def __init__(self, model_type="f", num_classes=6):
        super().__init__()
        self.num_classes = num_classes

        if model_type == "f":
            self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
            self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
            self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
            self.fc1 = nn.Linear(64 * 28 * 28, 256)
        elif model_type == "c":
            self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
            self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
            self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
            self.fc1 = nn.Linear(128 * 28 * 28, 512)
        elif model_type == "q":
            self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
            self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
            self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
            self.conv4 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
            self.fc1 = nn.Linear(512 * 14 * 14, 1024)
        else:
            # An unknown variant used to fall through and crash a few lines
            # later with an opaque AttributeError on ``self.fc1``.  The
            # message matches the one used by ``load_classification_model``.
            raise ValueError(f"model_type hatalı: {model_type}")

        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(self.fc1.out_features, num_classes)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(self.relu(conv(x)))
        # Only the "q" variant defines a fourth convolution.
        if hasattr(self, "conv4"):
            x = self.pool(self.relu(self.conv4(x)))
        # flatten() gives the same result as view(batch, -1) but also accepts
        # non-contiguous activations (e.g. channels_last inputs).
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

def load_classification_model(device, model_type="f", num_classes=6):
    """Download a classifier checkpoint from the Hub and return it in eval mode.

    The checkpoint is first read with ``torch.load``.  If that yields a
    state dict it is loaded into a fresh ``SimpleCNN``; if it yields a module
    object, that module is used directly.  When loading raises
    ``RuntimeError`` the file is retried as a TorchScript archive through
    ``torch.jit.load``.

    Args:
        device: Device the weights are mapped to (passed as ``map_location``).
        model_type: Checkpoint variant: ``"f"``, ``"c"`` or ``"q"``.
        num_classes: Output size of the ``SimpleCNN`` that is built when the
            checkpoint is a state dict.

    Returns:
        The loaded model, switched to evaluation mode.

    Raises:
        ValueError: If ``model_type`` is not a known variant.
    """
    filenames = {
        "f": CLASSIFICATION_MODEL_FILENAME_F,
        "c": CLASSIFICATION_MODEL_FILENAME_C,
        "q": CLASSIFICATION_MODEL_FILENAME_Q,
    }
    if model_type not in filenames:
        raise ValueError(f"model_type hatalı: {model_type}")

    # NOTE(review): newer huggingface_hub releases appear to prefer ``token``
    # over ``use_auth_token`` -- confirm against the pinned version before
    # changing this keyword.
    local_pt = hf_hub_download(
        repo_id=CLASSIFICATION_MODEL_REPO,
        filename=filenames[model_type],
        use_auth_token=False,
    )

    try:
        loaded = torch.load(local_pt, map_location=device)
        if isinstance(loaded, nn.Module):
            # torch.load can hand back a ready module instead of a state dict
            # (e.g. a pickled model).  Feeding that to load_state_dict() fails
            # with an exception other than RuntimeError, which used to bypass
            # the fallback below and crash.
            model = loaded
        else:
            model = SimpleCNN(model_type=model_type, num_classes=num_classes).to(device)
            model.load_state_dict(loaded)
    except RuntimeError:
        # Not loadable as a plain state dict: retry as a TorchScript archive.
        model = torch.jit.load(local_pt, map_location=device)

    model.eval()
    return model


def load_t5_model(device):
    """Fetch the T5 snapshot from the Hub and load its tokenizer and model.

    The whole ``T5_MODEL_REPO`` repository is downloaded; the tokenizer and
    model are then read from its ``T5_MODEL_SUBFOLDER`` sub-directory without
    any further network access (``local_files_only=True``).

    Args:
        device: Device the model is moved to.

    Returns:
        A ``(tokenizer, model)`` tuple; the model is in evaluation mode.
    """
    snapshot_root = snapshot_download(repo_id=T5_MODEL_REPO)
    checkpoint_dir = os.path.join(snapshot_root, T5_MODEL_SUBFOLDER)

    t5_tokenizer = T5Tokenizer.from_pretrained(checkpoint_dir, local_files_only=True)

    t5_model = T5ForConditionalGeneration.from_pretrained(
        checkpoint_dir,
        local_files_only=True,
    )
    t5_model = t5_model.to(device)
    t5_model.eval()

    return t5_tokenizer, t5_model



# Preprocessing applied to every image before classification: resize to the
# fixed input size, convert to a tensor, then normalise each of the three
# channels with the mean/std values below.
_INPUT_SIZE = (224, 224)
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

def predict_image(model, image: Image.Image, device):
    """Classify a single PIL image.

    Args:
        model: Classifier returning logits of shape ``(batch, classes)``.
        image: Input image; any PIL mode is accepted and converted to RGB.
        device: Device the input tensor is moved to before inference.

    Returns:
        A 4-tuple ``(class_index, confidence_percent, input_tensor, probs)``:
        the arg-max class index as ``int``, its softmax probability scaled to
        0-100, the preprocessed batch-of-one tensor (on ``device``), and the
        per-class probabilities as a NumPy array.
    """
    # The preprocessing pipeline normalises exactly three channels, so
    # grayscale, palette or RGBA images must be converted first; otherwise the
    # channel count does not match and the call fails.
    if image.mode != "RGB":
        image = image.convert("RGB")

    img_tensor = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(img_tensor)
        probs = F.softmax(logits, dim=1)[0]
        conf, idx = torch.max(probs, dim=0)
    return idx.item(), conf.item() * 100, img_tensor, probs.cpu().numpy()

def generate_comment_turkce(tokenizer, model, sinif_adi: str, device, max_length=64):
    """Generate a sampled text comment for the given class name.

    The prompt ``"Sınıf: <sinif_adi>"`` is tokenized (truncated to at most 32
    tokens), moved to ``device`` and passed to ``model.generate`` with
    top-k / top-p sampling; the first generated sequence is decoded with
    special tokens stripped.

    Args:
        tokenizer: Callable tokenizer that also provides ``decode``.
        model: Model exposing ``generate``.
        sinif_adi: Class name inserted into the prompt.
        device: Device the encoded prompt is moved to.
        max_length: ``max_length`` forwarded to ``model.generate``.

    Returns:
        The decoded text of the first generated sequence.
    """
    prompt = f"Sınıf: {sinif_adi}"

    tokenizer_options = {
        "return_tensors": "pt",
        "padding": "longest",
        "truncation": True,
        "max_length": 32,
    }
    encoded = tokenizer(prompt, **tokenizer_options).to(device)

    sampling_options = {
        "max_length": max_length,
        "do_sample": True,
        "top_k": 50,
        "top_p": 0.95,
        "no_repeat_ngram_size": 2,
        "early_stopping": True,
    }
    generated = model.generate(**encoded, **sampling_options)

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

def calculate_performance_metrics(model, device):
    """Profile a model on one random 1x3x224x224 input.

    The operation and parameter counts come from ``thop.profile``.  A single
    forward pass is then timed on ``device``.  Only that one latency is
    measured: ``speed_cpu_b32`` and ``speed_v100_b1`` are derived from it by
    fixed divisors (10 and 2), and ``speed_cpu_b1`` is measured on whatever
    ``device`` is passed, even if that is not a CPU.

    Args:
        model: Model to profile; it is moved to ``device``.
        device: Device used for profiling and timing.

    Returns:
        Dict with keys ``size_pixels``, ``speed_cpu_b1``, ``speed_cpu_b32``,
        ``speed_v100_b1`` (all speeds in milliseconds), ``params_million`` and
        ``flops_billion``.
    """
    model = model.to(device)
    test_input = torch.randn((1, 3, 224, 224)).to(device)
    # NOTE(review): thop's documentation describes the first return value as
    # multiply-accumulate operations (MACs) -- confirm whether the
    # "flops_billion" key should be relabelled or doubled.
    flops, params = profile(model, inputs=(test_input,), verbose=False)

    target = torch.device(device)
    on_cuda = target.type == "cuda"

    # profile() has already run one forward pass, which serves as warm-up.
    # Gradient tracking is disabled so the timing reflects inference only.
    with torch.no_grad():
        if on_cuda:
            # CUDA kernels launch asynchronously; without synchronising, the
            # timer would only capture the launch overhead.
            torch.cuda.synchronize(target)
        # perf_counter() is monotonic and high-resolution; time.time() can
        # report 0 ms (or jump) for a fast forward pass.
        start = time.perf_counter()
        _ = model(test_input)
        if on_cuda:
            torch.cuda.synchronize(target)
        cpu_time = (time.perf_counter() - start) * 1000

    return {
        "size_pixels": 224,
        "speed_cpu_b1": cpu_time,
        "speed_cpu_b32": cpu_time / 10,
        "speed_v100_b1": cpu_time / 2,
        "params_million": params / 1e6,
        "flops_billion": flops / 1e9,
    }