In [2]:
import kagglehub
# Download the CIFAKE dataset through kagglehub. `path` is the location it
# returns; later cells derive the train/ and test/ folders from it.
path = kagglehub.dataset_download("birdy654/cifake-real-and-ai-generated-synthetic-images")
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/cifake-real-and-ai-generated-synthetic-images


In [3]:
path

'/kaggle/input/cifake-real-and-ai-generated-synthetic-images'

In [4]:
ls '/kaggle/input/cifake-real-and-ai-generated-synthetic-images'

[0m[01;34mtest[0m/ [01;34mtrain[0m/


In [None]:
import os
import glob

def _ensure_dir(directory):
    """Create `directory` with os.mkdir unless it already is a directory."""
    if not os.path.isdir(directory):
        os.mkdir(directory)


# Root folder of the downloaded dataset.
data_dir = str(path)

# Training split and its two class folders.
# The dog_/cat_ prefixes are legacy names: they hold the REAL and FAKE folders.
training_dir = os.path.join(data_dir, "train")
_ensure_dir(training_dir)

dog_training_dir = os.path.join(training_dir, "REAL")
_ensure_dir(dog_training_dir)

cat_training_dir = os.path.join(training_dir, "FAKE")
_ensure_dir(cat_training_dir)

# Validation split (the dataset's "test" folder) and its two class folders.
validation_dir = os.path.join(data_dir, "test")
_ensure_dir(validation_dir)

dog_validation_dir = os.path.join(validation_dir, "REAL")
_ensure_dir(dog_validation_dir)

cat_validation_dir = os.path.join(validation_dir, "FAKE")
_ensure_dir(cat_validation_dir)

In [6]:
import shutil

# Fraction of the matched paths that is moved into the training folders; the
# remainder is moved into the validation folders.
split_size = 0.80
# NOTE(review): these glob patterns are hard-coded under /content/data and do
# not use data_dir / path from the earlier cells -- confirm they match anything
# in this environment. If they match nothing, both loops below do nothing.
# NOTE(review): "FAKE*" / "REAL*" would also match a directory literally named
# FAKE or REAL inside /content/data/train -- verify that is intended.
cat_imgs_size = len(glob.glob("/content/data/train/FAKE*"))
dog_imgs_size = len(glob.glob("/content/data/train/REAL*"))

# Move the first split_size share of the FAKE* matches (in glob order) into
# cat_training_dir and the rest into cat_validation_dir.
for i,img in enumerate(glob.glob("/content/data/train/FAKE*")):
 if i < (cat_imgs_size * split_size):
 shutil.move(img,cat_training_dir)
 else:
 shutil.move(img,cat_validation_dir)

# Same split for the REAL* matches, into dog_training_dir / dog_validation_dir.
for i,img in enumerate(glob.glob("/content/data/train/REAL*")):
 if i < (dog_imgs_size * split_size):
 shutil.move(img,dog_training_dir)
 else:
 shutil.move(img,dog_validation_dir)

In [None]:
import torch
import torchvision
from torchvision import datasets, transforms

def _build_transforms():
    """Return the preprocessing pipeline shared by the train and test sets.

    Images are resized to 224x224, converted to tensors and normalized per
    channel with the mean/std constants used for torchvision's pretrained
    models.
    """
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])


# Class-per-subfolder image roots inside the downloaded dataset.
traindir = path + "/train"
testdir = path + "/test"

# Both splits use the same deterministic preprocessing (no augmentation).
train_transforms = _build_transforms()
test_transforms = _build_transforms()

train_data = datasets.ImageFolder(traindir, transform=train_transforms)
test_data = datasets.ImageFolder(testdir, transform=test_transforms)

# Shuffle only the training data. Evaluation does not benefit from shuffling,
# and a fixed order keeps the recorded per-batch validation losses comparable
# between epochs and runs.
trainloader = torch.utils.data.DataLoader(train_data, shuffle=True, batch_size=16)
testloader = torch.utils.data.DataLoader(test_data, shuffle=False, batch_size=16)


In [None]:
def make_train_step(model, optimizer, loss_fn):
    """Build a closure that performs one optimization step on a single batch.

    Args:
        model: module being trained.
        optimizer: optimizer holding the parameters to update.
        loss_fn: callable taking (predictions, targets) and returning a
            scalar loss tensor.

    Returns:
        A function train_step(x, y) that runs forward, backward and an
        optimizer step, then returns the batch loss as a detached 0-dim
        tensor.
    """
    def train_step(x, y):
        # Enter training mode *before* the forward pass; otherwise the batch
        # is evaluated in whatever mode the model was left in (e.g. eval()
        # after a validation pass), which changes dropout/batch-norm behavior.
        model.train()

        yhat = model(x)
        loss = loss_fn(yhat, y)

        loss.backward()
        optimizer.step()
        # Clear gradients so the next step starts from a clean slate.
        optimizer.zero_grad()

        # Detach so callers that keep the value (e.g. in a list of losses)
        # do not hold on to autograd history.
        return loss.detach()
    return train_step

In [None]:
from torchvision import datasets, models, transforms
import torch.nn as nn

# Train on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# ResNet-18 with pretrained weights, used as the feature extractor.
model = models.resnet18(pretrained=True)

# Freeze the backbone. The flag is the `requires_grad` attribute; assigning to
# `requires_grad_` (the name of the in-place *method*) merely shadows that
# method on each tensor and leaves every parameter trainable, so autograd
# would still compute gradients for the whole network.
for params in model.parameters():
    params.requires_grad = False

# Swap the classification layer for a single-logit head (binary REAL/FAKE).
# The new layer is created after the freeze, so its parameters stay trainable.
nr_filters = model.fc.in_features
model.fc = nn.Linear(nr_filters, 1)

model = model.to(device)

In [None]:
from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import lr_scheduler

# Binary cross-entropy computed directly on the model's single raw logit.
loss_fn = BCEWithLogitsLoss()

# Adam (default settings) is given only the parameters of the replaced fc head.
optimizer = torch.optim.Adam(params=model.fc.parameters())

# Closure that runs one optimization step per batch.
train_step = make_train_step(model=model, optimizer=optimizer, loss_fn=loss_fn)

In [None]:
%%capture
!pip install tqdm

from tqdm import tqdm
import torch

import copy

# Per-batch loss history (plain floats).
losses = []
val_losses = []

# Per-epoch mean losses.
epoch_train_losses = []
epoch_test_losses = []

n_epochs = 10
early_stopping_tolerance = 3      # epochs without improvement before stopping
early_stopping_threshold = 0.03   # stop as soon as the best val loss is this low
early_stopping_counter = 0

best_loss = float("inf")
# state_dict() returns references to the live parameter tensors, so a real
# snapshot needs a deep copy. Taking one up front also guarantees the name
# exists for the final restore even if no epoch ever improves on best_loss.
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(n_epochs):
    optimizer.zero_grad()

    epoch_loss = 0.0
    model.train()

    for x_batch, y_batch in tqdm(trainloader, total=len(trainloader)):
        x_batch = x_batch.to(device)
        # BCEWithLogitsLoss needs float targets shaped like the (N, 1) logits.
        y_batch = y_batch.unsqueeze(1).float().to(device)

        # Store Python floats rather than tensors so the history does not keep
        # device memory or autograd references alive across the run.
        loss_value = train_step(x_batch, y_batch).item()
        epoch_loss += loss_value / len(trainloader)
        losses.append(loss_value)

    epoch_train_losses.append(epoch_loss)
    print(f"\nEpoch: {epoch+1}, train loss: {epoch_loss:.4f}")

    # Validation pass: no gradient tracking, layers in eval mode.
    model.eval()
    with torch.no_grad():
        cum_loss = 0
        for x_batch, y_batch in testloader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.unsqueeze(1).float().to(device)

            yhat = model(x_batch)
            val_loss_value = loss_fn(yhat, y_batch).item()
            cum_loss += val_loss_value / len(testloader)
            val_losses.append(val_loss_value)

        epoch_test_losses.append(cum_loss)
        print(f"Epoch: {epoch+1}, val loss: {cum_loss:.4f}")

        if cum_loss < best_loss:
            best_loss = cum_loss
            # Deep copy: otherwise later epochs silently overwrite the "best"
            # weights and the restore below becomes a no-op.
            best_model_wts = copy.deepcopy(model.state_dict())
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1

    if early_stopping_counter == early_stopping_tolerance or best_loss <= early_stopping_threshold:
        print("\nTerminating: early stopping")
        break

# Restore the weights from the epoch with the lowest validation loss.
model.load_state_dict(best_model_wts)


KeyboardInterrupt: 

In [13]:
!nvidia-smi

Fri Apr 4 13:00:51 2025 
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03 Driver Version: 560.35.03 CUDA Version: 12.6 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
| 0 Tesla P100-PCIE-16GB Off | 00000000:00:04.0 Off | 0 |
| N/A 36C P0 32W / 250W | 929MiB / 16384MiB | 0% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
 
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
+-----------------------------------------------------------------------------------------+


In [None]:

!pip install tqdm

from tqdm import tqdm
import torch

import copy

# Per-batch loss history (plain floats).
losses = []
val_losses = []

# Per-epoch mean losses.
epoch_train_losses = []
epoch_test_losses = []

n_epochs = 10
early_stopping_tolerance = 3      # epochs without improvement before stopping
early_stopping_threshold = 0.03   # stop as soon as the best val loss is this low
early_stopping_counter = 0

best_loss = float("inf")
# state_dict() returns references to the live parameter tensors, so a real
# snapshot needs a deep copy. Taking one up front also guarantees the name
# exists for the final restore even if no epoch ever improves on best_loss.
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(n_epochs):
    optimizer.zero_grad()

    epoch_loss = 0.0
    model.train()

    for x_batch, y_batch in tqdm(trainloader, total=len(trainloader)):
        x_batch = x_batch.to(device)
        # BCEWithLogitsLoss needs float targets shaped like the (N, 1) logits.
        y_batch = y_batch.unsqueeze(1).float().to(device)

        loss = train_step(x_batch, y_batch)
        loss_value = loss.item()

        epoch_loss += loss_value / len(trainloader)
        losses.append(loss_value)

    epoch_train_losses.append(epoch_loss)
    print(f"\nEpoch: {epoch+1}, train loss: {epoch_loss:.4f}")

    # Validation pass: no gradient tracking, layers in eval mode.
    model.eval()
    with torch.no_grad():
        cum_loss = 0
        for x_batch, y_batch in testloader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.unsqueeze(1).float().to(device)

            yhat = model(x_batch)
            val_loss = loss_fn(yhat, y_batch)
            val_loss_value = val_loss.item()
            cum_loss += val_loss_value / len(testloader)
            val_losses.append(val_loss_value)

        epoch_test_losses.append(cum_loss)
        print(f"Epoch: {epoch+1}, val loss: {cum_loss:.4f}")

        if cum_loss < best_loss:
            best_loss = cum_loss
            # Deep copy: otherwise later epochs silently overwrite the "best"
            # weights and the restore below becomes a no-op.
            best_model_wts = copy.deepcopy(model.state_dict())
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1

    if early_stopping_counter == early_stopping_tolerance or best_loss <= early_stopping_threshold:
        print("\nTerminating: early stopping")
        break

# Restore the weights from the epoch with the lowest validation loss.
model.load_state_dict(best_model_wts)




100%|██████████| 6250/6250 [07:21<00:00, 14.15it/s]



Epoch: 1, train loss: 0.3295
Epoch: 1, val loss: 0.2714


100%|██████████| 6250/6250 [07:49<00:00, 13.32it/s]



Epoch: 2, train loss: 0.3302
Epoch: 2, val loss: 0.2683


100%|██████████| 6250/6250 [06:44<00:00, 15.47it/s]



Epoch: 3, train loss: 0.3320
Epoch: 3, val loss: 0.2689


100%|██████████| 6250/6250 [06:30<00:00, 15.99it/s]



Epoch: 4, train loss: 0.3316
Epoch: 4, val loss: 0.2745


100%|██████████| 6250/6250 [06:30<00:00, 16.01it/s]



Epoch: 5, train loss: 0.3331
Epoch: 5, val loss: 0.2716

Terminating: early stopping




In [19]:

# Save only the model's state_dict (not the full module object) to
# "my_model.pth", a path relative to the current working directory.
torch.save(model.state_dict(), "my_model.pth")


In [None]:
from safetensors.torch import save_file

# First export: the full state_dict in safetensors format.
save_file(model.state_dict(), "my_model.safetensors")

import h5py

# Second export: an HDF5 file with one dataset per state_dict entry, named
# after the entry's key and holding the tensor's values as a NumPy array.
state_dict = model.state_dict()

with h5py.File("my_model.h5", mode="w") as h5_file:
    for entry_name, entry_tensor in state_dict.items():
        entry_array = entry_tensor.cpu().numpy()
        h5_file.create_dataset(entry_name, data=entry_array)


In [23]:
#inference
import os
import torch
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd

class InferenceDataset(Dataset):
    """Unlabelled image-folder dataset that yields (transformed image, path).

    Args:
        folder: directory scanned (non-recursively) for image files.
        transform: callable applied to each image after conversion to RGB.
    """

    # Accepted file extensions, matched case-insensitively. The leading dot
    # keeps names that merely end in these letters (e.g. "fakejpg") out.
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

    def __init__(self, folder, transform):
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and anything written out from it) reproducible.
        self.paths = sorted(
            os.path.join(folder, name)
            for name in os.listdir(folder)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )
        self.transform = transform

    def __len__(self):
        """Number of image files found in the folder."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Return the transformed RGB image at `idx` together with its path."""
        image_path = self.paths[idx]
        # Close the file handle promptly; convert() returns an independent,
        # fully loaded image that stays valid after the file is closed.
        with Image.open(image_path) as img:
            rgb_image = img.convert("RGB")
        return self.transform(rgb_image), image_path

def run_inference(image_folder, output_csv="predictions.csv", weights_path="my_model.pth"):
    """Classify every image in a folder as REAL or FAKE and write a CSV.

    Args:
        image_folder: directory containing the images to score.
        output_csv: path of the CSV to write, with the columns image_path,
            prediction and score.
        weights_path: state_dict file of the fine-tuned ResNet-18, as written
            by torch.save(model.state_dict(), ...). Defaults to the file the
            training section of this notebook saves.

    Raises:
        FileNotFoundError: if `weights_path` does not exist.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Rebuild the training architecture (ResNet-18 with a single-logit head)
    # and load the fine-tuned weights. Without this step the head is randomly
    # initialized and the predictions are meaningless. No pretrained download
    # is needed because every tensor is overwritten by the checkpoint.
    model = models.resnet18()
    model.fc = torch.nn.Linear(model.fc.in_features, 1)
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model = model.to(device)
    model.eval()

    # Same preprocessing as used for training.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    dataset = InferenceDataset(image_folder, transform)
    loader = DataLoader(dataset, batch_size=1, shuffle=False)

    results = []
    with torch.no_grad():
        for img, paths in loader:
            img = img.to(device)
            pred = torch.sigmoid(model(img)).item()
            # ImageFolder numbers classes alphabetically (FAKE=0, REAL=1), so
            # a sigmoid score of 0.5 or more means REAL. This assumes training
            # used FAKE/ and REAL/ class folders, as the training cell does.
            label = "REAL" if pred >= 0.5 else "FAKE"
            results.append({"image_path": paths[0], "prediction": label, "score": pred})

    # Explicit columns keep the CSV header intact even when no image was found.
    pd.DataFrame(results, columns=["image_path", "prediction", "score"]).to_csv(output_csv, index=False)


In [24]:
# Folder holding the images to classify.
final_path = "/kaggle/input/finald/Test datasets/Test_dataset_2"

# NOTE(review): both runs score the same folder and differ only in the output
# file name -- confirm the second run was not meant for a different dataset.
for output_name in ("outputdata1.csv", "outputdata2.csv"):
    run_inference(final_path, output_name)