{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/user/miniconda3/envs/dwl/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "data": { "text/html": [ "
\n", "shape: (5, 2)
iddescription
strstr
"02d892""a purple forest at dusk"
"0dcd2e""gray wool coat with a faux fur…
"1e9ac1""a lighthouse overlooking the o…
"2b25db""burgundy corduroy pants with p…
"4e6a54""orange corduroy overalls"
" ], "text/plain": [ "shape: (5, 2)\n", "┌────────┬─────────────────────────────────┐\n", "│ id ┆ description │\n", "│ --- ┆ --- │\n", "│ str ┆ str │\n", "╞════════╪═════════════════════════════════╡\n", "│ 02d892 ┆ a purple forest at dusk │\n", "│ 0dcd2e ┆ gray wool coat with a faux fur… │\n", "│ 1e9ac1 ┆ a lighthouse overlooking the o… │\n", "│ 2b25db ┆ burgundy corduroy pants with p… │\n", "│ 4e6a54 ┆ orange corduroy overalls │\n", "└────────┴─────────────────────────────────┘" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# We can load and explore the competition's train set to get a feel for the data.\n", "# We're not going to export this cell as it's not needed for our exported inferenceable model.\n", "\n", "import kagglehub\n", "import polars as pl\n", "\n", "train_path = kagglehub.competition_download('drawing-with-llms', 'train.csv')\n", "train = pl.read_csv(train_path)\n", "\n", "train.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "class Model:\n", " def __init__(self):\n", " '''Optional constructor, performs any setup logic, model instantiation, etc.'''\n", " pass\n", " \n", " def predict(self, prompt: str) -> str:\n", " '''Generates SVG which produces an image described by the prompt.\n", "\n", " Args:\n", " prompt (str): A prompt describing an image\n", " Returns:\n", " String of valid SVG code.\n", " '''\n", " # Renders a simple circle regardless of input\n", " return ''" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "image/svg+xml": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from IPython.display import SVG\n", "\n", "model = Model()\n", "svg = model.predict('a goose winning a gold medal')\n", "\n", "print(svg)\n", "display(SVG(svg))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['RN50',\n", " 'RN101',\n", " 'RN50x4',\n", " 'RN50x16',\n", " 'RN50x64',\n", " 'ViT-B/32',\n", " 'ViT-B/16',\n", " 'ViT-L/14',\n", " 'ViT-L/14@336px']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import clip\n", "clip.available_models()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2025-04-20 13:55:34.589770: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "E0000 00:00:1745171734.600777 13214 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "E0000 00:00:1745171734.603957 13214 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "W0000 00:00:1745171734.615566 13214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1745171734.615584 13214 computation_placer.cc:177] computation placer already registered. 
Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1745171734.615585 13214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1745171734.615586 13214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "2025-04-20 13:55:34.618659: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.\n", "Loading checkpoint shards: 100%|██████████| 4/4 [00:18<00:00, 4.68s/it]\n" ] } ], "source": [ "import pandas as pd\n", "import importlib\n", "metric = importlib.import_module('metric')\n", "importlib.reload(metric)\n", "\n", "vqa_evaluator = metric.VQAEvaluator()\n", "aesthetic_evaluator = metric.AestheticEvaluator()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "VQA Score: 0.9996758976500401\n", "Aesthetic Score: 0.5749330520629883\n", "Final Fidelity Score: 0.8709845773271212\n" ] } ], "source": [ "# score gpt4o generated images\n", "import ast\n", "import numpy as np\n", "from PIL import Image\n", "\n", "# Load the first sample from descriptions.csv\n", "descriptions_df = pd.read_csv('data/descriptions.csv')\n", "first_description = descriptions_df.iloc[1]\n", "\n", "eval_df = pd.read_csv('data/eval.csv')\n", "first_eval = eval_df.iloc[1]\n", "\n", "# Load the image\n", "image_path = 'data/gray_coat.png' # Assuming the image is saved with this name\n", "image = Image.open(image_path)\n", "\n", "# Prepare the inputs for scoring - need to parse the string representations\n", "questions = ast.literal_eval(first_eval['question'])\n", "choices = ast.literal_eval(first_eval['choices'])\n", "answers = ast.literal_eval(first_eval['answer'])\n", "\n", "# Calculate VQA score - don't wrap in additional lists\n", "vqa_score = vqa_evaluator.score(questions, choices, answers, image)\n", "\n", "# Calculate aesthetic score\n", "aesthetic_score = aesthetic_evaluator.score(image)\n", "\n", "# Apply image processing as done in the metric.score function\n", "image_processor = metric.ImageProcessor(image=image, seed=0).apply()\n", "processed_image = image_processor.image.copy()\n", "\n", "# Calculate final fidelity score\n", "instance_score = metric.harmonic_mean(vqa_score, aesthetic_score, beta=0.5)\n", "\n", "print(f\"VQA Score: {vqa_score}\")\n", "print(f\"Aesthetic Score: {aesthetic_score}\")\n", "print(f\"Final Fidelity Score: {instance_score}\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No duplicate IDs found in data/descriptions.csv\n", "Sorted rows by ID\n", "Fixed and sorted CSV saved to data/descriptions.csv\n", "No duplicate IDs found in data/eval.csv\n", "Sorted data/eval.csv by ID\n" ] } ], "source": [ "# Fix duplicate IDs in descriptions.csv and 
order rows by id\n", "def fix_duplicate_ids(csv_path):\n", " \"\"\"\n", " Fix duplicate IDs in a CSV file by assigning new unique IDs to duplicates.\n", " Then order rows by ID.\n", " \"\"\"\n", " # Read the CSV file\n", " df = pd.read_csv(csv_path)\n", " \n", " # Check for duplicate IDs\n", " duplicate_mask = df['id'].duplicated(keep='first')\n", " duplicate_count = duplicate_mask.sum()\n", " \n", " if duplicate_count > 0:\n", " print(f\"Found {duplicate_count} duplicate IDs in {csv_path}\")\n", " \n", " # Get the maximum ID value\n", " max_id = df['id'].max()\n", " \n", " # Assign new IDs to duplicates\n", " new_ids = list(range(max_id + 1, max_id + 1 + duplicate_count))\n", " df.loc[duplicate_mask, 'id'] = new_ids\n", " \n", " print(f\"Assigned new IDs to duplicates\")\n", " else:\n", " print(f\"No duplicate IDs found in {csv_path}\")\n", " \n", " # Sort the dataframe by ID\n", " df = df.sort_values(by='id')\n", " print(f\"Sorted rows by ID\")\n", " \n", " # Save the fixed and sorted CSV\n", " df.to_csv(csv_path, index=False)\n", " print(f\"Fixed and sorted CSV saved to {csv_path}\")\n", " \n", " # Return the fixed dataframe\n", " return df\n", "\n", "# Fix descriptions.csv\n", "fixed_descriptions_df = fix_duplicate_ids('data/descriptions.csv')\n", "\n", "# Fix eval.csv if needed\n", "# First check if eval.csv has the same issue\n", "eval_df = pd.read_csv('data/eval.csv')\n", "duplicate_eval_ids = eval_df['id'].duplicated(keep='first').sum()\n", "\n", "if duplicate_eval_ids > 0:\n", " fixed_eval_df = fix_duplicate_ids('data/eval.csv')\n", "else:\n", " print(\"No duplicate IDs found in data/eval.csv\")\n", " # Still sort by ID even if no duplicates\n", " eval_df = eval_df.sort_values(by='id')\n", " eval_df.to_csv('data/eval.csv', index=False)\n", " print(\"Sorted data/eval.csv by ID\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "dwl", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 2 }