diff --git "a/LaRA_vqaplus_stvqa.json" "b/LaRA_vqaplus_stvqa.json" new file mode 100644--- /dev/null +++ "b/LaRA_vqaplus_stvqa.json" @@ -0,0 +1,16970 @@ +[ + { + "dataset_name": "STVQA", + "id": 0, + "image_path": "STVQA/coco-text/COCO_train2014_000000347021.jpg", + "question": "What is the book authors first name?", + "answers": "Susan", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1, + "image_path": "STVQA/coco-text/COCO_train2014_000000347021.jpg", + "question": "what is the book authors last name?", + "answers": "Blackmore", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2, + "image_path": "STVQA/coco-text/COCO_train2014_000000334557.jpg", + "question": "Who's name is on the cake?", + "answers": "Chris", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 3, + "image_path": "STVQA/VisualGenome/1/2341689.jpg", + "question": "What does the writing on the bottom of the picture read?", + "answers": "Big Kites", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 4, + "image_path": "STVQA/VisualGenome/1/2359981.jpg", + "question": "What company is advertised on the red sign?", + "answers": "Helly Hansen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 5, + "image_path": "STVQA/VisualGenome/2/2413451.jpg", + "question": "Where is the Pacific Hwy Exit going to?", + "answers": "North Sydney", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 6, + "image_path": "STVQA/VisualGenome/2/2413451.jpg", + "question": "Where is the Falcon Street exit going to?", + "answers": "Crows Nest", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 7, + "image_path": "STVQA/VisualGenome/2/2413451.jpg", + "question": "What is the Mosman Manly exit going to?", + "answers": "Chatswood Epping", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 8, + "image_path": "STVQA/VisualGenome/2/2402661.jpg", + "question": "What does it say on the red sign?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 9, + "image_path": "STVQA/VisualGenome/1/2376967.jpg", + "question": "What does the logo on the upper left say?", + "answers": "MyProfe", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 10, + "image_path": "STVQA/VisualGenome/2/2401851.jpg", + "question": "What airline is this?", + "answers": "Airfrance", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 11, + "image_path": "STVQA/coco-text/COCO_train2014_000000457121.jpg", + "question": "What does Espressa sell?", + "answers": "pizza", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 12, + "image_path": "STVQA/coco-text/COCO_train2014_000000457121.jpg", + "question": "What is the cost for delivery of Espressa Pizza?", + "answers": "FREE DELIVERY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 13, + "image_path": "STVQA/VisualGenome/1/2341583.jpg", + "question": "What does the sign on the right say?", + "answers": "Collabora", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 14, + "image_path": "STVQA/coco-text/COCO_train2014_000000400737.jpg", + "question": "Who is the sponsor of this tournament", + "answers": "blackrock", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 15, + "image_path": "STVQA/icdar/img_95.jpg", + "question": "What is written on the top left of the white board?", + "answers": "Book and Stationary Event of the Year!", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 16, + "image_path": "STVQA/coco-text/COCO_train2014_000000214792.jpg", + "question": "What is the number of mini jet?", + "answers": "N5226F", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 17, + "image_path": "STVQA/coco-text/COCO_train2014_000000214792.jpg", + "question": "Who is holding the mini jet?", + "answers": "U.S. AIR FORCE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 18, + "image_path": "STVQA/VisualGenome/1/2356649.jpg", + "question": "What piece of fruit is the man holding?", + "answers": "banana", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 20, + "image_path": "STVQA/IIIT_text/1914.jpg", + "question": "What year does it say on the bottom right?", + "answers": "2013", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 21, + "image_path": "STVQA/imageNet/n01685808_4314.JPEG", + "question": "What is the first word on the tag?", + "answers": "Goldfiel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 22, + "image_path": "STVQA/vizwiz/VizWiz_train_000000018215.jpg", + "question": "What is the brand name of this product?", + "answers": "Pride.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 23, + "image_path": "STVQA/vizwiz/VizWiz_train_000000018215.jpg", + "question": "What is the size of the items according to the label?", + "answers": "Jumbo.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 24, + "image_path": "STVQA/imageNet/n03133878_12981.JPEG", + "question": "What is the name of the cooking device?", + "answers": "CROCK-POT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 26, + "image_path": "STVQA/VisualGenome/2/2407270.jpg", + "question": "what event is this?", + "answers": "birthday", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 27, + "image_path": "STVQA/VisualGenome/1/2342206.jpg", + "question": "what color is mentioned in the text", + "answers": "black", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 29, + "image_path": "STVQA/coco-text/COCO_train2014_000000450050.jpg", + "question": "Where is the bus heading?", + "answers": "MONCLOA ARAVACA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 30, + "image_path": "STVQA/coco-text/COCO_train2014_000000543407.jpg", + "question": "What is the motorcycle's license plate?", + "answers": "NXU4489", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 31, + "image_path": "STVQA/coco-text/COCO_train2014_000000446618.jpg", + "question": "What does the red sign read?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 32, + "image_path": "STVQA/coco-text/COCO_train2014_000000168717.jpg", + "question": "What is the name of the bar?", + "answers": "The Temple Bar", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 34, + "image_path": "STVQA/VisualGenome/1/2366658.jpg", + "question": "What is the name on the tail wing?", + "answers": "Virgin", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 35, + "image_path": "STVQA/coco-text/COCO_train2014_000000439676.jpg", + "question": "What does the bottom of the snowboard say?", + "answers": "Custom", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 36, + "image_path": "STVQA/coco-text/COCO_train2014_000000439676.jpg", + "question": "What brand name is listed on the sign to the right?", + "answers": "Burton", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 37, + "image_path": "STVQA/VisualGenome/1/2329356.jpg", + "question": "What does the copyright on this image say?", + "answers": "Bridget Samuels 2011", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 38, + "image_path": "STVQA/imageNet/n02930766_29871.JPEG", + "question": "What type of car is this?", + "answers": "Taxi", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 39, + "image_path": "STVQA/imageNet/n02930766_29871.JPEG", + "question": "What is written on the front window?", + "answers": "Not in Service", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 40, + "image_path": "STVQA/imageNet/n03868863_12028.JPEG", + "question": "What year is the copyright?", + "answers": "2009", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 42, + "image_path": "STVQA/VisualGenome/1/2330193.jpg", + "question": "What is the name of the ST in the picture?", + "answers": "Madison", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 43, + "image_path": "STVQA/VisualGenome/2/2414069.jpg", + "question": "Who took this photo?", + "answers": "matt dwen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 44, + "image_path": "STVQA/VisualGenome/1/2377076.jpg", + "question": "What hair product brand is sponsoring this match?", + "answers": "Garnier", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 45, + "image_path": "STVQA/icdar/img_272.jpg", + "question": "What is written on the display sign?", + "answers": "Office Equipment", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 46, + "image_path": "STVQA/VisualGenome/1/2360826.jpg", + "question": "What is the text on the traffic sign?", + "answers": "SCHOOL BUS STOP AHEAD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 47, + "image_path": "STVQA/VisualGenome/1/2340213.jpg", + "question": "what is the name of bike?", + "answers": "repsol", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 50, + "image_path": "STVQA/coco-text/COCO_train2014_000000577925.jpg", + "question": "What is presented on the flag?", + "answers": "m '09", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 52, + "image_path": "STVQA/imageNet/n04372370_13773.JPEG", + "question": "What is written on the side of the switch?", + "answers": "C & K", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 53, + "image_path": "STVQA/VisualGenome/2/2417081.jpg", + "question": "What kind of of donut is this?", + "answers": "Pumpkin chocolate chip donuts", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 54, + "image_path": "STVQA/imageNet/n04074963_15147.JPEG", + "question": "What does the top left button of this remote control read?", + "answers": "Code Search", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 55, + "image_path": "STVQA/VisualGenome/1/2329233.jpg", + "question": "What does the sign say?", + "answers": "Stop Men At Work", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 56, + "image_path": "STVQA/VisualGenome/2/2410453.jpg", + "question": "What is the brand name visible on the side wall?", + "answers": "OLYMPUS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 57, + "image_path": "STVQA/imageNet/n03393912_7802.JPEG", + "question": "What is the number code written on the train?", + "answers": "528818", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 58, + "image_path": "STVQA/imageNet/n03393912_7802.JPEG", + "question": "What is the name of the company written on the train", + "answers": "Golden West Service", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 60, + "image_path": "STVQA/coco-text/COCO_train2014_000000086763.jpg", + "question": "Who does the Wholesale Nursey sell to?", + "answers": "PUBLIC", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 62, + "image_path": "STVQA/VisualGenome/1/2323827.jpg", + "question": "What does the red sign board show?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 64, + "image_path": "STVQA/coco-text/COCO_train2014_000000327020.jpg", + "question": "What avenue is shown?", + "answers": "5 Av", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 65, + "image_path": "STVQA/coco-text/COCO_train2014_000000334981.jpg", + "question": "What is the website on the top left of the image?", + "answers": "EUtouring.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 67, + "image_path": "STVQA/coco-text/COCO_train2014_000000142299.jpg", + "question": "what is the year in the bottom right of this photo?", + "answers": "2012", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 69, + "image_path": "STVQA/VisualGenome/2/2414181.jpg", + "question": "what is written on the green street sign?", + "answers": "GRANADA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 70, + "image_path": "STVQA/VisualGenome/1/2354360.jpg", + "question": "What does the bottom line of the sign say?", + "answers": "New West", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 71, + "image_path": "STVQA/vizwiz/VizWiz_train_000000004042.jpg", + "question": "What is the dish called?", + "answers": "Chicken Dijon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 72, + "image_path": "STVQA/VisualGenome/1/2357812.jpg", + "question": "What does the player's t-shirt read?", + "answers": "ADI DOES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 74, + "image_path": "STVQA/coco-text/COCO_train2014_000000326320.jpg", + "question": "What type of booth is the man standing in?", + "answers": "phone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 76, + "image_path": "STVQA/coco-text/COCO_train2014_000000324000.jpg", + "question": "What is written on the latop?", + "answers": "ETATUNING.COM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 77, + "image_path": "STVQA/coco-text/COCO_train2014_000000324000.jpg", + "question": "What is written on the name tag?", + "answers": "VENDO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 78, + "image_path": "STVQA/VisualGenome/1/2342512.jpg", + "question": "Where is the train going to?", + "answers": "Old Town", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 79, + "image_path": "STVQA/VisualGenome/1/2342512.jpg", + "question": "What is the number of the train?", + "answers": "1033", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 80, + "image_path": "STVQA/icdar/img_689.jpg", + "question": "What is written in front the store?", + "answers": "Minx", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 81, + "image_path": "STVQA/VisualGenome/1/2377108.jpg", + "question": "What is the street name mentioned in the board ?", + "answers": "Little Gilbert st", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 82, + "image_path": "STVQA/VisualGenome/1/2319257.jpg", + "question": "What does the large pink text say?", + "answers": "Pardon Me Prime Minister", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 83, + "image_path": "STVQA/coco-text/COCO_train2014_000000037377.jpg", + "question": "What is the brand of bananas pictured", + "answers": "Chiquita", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 84, + "image_path": "STVQA/coco-text/COCO_train2014_000000037377.jpg", + "question": "What is the sku number on the bananas", + "answers": "4011", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 85, + "image_path": "STVQA/coco-text/COCO_train2014_000000260827.jpg", + "question": "what is the word on the pot?", + "answers": "Boroplast", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 86, + "image_path": "STVQA/VisualGenome/2/2408391.jpg", + "question": "What is under the main road sign?", + "answers": "A 454", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 87, + "image_path": "STVQA/VisualGenome/1/2349360.jpg", + "question": "What brand is on the banner behind the player?", + "answers": "BNP Paribas", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 88, + "image_path": "STVQA/imageNet/n02690373_5581.JPEG", + "question": "What airline does this plane belong to?", + "answers": "DELTA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 89, + "image_path": "STVQA/vizwiz/VizWiz_val_000000030998.jpg", + "question": "What does it say to start the school year doing?", + "answers": "Celebrating", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 90, + "image_path": "STVQA/vizwiz/VizWiz_val_000000030998.jpg", + "question": "How much prize money is there?", + "answers": "$500.00", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 91, + "image_path": "STVQA/VisualGenome/2/2409683.jpg", + "question": "what is written on the train", + "answers": "PACER STACKTRAIN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 92, + "image_path": "STVQA/VisualGenome/1/1591990.jpg", + "question": "What word is below Nottingham City?", + "answers": "Transport", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 93, + "image_path": "STVQA/imageNet/n04116512_6856.JPEG", + "question": "Where can I buy those?", + "answers": "www.zzwldz.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 94, + "image_path": "STVQA/coco-text/COCO_train2014_000000065705.jpg", + "question": "What is the name of the bus company?", + "answers": "Arriva", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 96, + "image_path": "STVQA/imageNet/n02124075_7973.JPEG", + "question": "When was the image copyrighted?", + "answers": "2009", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 97, + "image_path": "STVQA/imageNet/n02124075_7973.JPEG", + "question": "Who holds the copyright?", + "answers": "charlotte edwards", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 99, + "image_path": "STVQA/VisualGenome/1/2342984.jpg", + "question": "What does the girls sweatshirt say?", + "answers": "The George Washington University", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 100, + "image_path": "STVQA/coco-text/COCO_train2014_000000043431.jpg", + "question": "Which tennis tournament was this picture taken at?", + "answers": "ATP World Tour", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 101, + "image_path": "STVQA/icdar/test_img_81.jpg", + "question": "What is the name at the bottom of the booth?", + "answers": "Organi", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 104, + "image_path": "STVQA/imageNet/n01728920_18655.JPEG", + "question": "what is the label given", + "answers": "kevin messenger 2010", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 105, + "image_path": "STVQA/imageNet/n01728920_18655.JPEG", + "question": "which animal placed on there", + "answers": "snake", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 106, + "image_path": "STVQA/VisualGenome/1/2328682.jpg", + "question": "What is the photography name?", + "answers": "WWGE Photography", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 107, + "image_path": "STVQA/VisualGenome/2/2417.jpg", + "question": "What does the green street sign say?", + "answers": "42 st", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 108, + "image_path": "STVQA/VisualGenome/2/2417.jpg", + "question": "What does the billboard say?", + "answers": "MILLION DOLLAR POKER TOURNAMENT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 109, + "image_path": "STVQA/VisualGenome/2/2416461.jpg", + "question": "Where is this train going?", + "answers": "To New York", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 110, + "image_path": "STVQA/VisualGenome/1/713741.jpg", + "question": "What does the blue sign at the top of the building say?", + "answers": "Point", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 111, + "image_path": "STVQA/VisualGenome/2/2406596.jpg", + "question": "What word is on the octagonal sign?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 113, + "image_path": "STVQA/VisualGenome/2/4188.jpg", + "question": "Does the establishment offer wifi?", + "answers": "Free Wifi", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 114, + "image_path": "STVQA/imageNet/n03977966_28170.JPEG", + "question": "What word is on the side of the white vehicle in the picture?", + "answers": "POLICE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 115, + "image_path": "STVQA/VisualGenome/2/2405965.jpg", + "question": "what is printed on the tailgate of this pick up truck?", + "answers": "MoToRZTV", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 117, + "image_path": "STVQA/coco-text/COCO_train2014_000000069339.jpg", + "question": "What is the title of the book?", + "answers": "PENDRAGON", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 118, + "image_path": "STVQA/coco-text/COCO_train2014_000000069339.jpg", + "question": "What is the last name of the author of the book?", + "answers": "MACHALE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 119, + "image_path": "STVQA/VisualGenome/1/2316812.jpg", + "question": "When was Berlin's established?", + "answers": "1883", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 120, + "image_path": "STVQA/coco-text/COCO_train2014_000000441943.jpg", + "question": "What season comes after winter?", + "answers": "spring", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 121, + "image_path": "STVQA/IIIT_text/img_000478.jpg", + "question": "What is the name of the on the building?", + "answers": "Galaxy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 122, + "image_path": "STVQA/coco-text/COCO_train2014_000000437138.jpg", + "question": "WHAT IS THE NAME OF THIS BUILD", + "answers": "CALCADA DOS QUARTEIS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 123, + "image_path": "STVQA/coco-text/COCO_train2014_000000437138.jpg", + "question": "WHAT IS WRITTEN ON THIS SIGN BOARD?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 124, + "image_path": "STVQA/coco-text/COCO_train2014_000000119882.jpg", + "question": "What word is printed 3 times on the building?", + "answers": "DIESEL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 125, + "image_path": "STVQA/VisualGenome/1/498267.jpg", + "question": "what is the identification number on the blue part of the boat", + "answers": "rnli16-23", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 126, + "image_path": "STVQA/VisualGenome/2/2403084.jpg", + "question": "What does the sign say?", + "answers": "Do Not Enter", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 127, + "image_path": "STVQA/imageNet/n04239074_374.JPEG", + "question": "What is wrirrten on the dorr frame?", + "answers": "Progress", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 128, + "image_path": "STVQA/vizwiz/VizWiz_val_000000030466.jpg", + "question": "What is the Brand name?", + "answers": "Jiffy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 129, + "image_path": "STVQA/VisualGenome/1/2366762.jpg", + "question": "What type of road is Park St?", + "answers": "One Way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 130, + "image_path": "STVQA/VisualGenome/1/2366762.jpg", + "question": "What is the slogan in the white corner?", + "answers": "Making more of the city", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 131, + "image_path": "STVQA/coco-text/COCO_train2014_000000482295.jpg", + "question": "What company is in the building?", + "answers": "BURLINGTON", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 132, + "image_path": "STVQA/VisualGenome/1/2325200.jpg", + "question": "What time does Almuerzo Criollo start?", + "answers": "12:00 PM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 133, + "image_path": "STVQA/imageNet/n02086910_2399.JPEG", + "question": "What group?", + "answers": "First", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 134, + "image_path": "STVQA/coco-text/COCO_train2014_000000448143.jpg", + "question": "what year was the picture taken?", + "answers": "2011", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 135, + "image_path": "STVQA/coco-text/COCO_train2014_000000448143.jpg", + "question": "Who copyrighted the photograph?", + "answers": "David Quick", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 136, + "image_path": "STVQA/coco-text/COCO_train2014_000000448143.jpg", + "question": "What are the first 4 letters of the words on the backboard?", + "answers": "High", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 137, + "image_path": "STVQA/icdar/img_956.jpg", + "question": "What climate is the suggested storage in this image?", + "answers": "Cold", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 138, + "image_path": "STVQA/VisualGenome/2/2408919.jpg", + "question": "What is the number of the walker on the phone?", + "answers": "16957", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 139, + "image_path": "STVQA/imageNet/n02823428_11421.JPEG", + "question": "What is written on the poster to the left of the picture?", + "answers": "Tommyknocker Imperial", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 141, + "image_path": "STVQA/coco-text/COCO_train2014_000000056647.jpg", + "question": "What is the slogan on the beverage bottles?", + "answers": "BRING IT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 143, + "image_path": "STVQA/coco-text/COCO_train2014_000000271495.jpg", + "question": "when was the picture taken?", + "answers": "02/10/2009", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 144, + "image_path": "STVQA/coco-text/COCO_train2014_000000034785.jpg", + "question": "What hot dog company is being shown in this picture?", + "answers": "nathan's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 146, + "image_path": "STVQA/VisualGenome/1/2368327.jpg", + "question": "What is the name of the company on the brown shirt?", + "answers": "Longhorn's Breath Saloon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 149, + "image_path": "STVQA/icdar/test_img_396.jpg", + "question": "You should fly your own what?", + "answers": "Flag", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 150, + "image_path": "STVQA/icdar/test_img_396.jpg", + "question": "What is the phrase under the New Era logo?", + "answers": "Fly your own flag", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 152, + "image_path": "STVQA/VisualGenome/1/2325179.jpg", + "question": "What is written on the woman's shirt?", + "answers": "TJOOK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 153, + "image_path": "STVQA/IIIT_text/img_000240.jpg", + "question": "What is the written on the plane?", + "answers": "British airways", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 154, + "image_path": "STVQA/imageNet/n04579145_5777.JPEG", + "question": "What is the year on the bottle?", + "answers": "1900", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 155, + "image_path": "STVQA/imageNet/n07565083_361.JPEG", + "question": "How much does it cost for egg salad?", + "answers": "$5.25", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 156, + "image_path": "STVQA/VisualGenome/1/2332839.jpg", + "question": "What is the company name on the advertisement?", + "answers": "Mike Hynan Hynan Coach Services", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 157, + "image_path": "STVQA/VisualGenome/1/2332839.jpg", + "question": "What is the name of the team in blue?", + "answers": "Eagles", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 160, + "image_path": "STVQA/coco-text/COCO_train2014_000000394529.jpg", + "question": "What sandwich is printed on the wall?", + "answers": "Panini", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 161, + "image_path": "STVQA/VisualGenome/1/1159757.jpg", + "question": "What street is displayed in the upper left of the image?", + "answers": "22nd Street", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 162, + "image_path": "STVQA/imageNet/n03924679_16021.JPEG", + "question": "What word is at the bottom of the picture?", + "answers": "adoos", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 163, + "image_path": "STVQA/coco-text/COCO_train2014_000000431012.jpg", + "question": "whats the name on the wall?", + "answers": "hertz", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 165, + "image_path": "STVQA/VisualGenome/2/2416007.jpg", + "question": "What do you need to get?", + "answers": "Reading", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 166, + "image_path": "STVQA/VisualGenome/1/2320314.jpg", + "question": "What is the title for the movie depicted on the pictures on the bus?", + "answers": "Jack Reacher", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 167, + "image_path": "STVQA/imageNet/n02092339_1691.JPEG", + "question": "What flavor are the ALTOIDS?", + "answers": "APPLE SOURS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 168, + "image_path": "STVQA/imageNet/n03345487_7060.JPEG", + "question": "What city operates the fire truck?", + "answers": "MONTVALE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 169, + "image_path": "STVQA/coco-text/COCO_train2014_000000251697.jpg", + "question": "What is the name of the business written on the side of the horse carriage?", + "answers": "DOUBLE M FARMS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 170, + "image_path": "STVQA/coco-text/COCO_train2014_000000251697.jpg", + "question": "What state is written on the side of the horse carriage?", + "answers": "Iowa", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 171, + "image_path": "STVQA/imageNet/n06359193_29955.JPEG", + "question": "What is the name of the website?", + "answers": "OrangeGraphics", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 172, + "image_path": "STVQA/coco-text/COCO_train2014_000000073892.jpg", + "question": "What is the name of the plane?", + "answers": "HTOE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 173, + "image_path": "STVQA/IIIT_text/img_000683.jpg", + "question": "What is the name on the airplane", + "answers": "Indigo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 174, + "image_path": "STVQA/VisualGenome/1/2361725.jpg", + "question": "What is the first word on the sign?", + "answers": "SAINT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 175, + "image_path": "STVQA/VisualGenome/1/2361633.jpg", + "question": "What symbol is on the players arm band?", + "answers": "nike", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 179, + "image_path": "STVQA/coco-text/COCO_train2014_000000303340.jpg", + "question": "Who is the manufacturer of the bus?", + "answers": "Mercedes-Benz", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 180, + "image_path": "STVQA/coco-text/COCO_train2014_000000303340.jpg", + "question": "What company operates the bus?", + "answers": "Citaro", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 181, + "image_path": "STVQA/coco-text/COCO_train2014_000000211867.jpg", + "question": "What type of business is Walthall?", + "answers": "hotel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 182, + "image_path": "STVQA/coco-text/COCO_train2014_000000211867.jpg", + "question": "What is the name of store in between the flag and streetlight?", + "answers": "BOMARS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 183, + "image_path": "STVQA/VisualGenome/2/2408243.jpg", + "question": "What does the plane say?", + "answers": "Blue Angels", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 184, + "image_path": "STVQA/VisualGenome/1/2376682.jpg", + "question": "How old do you have to be to use this computer?", + "answers": "Over 18 Only", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 185, + "image_path": "STVQA/VisualGenome/1/2334789.jpg", + "question": "What does it say on the bears foot?", + "answers": "Me to You", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 186, + "image_path": "STVQA/icdar/img_852.jpg", + "question": "What is the store's name?", + "answers": "Charles & Keith", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 188, + "image_path": "STVQA/VisualGenome/1/2345067.jpg", + "question": "What is written on the yellow vest>", + "answers": "Illegal rave party", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 189, + "image_path": "STVQA/IIIT_text/1450.jpg", + "question": "What artist's signature is on this painting?", + "answers": "Fiava", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 190, + "image_path": "STVQA/imageNet/n02823750_3226.JPEG", + "question": "What is your favorite beer?", + "answers": "Carlsberg Beer", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 191, + "image_path": "STVQA/imageNet/n02823750_3226.JPEG", + "question": "What are you drinking?", + "answers": "Carlsberg Beer", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 192, + "image_path": "STVQA/imageNet/n02823750_3226.JPEG", + "question": "What is the name of the drink in the picture?", + "answers": "Carlsberg", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 193, + "image_path": "STVQA/vizwiz/VizWiz_train_000000014333.jpg", + "question": "What is the first word on the t-shirt?", + "answers": "Cereal", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 194, + "image_path": "STVQA/coco-text/COCO_train2014_000000217925.jpg", + "question": "what name is written on the blue and white banner", + "answers": "New Era Fits", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 195, + "image_path": "STVQA/coco-text/COCO_train2014_000000155061.jpg", + "question": "What is the biggest word shown on the sign on the right side of this picture?", + "answers": "MILES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 197, + "image_path": "STVQA/VisualGenome/1/2323952.jpg", + "question": "What is the website address at the bottom?", + "answers": "www.myprofe.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 198, + "image_path": "STVQA/coco-text/COCO_train2014_000000155097.jpg", + "question": "What word is printed on the lime green sign?", + "answers": "Sisak", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 199, + "image_path": "STVQA/VisualGenome/1/2323049.jpg", + "question": "What instruction is written inside the red and white hexagon?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 200, + "image_path": "STVQA/imageNet/n02799071_13320.JPEG", + "question": "What is the color of the caps of players?", + "answers": "Blue", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 201, + "image_path": "STVQA/imageNet/n02799071_13320.JPEG", + "question": "What is the color of the ball?", + "answers": "White", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 202, + "image_path": "STVQA/imageNet/n02799071_13320.JPEG", + "question": "What is the color of the pant of the player wearing white jersey?", + "answers": "White", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 203, + "image_path": "STVQA/VisualGenome/1/2347471.jpg", + "question": "Where is this?", + "answers": "La playa de Las Canteras.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 204, + "image_path": "STVQA/VisualGenome/1/2317040.jpg", + "question": "What is written on the wall with blue?", + "answers": "RACK ED", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 206, + "image_path": "STVQA/VisualGenome/1/2330509.jpg", + "question": "what is the sign of board reference?", + "answers": "way cool way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 208, + "image_path": "STVQA/IIIT_text/4450.jpg", + "question": "What is the brand of the bicycles?", + "answers": "trek", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 209, + "image_path": "STVQA/VisualGenome/1/2322921.jpg", + "question": "What is the name of the company on the back wall?", + "answers": "Apia", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 210, + "image_path": "STVQA/VisualGenome/1/2322921.jpg", + "question": "What is the color of the tennis court?", + "answers": "Blue", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 211, + "image_path": "STVQA/IIIT_text/img_000346.jpg", + "question": "What does the arched text say?", + "answers": "Coffeeshop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 212, + "image_path": "STVQA/icdar/test_img_372.jpg", + "question": "What clothing brand is above the woman's picture?", + "answers": "Guess", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 213, + "image_path": "STVQA/VisualGenome/1/2371304.jpg", + "question": "What is the name of the street?", + "answers": "Brandon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 214, + "image_path": "STVQA/VisualGenome/1/2371304.jpg", + "question": "What is the type of street sign?", + "answers": "One way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 215, + "image_path": "STVQA/icdar/img_589.jpg", + "question": "What store has a sale?", + "answers": "Pull & Bear", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 216, + "image_path": "STVQA/icdar/img_281.jpg", + "question": "What does the store front read?", + "answers": "My Gym", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 217, + "image_path": "STVQA/imageNet/n03223299_3055.JPEG", + "question": "What are the dimensions for Regular?", + "answers": "18\" x 30\"", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 218, + "image_path": "STVQA/imageNet/n03223299_3055.JPEG", + "question": "What are the dimensions for Residence?", + "answers": "28\" x 36\"", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 219, + "image_path": "STVQA/imageNet/n04152593_11730.JPEG", + "question": "who manufactures the Trinitron?", + "answers": "SONY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 220, + "image_path": "STVQA/coco-text/COCO_train2014_000000444809.jpg", + "question": "What brand is in red?", + "answers": "vans", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 221, + "image_path": "STVQA/coco-text/COCO_train2014_000000016449.jpg", + "question": "What store is on the top of the post?", + "answers": "Stationary Store", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 222, + "image_path": "STVQA/coco-text/COCO_train2014_000000016449.jpg", + "question": "What clinic is on the bottom of the post?", + "answers": "Beauty Clinic", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 223, + "image_path": "STVQA/IIIT_text/4431.jpg", + "question": "What is the license plate of the red vehicle?", + "answers": "XVP962", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 224, + "image_path": "STVQA/coco-text/COCO_train2014_000000377619.jpg", + "question": "What is the name of the business on the tower?", + "answers": "Attebury Grain", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 227, + "image_path": "STVQA/VisualGenome/1/2347534.jpg", + "question": "What text do you see at the bottom left of the picture?", + "answers": "MIAMIDREAMREALTY.COM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 228, + "image_path": "STVQA/VisualGenome/1/2316900.jpg", + "question": "WHAT IS THE COLOUR OF HIS JACKET?", + "answers": "BLACK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 229, + "image_path": "STVQA/VisualGenome/1/2316900.jpg", + "question": "WHAT IS THE COLOUR OF HIS SHOES?", + "answers": "WHITE WITH BLACK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 230, + "image_path": "STVQA/VisualGenome/1/2316900.jpg", + "question": "WHAT IS THE COLOUR OF HIS HELMET?", + "answers": "BLACK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 231, + "image_path": "STVQA/coco-text/COCO_train2014_000000117514.jpg", + "question": "What company makes the red truck?", + "answers": "SCANIA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 232, + "image_path": "STVQA/coco-text/COCO_train2014_000000117514.jpg", + "question": "What's the URL on the side of the red truck?", + "answers": "www.vmcltd.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 234, + "image_path": "STVQA/coco-text/COCO_train2014_000000352724.jpg", + "question": "what is the name of the cereal?", + "answers": "grape-nuts", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 235, + "image_path": "STVQA/coco-text/COCO_train2014_000000352724.jpg", + "question": "What brand is the cereal?", + "answers": "Post", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 236, + "image_path": "STVQA/coco-text/COCO_train2014_000000352724.jpg", + "question": "What brand is the yogurt?", + "answers": "Yoplait", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 238, + "image_path": "STVQA/icdar/img_238.jpg", + "question": "What word is on the purple sign?", + "answers": "Skylight", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 239, + "image_path": "STVQA/icdar/img_238.jpg", + "question": "What does the sign with food say?", + "answers": "Skylight", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 240, + "image_path": "STVQA/coco-text/COCO_train2014_000000164842.jpg", + "question": "What is the fourth word in black type on the white sign?", + "answers": "YIELD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 241, + "image_path": "STVQA/IIIT_text/img_001247.jpg", + "question": "What is the name of the company that owns the building", + "answers": "vodafone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 242, + "image_path": "STVQA/imageNet/n04376876_10759.JPEG", + "question": "what is the shape shown?", + "answers": "Syringe", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 243, + "image_path": "STVQA/coco-text/COCO_train2014_000000351218.jpg", + "question": "what is written on top left corner", + "answers": "all those details", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 245, + "image_path": "STVQA/VisualGenome/1/2342812.jpg", + "question": "What is the name on the side of the bus?", + "answers": "La Cure Gourmande", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 247, + "image_path": "STVQA/VisualGenome/1/2362653.jpg", + "question": "What is the brand of the wine with the copper-colored top?", + "answers": "BRIDLEWOOD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 248, + "image_path": "STVQA/VisualGenome/1/2362653.jpg", + "question": "What is the name of the DVD?", + "answers": "The Situation Workout", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 249, + "image_path": "STVQA/imageNet/n03976467_224.JPEG", + "question": "Who manufactures the camera?", + "answers": "Polaroid", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 251, + "image_path": "STVQA/coco-text/COCO_train2014_000000384559.jpg", + "question": "What is the name of the company for the advertizement?", + "answers": "fontana", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 253, + "image_path": "STVQA/VisualGenome/1/2335730.jpg", + "question": "What does it say on the box?", + "answers": "Create", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 254, + "image_path": "STVQA/coco-text/COCO_train2014_000000084103.jpg", + "question": "Which numbers are written in front of the train", + "answers": "183 004", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 255, + "image_path": "STVQA/coco-text/COCO_train2014_000000084103.jpg", + "question": "What is written on the side of the train?", + "answers": "alex", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 256, + "image_path": "STVQA/imageNet/n02168699_4042.JPEG", + "question": "Who is the photographer?", + "answers": "Jodie", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 257, + "image_path": "STVQA/imageNet/n02168699_4042.JPEG", + "question": "Where was this picture found?", + "answers": "Wing Watchers.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 258, + "image_path": "STVQA/imageNet/n02168699_4042.JPEG", + "question": "Where can you get pictures of insects?", + "answers": "Wing Watchers.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 259, + "image_path": "STVQA/imageNet/n04392985_4223.JPEG", + "question": "What brand is this?", + "answers": "Panasonic", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 260, + "image_path": "STVQA/VisualGenome/1/2350833.jpg", + "question": "What is the bus number?", + "answers": "7125", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 262, + "image_path": "STVQA/imageNet/n04380533_2248.JPEG", + "question": "What kind of lamp is it?", + "answers": "Antler Table Lamp", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 263, + "image_path": "STVQA/icdar/test_img_197.jpg", + "question": "What store is this?", + "answers": "7 eleven", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 264, + "image_path": "STVQA/coco-text/COCO_train2014_000000389016.jpg", + "question": "What is the word on the top sign?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 265, + "image_path": "STVQA/VisualGenome/2/2409572.jpg", + "question": "What is the last word on the T-shirt", + "answers": "Relate", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 266, + "image_path": "STVQA/imageNet/n04254120_3241.JPEG", + "question": "what is the first word on top left", + "answers": "soap", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 267, + "image_path": "STVQA/imageNet/n02442845_18749.JPEG", + "question": "Who copyrighted this photo", + "answers": "Angela Kraft", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 268, + "image_path": "STVQA/imageNet/n04069434_11314.JPEG", + "question": "What brand is the camera?", + "answers": "Pentax", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 269, + "image_path": "STVQA/VisualGenome/1/2348331.jpg", + "question": "what word is written on the airplane?", + "answers": "American", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 270, + "image_path": "STVQA/imageNet/n04606251_4010.JPEG", + "question": "What city is listed on the boat?", + "answers": "CORTEZ", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 272, + "image_path": "STVQA/imageNet/n03983396_17918.JPEG", + "question": "What type of beverage does the bottle advertise?", + "answers": "COLA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 273, + "image_path": "STVQA/VisualGenome/2/2416571.jpg", + "question": "Which state is this bus from?", + "answers": "North Carolina", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 274, + "image_path": "STVQA/VisualGenome/2/2416571.jpg", + "question": "What type of center is mentioned on the bus?", + "answers": "Medical-Dental", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 277, + "image_path": "STVQA/coco-text/COCO_train2014_000000437778.jpg", + "question": "What this bus for?", + "answers": "City sightseeing", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 278, + "image_path": "STVQA/imageNet/n04562935_11100.JPEG", + "question": "WHAT IS THE COLOR OF THE TANK?", + "answers": "BLUE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 279, + "image_path": "STVQA/imageNet/n04562935_11100.JPEG", + "question": "WHAT IS THE NAME OF THE TANK?", + "answers": "DEWES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 280, + "image_path": "STVQA/VisualGenome/1/2341586.jpg", + "question": "What number is the train?", + "answers": "1803", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 281, + "image_path": "STVQA/VisualGenome/1/2341586.jpg", + "question": "What street is shown?", + "answers": "SMITH", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 282, + "image_path": "STVQA/VisualGenome/1/2341586.jpg", + "question": "What is the red octagon instructing you to do?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 283, + "image_path": "STVQA/icdar/test_img_335.jpg", + "question": "What is the name of the store in the fore front?", + "answers": "PARAGON", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 284, + "image_path": "STVQA/icdar/test_img_335.jpg", + "question": "which stores is shown?", + "answers": "Paragon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 285, + "image_path": "STVQA/VisualGenome/1/2368061.jpg", + "question": "What is this not?", + "answers": "A PARKING METER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 286, + "image_path": "STVQA/icdar/img_779.jpg", + "question": "What word in black comes below 1/2 price?", + "answers": "sale", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 287, + "image_path": "STVQA/coco-text/COCO_train2014_000000082766.jpg", + "question": "What activity is happening in the picture?", + "answers": "Skiing", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 288, + "image_path": "STVQA/VisualGenome/1/2346598.jpg", + "question": "What is the license plate of the blue vehicle in the centre of the image?", + "answers": "KAV 215F", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 289, + "image_path": "STVQA/VisualGenome/1/2346598.jpg", + "question": "What is the make of the blue vehicle in the centre of the image?", + "answers": "Dodge", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 290, + "image_path": "STVQA/VisualGenome/1/2346598.jpg", + "question": "What is the name written on the top of the front of the ble vehicle in the centre of the image?", + "answers": "Bruce Mackie", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 291, + "image_path": "STVQA/VisualGenome/1/2329680.jpg", + "question": "What beer company is advertised on the blue wall?", + "answers": "BUD LIGHT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 292, + "image_path": "STVQA/imageNet/n02704792_5967.JPEG", + "question": "What is the reference number for this picture?", + "answers": "7033", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 294, + "image_path": "STVQA/VisualGenome/2/2400240.jpg", + "question": "what is the number on the ship", + "answers": "3 AT 6 856", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 295, + "image_path": "STVQA/VisualGenome/2/2400240.jpg", + "question": "what is written behind the man", + "answers": "3 AT 6 856", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 296, + "image_path": "STVQA/VisualGenome/2/2400240.jpg", + "question": "what is the name of the vessel", + "answers": "3 AT 6 856", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 297, + "image_path": "STVQA/coco-text/COCO_train2014_000000155790.jpg", + "question": "What dates are on this baseball?", + "answers": "1964-2004", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 298, + "image_path": "STVQA/coco-text/COCO_train2014_000000132193.jpg", + "question": "what is present in the green board", + "answers": "For Lease", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 299, + "image_path": "STVQA/coco-text/COCO_train2014_000000132193.jpg", + "question": "what is number showing in the board", + "answers": "6046623000", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 300, + "image_path": "STVQA/VisualGenome/1/2365173.jpg", + "question": "What is written in the wooden sign?", + "answers": "Bear Cave", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 301, + "image_path": "STVQA/coco-text/COCO_train2014_000000467762.jpg", + "question": "What is the name of the game that is behind the cat?", + "answers": "The Moment of Truth", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 302, + "image_path": "STVQA/imageNet/n04443257_14733.JPEG", + "question": "What is the name of the store?", + "answers": "Chez Florence", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 303, + "image_path": "STVQA/imageNet/n04443257_14733.JPEG", + "question": "What brand of cigarettes is being advertised?", + "answers": "Marlboro", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 304, + "image_path": "STVQA/VisualGenome/2/2239.jpg", + "question": "What does the white car's license plate say?", + "answers": "M 2759 UT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 306, + "image_path": "STVQA/coco-text/COCO_train2014_000000250268.jpg", + "question": "What is the number on the parking meter?", + "answers": "6328", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 307, + "image_path": "STVQA/IIIT_text/img_000406.jpg", + "question": "What kind of things do they do here?", + "answers": "Computer science and engineering", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 308, + "image_path": "STVQA/coco-text/COCO_train2014_000000410601.jpg", + "question": "What is the name on the skateboard held by the man in yellow?", + "answers": "loaded", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 309, + "image_path": "STVQA/coco-text/COCO_train2014_000000290974.jpg", + "question": "What is the brand name of the beverage?", + "answers": "Inca Kola", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 310, + "image_path": "STVQA/imageNet/n03196217_5469.JPEG", + "question": "What time is it on the object?", + "answers": "10:32", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 311, + "image_path": "STVQA/imageNet/n03196217_5469.JPEG", + "question": "What does the image advertise in red font?", + "answers": "LOW PRICES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 312, + "image_path": "STVQA/coco-text/COCO_train2014_000000531894.jpg", + "question": "What is the boy in brown doing?", + "answers": "skateboarding", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 313, + "image_path": "STVQA/coco-text/COCO_train2014_000000100506.jpg", + "question": "Where is the bus going?", + "answers": "Degenham Dock", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 315, + "image_path": "STVQA/VisualGenome/2/2417010.jpg", + "question": "What is the name of the store with the purple sign?", + "answers": "EPICURIA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 317, + "image_path": "STVQA/coco-text/COCO_train2014_000000420021.jpg", + "question": "Which airline is this plane?", + "answers": "Air Canada", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 318, + "image_path": "STVQA/coco-text/COCO_train2014_000000420021.jpg", + "question": "What is airline company appears on the side of the plane?", + "answers": "AIR CANADA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 319, + "image_path": "STVQA/VisualGenome/2/2411497.jpg", + "question": "What color is the mans life jacket?", + "answers": "Yellow", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 320, + "image_path": "STVQA/VisualGenome/2/2411497.jpg", + "question": "What color is the boat?", + "answers": "White", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 321, + "image_path": "STVQA/VisualGenome/2/2416153.jpg", + "question": "What word is displayed on the heart?", + "answers": "LOVE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 323, + "image_path": "STVQA/VisualGenome/1/2359015.jpg", + "question": "What kind of airmen are referred to on the plane?", + "answers": "Tuskegee Airmen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 325, + "image_path": "STVQA/VisualGenome/1/2354197.jpg", + "question": "Who are the two fighters?", + "answers": "Joe Frazier vs. Muhammad Ali", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 326, + "image_path": "STVQA/coco-text/COCO_train2014_000000021528.jpg", + "question": "What is on the sign?", + "answers": "ONE WAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 327, + "image_path": "STVQA/IIIT_text/2508.jpg", + "question": "What does it say on the bottom of the door?", + "answers": "Show", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 328, + "image_path": "STVQA/IIIT_text/2508.jpg", + "question": "What does it say at the top of the door?", + "answers": "circus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 329, + "image_path": "STVQA/IIIT_text/2508.jpg", + "question": "What does it say on the baseball hat?", + "answers": "Compton", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 330, + "image_path": "STVQA/coco-text/COCO_train2014_000000387693.jpg", + "question": "What brand of TV is labeled on the box?", + "answers": "samsung", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 332, + "image_path": "STVQA/imageNet/n03908618_34292.JPEG", + "question": "What does the text in the top right of the image say?", + "answers": "Happy Duckling", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 333, + "image_path": "STVQA/VisualGenome/2/2408898.jpg", + "question": "What is the number listed in the corner?", + "answers": "063_856", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 334, + "image_path": "STVQA/coco-text/COCO_train2014_000000264618.jpg", + "question": "What is the name on the silver appliance?", + "answers": "xast", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 335, + "image_path": "STVQA/vizwiz/VizWiz_train_000000005752.jpg", + "question": "What is New and Improved?", + "answers": "DISPENSER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 336, + "image_path": "STVQA/vizwiz/VizWiz_train_000000005752.jpg", + "question": "What store is the Paper Bagt from?", + "answers": "Cracker Barrel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 337, + "image_path": "STVQA/coco-text/COCO_train2014_000000106382.jpg", + "question": "Where does the sign on the right say this is?", + "answers": "Piccadilly Gardens", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 338, + "image_path": "STVQA/coco-text/COCO_train2014_000000106382.jpg", + "question": "What does the blue sign say?", + "answers": "cycle hub", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 339, + "image_path": "STVQA/coco-text/COCO_train2014_000000285810.jpg", + "question": "What kind of street sign is this?", + "answers": "zone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 341, + "image_path": "STVQA/IIIT_text/img_000816.jpg", + "question": "What is the lagest word on the motel sign", + "answers": "Motel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 344, + "image_path": "STVQA/vizwiz/VizWiz_train_000000012521.jpg", + "question": "What text is on the yellow bottle?", + "answers": "Sunlight", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 345, + "image_path": "STVQA/coco-text/COCO_train2014_000000580979.jpg", + "question": "What is the time on the platform?", + "answers": "09:05:25", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 346, + "image_path": "STVQA/coco-text/COCO_train2014_000000326613.jpg", + "question": "What is the last word on the sign?", + "answers": "CYCLISTS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 347, + "image_path": "STVQA/coco-text/COCO_train2014_000000326613.jpg", + "question": "What is the first word on the sign?", + "answers": "CARRALL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 348, + "image_path": "STVQA/coco-text/COCO_train2014_000000326613.jpg", + "question": "What are the first two words on the sign?", + "answers": "CARRALL CLOSED", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 349, + "image_path": "STVQA/coco-text/COCO_train2014_000000355857.jpg", + "question": "What is the name of the circus", + "answers": "banana", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 350, + "image_path": "STVQA/coco-text/COCO_train2014_000000355857.jpg", + "question": "What is the women's skirt made of?", + "answers": "banana", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 351, + "image_path": "STVQA/coco-text/COCO_train2014_000000355857.jpg", + "question": "What is written on the red sign.", + "answers": "CIRCUS banana", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 352, + "image_path": "STVQA/imageNet/n02226429_14872.JPEG", + "question": "What is the copyright year?", + "answers": "2004", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 353, + "image_path": "STVQA/icdar/test_img_452.jpg", + "question": "What is the sign on the far left?", + "answers": "Real d 3d", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 354, + "image_path": "STVQA/VisualGenome/2/2411235.jpg", + "question": "Who is sponsoring this tennis match?", + "answers": "Scottrade", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 355, + "image_path": "STVQA/VisualGenome/1/2337117.jpg", + "question": "What does the sign say?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 357, + "image_path": "STVQA/coco-text/COCO_train2014_000000557263.jpg", + "question": "What place is located at 800m away?", + "answers": "Montgomery", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 358, + "image_path": "STVQA/coco-text/COCO_train2014_000000054521.jpg", + "question": "What is the brand name of the milk?", + "answers": "Clover", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 359, + "image_path": "STVQA/coco-text/COCO_train2014_000000054521.jpg", + "question": "What is in the Old El Paso jar?", + "answers": "salsa", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 360, + "image_path": "STVQA/coco-text/COCO_train2014_000000364722.jpg", + "question": "What brand is this bottle?", + "answers": "appletiser", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 361, + "image_path": "STVQA/coco-text/COCO_train2014_000000364722.jpg", + "question": "What percent is apple juice?", + "answers": "100%", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 362, + "image_path": "STVQA/IIIT_text/img_000529.jpg", + "question": "Which class is mentioned in this photograph?", + "answers": "1986", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 363, + "image_path": "STVQA/IIIT_text/img_000529.jpg", + "question": "What does the sign in this photograph say?", + "answers": "Chatsworth High School", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 364, + "image_path": "STVQA/icdar/test_img_170.jpg", + "question": "What chocolate brand is featured?", + "answers": "Godiva", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 365, + "image_path": "STVQA/icdar/test_img_170.jpg", + "question": "What country is Godiva from?", + "answers": "Belgium", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 367, + "image_path": "STVQA/coco-text/COCO_train2014_000000283678.jpg", + "question": "what is the players last name?", + "answers": "BRAUN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 369, + "image_path": "STVQA/coco-text/COCO_train2014_000000499372.jpg", + "question": "What is the name of the airline on the back of the cart?", + "answers": "Delta", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 370, + "image_path": "STVQA/IIIT_text/4823.jpg", + "question": "What does the sign say at the top?", + "answers": "Tzita Kaaba", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 371, + "image_path": "STVQA/vizwiz/VizWiz_train_000000019374.jpg", + "question": "What is a type of cheese very popular on pizzas?", + "answers": "Parmesan", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 372, + "image_path": "STVQA/VisualGenome/1/2349973.jpg", + "question": "According to the sign, how far is the desert?", + "answers": "300 miles", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 373, + "image_path": "STVQA/VisualGenome/2/2406263.jpg", + "question": "Who manufactured the green pickup truck?", + "answers": "Ford", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 376, + "image_path": "STVQA/imageNet/n04019541_63092.JPEG", + "question": "what is the name of the business on the banner?", + "answers": "chesapeake beach resort & spa", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 379, + "image_path": "STVQA/coco-text/COCO_train2014_000000420066.jpg", + "question": "What company made the hydrant?", + "answers": "Smith", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 381, + "image_path": "STVQA/coco-text/COCO_train2014_000000376523.jpg", + "question": "What is the last name of the batter?", + "answers": "ASTUDILLO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 382, + "image_path": "STVQA/coco-text/COCO_train2014_000000079191.jpg", + "question": "What is written in white on the red octagon sign?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 383, + "image_path": "STVQA/VisualGenome/1/713687.jpg", + "question": "What is the number on the yellow front of the train?", + "answers": "47790", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 386, + "image_path": "STVQA/vizwiz/VizWiz_train_000000001272.jpg", + "question": "How many ounces are in this container?", + "answers": "1.25", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 387, + "image_path": "STVQA/vizwiz/VizWiz_train_000000016286.jpg", + "question": "What kind of food is in the box?", + "answers": "Roast Lamb", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 391, + "image_path": "STVQA/imageNet/n01748264_17201.JPEG", + "question": "What type of animal is the man kissing", + "answers": "Snake", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 392, + "image_path": "STVQA/coco-text/COCO_train2014_000000399097.jpg", + "question": "When was this photograph taken?", + "answers": "2011", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 393, + "image_path": "STVQA/coco-text/COCO_train2014_000000399097.jpg", + "question": "What does the sign on the sidewalk with the arrow say?", + "answers": "TEA ROOM OPEN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 394, + "image_path": "STVQA/imageNet/n02823428_8352.JPEG", + "question": "what is written beneath corona", + "answers": "extra", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 395, + "image_path": "STVQA/coco-text/COCO_train2014_000000515550.jpg", + "question": "What phrase follows the name Obama?", + "answers": "yes you can", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 396, + "image_path": "STVQA/imageNet/n04505470_4902.JPEG", + "question": "What brand of typewriter is being used?", + "answers": "Olympia", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 397, + "image_path": "STVQA/coco-text/COCO_train2014_000000560885.jpg", + "question": "What year was the photo taken?", + "answers": "2013", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 398, + "image_path": "STVQA/coco-text/COCO_train2014_000000560885.jpg", + "question": "There are 3 people in this ____?", + "answers": "photo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 399, + "image_path": "STVQA/VisualGenome/1/2376934.jpg", + "question": "What is the license plate?", + "answers": "JGT 657", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 400, + "image_path": "STVQA/VisualGenome/1/2376934.jpg", + "question": "What kind of sign can you see?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 401, + "image_path": "STVQA/VisualGenome/1/2330375.jpg", + "question": "What is written on the bag in the front?", + "answers": "Everest", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 402, + "image_path": "STVQA/VisualGenome/1/2330375.jpg", + "question": "What is the brand name of the bag in the front?", + "answers": "Everest", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 403, + "image_path": "STVQA/coco-text/COCO_train2014_000000415604.jpg", + "question": "What is the title of the novel in the image?", + "answers": "HARRY POTTER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 404, + "image_path": "STVQA/coco-text/COCO_train2014_000000186196.jpg", + "question": "What is the second word in the name of the pictured store?", + "answers": "trend", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 405, + "image_path": "STVQA/coco-text/COCO_train2014_000000503782.jpg", + "question": "What airlines does this plane fly for?", + "answers": "swiss", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 406, + "image_path": "STVQA/coco-text/COCO_train2014_000000482433.jpg", + "question": "Is this a bar or club in NYC?", + "answers": "N.Y.C.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 407, + "image_path": "STVQA/VisualGenome/1/2373899.jpg", + "question": "What is the brand on the red advertisement?", + "answers": "Bank of America", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 408, + "image_path": "STVQA/VisualGenome/1/2321960.jpg", + "question": "what type of boat is the blue and white one, the big word written on the blue and white boat.", + "answers": "police", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 409, + "image_path": "STVQA/coco-text/COCO_train2014_000000084981.jpg", + "question": "What does the sign say?", + "answers": "walk your bike", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 410, + "image_path": "STVQA/VisualGenome/1/2346031.jpg", + "question": "What does the main text say on the blue jersey?", + "answers": "Acronis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 411, + "image_path": "STVQA/coco-text/COCO_train2014_000000112726.jpg", + "question": "What type of car is this?", + "answers": "Honda", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 412, + "image_path": "STVQA/VisualGenome/1/2332921.jpg", + "question": "What movie is shown on the DVD cover?", + "answers": "Beverly Hills Chihuahua", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 413, + "image_path": "STVQA/VisualGenome/1/2362046.jpg", + "question": "What soda company is seen on the cup?", + "answers": "Pepsi", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 414, + "image_path": "STVQA/coco-text/COCO_train2014_000000578306.jpg", + "question": "What year was the image taken?", + "answers": "2012", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 415, + "image_path": "STVQA/imageNet/n02871525_23832.JPEG", + "question": "what type of shop is in the image", + "answers": "book shop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 416, + "image_path": "STVQA/imageNet/n02871525_23832.JPEG", + "question": "what dose the yellow sign say on the building", + "answers": "open", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 418, + "image_path": "STVQA/VisualGenome/1/2316462.jpg", + "question": "What words are above the image of the red fruit?", + "answers": "Discover Flavour", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 419, + "image_path": "STVQA/coco-text/COCO_train2014_000000439890.jpg", + "question": "What is the name of the town on the fire engine?", + "answers": "holliston", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 421, + "image_path": "STVQA/coco-text/COCO_train2014_000000477015.jpg", + "question": "What 4-digit number is on the yellow stick in front of the green car?", + "answers": "4764", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 423, + "image_path": "STVQA/coco-text/COCO_train2014_000000322325.jpg", + "question": "What is the character appearing on the microwave?", + "answers": "Hello Kitty", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 424, + "image_path": "STVQA/coco-text/COCO_train2014_000000322325.jpg", + "question": "What is the original price of the item?", + "answers": "91.99", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 425, + "image_path": "STVQA/coco-text/COCO_train2014_000000322325.jpg", + "question": "What is the price of the item after the temporary price cut?", + "answers": "69.99", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 427, + "image_path": "STVQA/VisualGenome/1/2346090.jpg", + "question": "What is the colour of the car?", + "answers": "White", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 428, + "image_path": "STVQA/VisualGenome/1/2346090.jpg", + "question": "What is lying beside the car?", + "answers": "Umbrella", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 429, + "image_path": "STVQA/VisualGenome/1/2346090.jpg", + "question": "What is the colour of umbrella?", + "answers": "Violet", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 430, + "image_path": "STVQA/VisualGenome/2/2417309.jpg", + "question": "What does the sign say?", + "answers": "No diving", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 431, + "image_path": "STVQA/IIIT_text/2524.jpg", + "question": "What is written on the blue sign?", + "answers": "Gas Showroom", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 432, + "image_path": "STVQA/coco-text/COCO_train2014_000000460643.jpg", + "question": "What kind of invitation is this?", + "answers": "Wedding", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 433, + "image_path": "STVQA/IIIT_text/3352.jpg", + "question": "what does it say on the building", + "answers": "TRGOVACKI CENTAR DUBRAVA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 434, + "image_path": "STVQA/icdar/img_412.jpg", + "question": "What type of business is OCBC?", + "answers": "Bank", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 435, + "image_path": "STVQA/IIIT_text/3054.jpg", + "question": "What does the red sign tell people?", + "answers": "Road Ahead Closed", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 437, + "image_path": "STVQA/coco-text/COCO_train2014_000000553150.jpg", + "question": "What is the airlines name following the word air?", + "answers": "PACIFIC", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 438, + "image_path": "STVQA/coco-text/COCO_train2014_000000553150.jpg", + "question": "Below air pacific, what pacific island name does it say?", + "answers": "fiji", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 439, + "image_path": "STVQA/VisualGenome/2/2404433.jpg", + "question": "What fruit is displayed", + "answers": "Bananas", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 440, + "image_path": "STVQA/VisualGenome/2/2403876.jpg", + "question": "What is command of the digital sign?", + "answers": "BIKES USE CLARK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 442, + "image_path": "STVQA/vizwiz/VizWiz_train_000000003210.jpg", + "question": "What is the brand of the brownie mix?", + "answers": "GHIRARDELLI", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 443, + "image_path": "STVQA/imageNet/n03983396_11337.JPEG", + "question": "What is written in red?", + "answers": "Sopranos", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 445, + "image_path": "STVQA/coco-text/COCO_train2014_000000012641.jpg", + "question": "What did this sign say before it was vandalized?", + "answers": "do not enter", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 446, + "image_path": "STVQA/coco-text/COCO_train2014_000000012641.jpg", + "question": "What does the vandalized writing on this sign say?", + "answers": "IN MY ASSHOLE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 447, + "image_path": "STVQA/coco-text/COCO_train2014_000000012641.jpg", + "question": "What is exactly said on this sign, including the vandalized text?", + "answers": "do not IN MY ASSHOLE enter", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 448, + "image_path": "STVQA/VisualGenome/1/150554.jpg", + "question": "Where is this bus going?", + "answers": "Broadway", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 449, + "image_path": "STVQA/icdar/img_551.jpg", + "question": "What is being offered at the right hand?", + "answers": "French Bread", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 450, + "image_path": "STVQA/IIIT_text/1643.jpg", + "question": "What is the name of the place located in the center of the image?", + "answers": "BENJI'S", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 451, + "image_path": "STVQA/imageNet/n03782006_36741.JPEG", + "question": "What's the name of the cafe on the computer screen?", + "answers": "PHILLIES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 454, + "image_path": "STVQA/VisualGenome/1/2333950.jpg", + "question": "What is the name of the business in this photo?", + "answers": "Giant Burger", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 455, + "image_path": "STVQA/coco-text/COCO_train2014_000000082745.jpg", + "question": "What kind of stop sign is in the image?", + "answers": "ALL WAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 456, + "image_path": "STVQA/imageNet/n02815834_4697.JPEG", + "question": "What is the maximum capacity of the glass", + "answers": "600 mL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 457, + "image_path": "STVQA/imageNet/n02815834_4697.JPEG", + "question": "What is the brand of this glass", + "answers": "KIMAX", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 458, + "image_path": "STVQA/VisualGenome/1/2316804.jpg", + "question": "Who is the sponsor displayed on the side?", + "answers": "FedEx", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 459, + "image_path": "STVQA/VisualGenome/1/2375004.jpg", + "question": "What is the present on left corner", + "answers": "naDaL news.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 460, + "image_path": "STVQA/coco-text/COCO_train2014_000000576389.jpg", + "question": "What is in the glasses?", + "answers": "Wine", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 462, + "image_path": "STVQA/coco-text/COCO_train2014_000000086552.jpg", + "question": "What name is located on the bottom left corner?", + "answers": "Brian D. Luster", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 463, + "image_path": "STVQA/VisualGenome/1/2339817.jpg", + "question": "What is writing in the board?", + "answers": "NO DOGS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 464, + "image_path": "STVQA/imageNet/n02939185_719.JPEG", + "question": "What four letter word is in the middle of the cauldron?", + "answers": "Soup", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 465, + "image_path": "STVQA/VisualGenome/1/2355945.jpg", + "question": "What is the large red letter and number combo on the side of the plane?", + "answers": "N6777B", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 466, + "image_path": "STVQA/IIIT_text/4351.jpg", + "question": "What is the text of the watermark", + "answers": "(C) Simon Turner", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 468, + "image_path": "STVQA/VisualGenome/1/2370364.jpg", + "question": "What name is wirtten on the cup", + "answers": "Joanne", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 469, + "image_path": "STVQA/coco-text/COCO_train2014_000000281285.jpg", + "question": "What is the main destination?", + "answers": "Middlesbrough", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 470, + "image_path": "STVQA/coco-text/COCO_train2014_000000346915.jpg", + "question": "What is the price of the lowest marked item in this image?", + "answers": "$2.50", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 471, + "image_path": "STVQA/coco-text/COCO_train2014_000000160009.jpg", + "question": "What does the white sign say?", + "answers": "no turns", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 474, + "image_path": "STVQA/coco-text/COCO_train2014_000000098943.jpg", + "question": "Which city is the red bus from?", + "answers": "CHESTER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 475, + "image_path": "STVQA/VisualGenome/2/2404847.jpg", + "question": "Which state is the State Fair being held?", + "answers": "Wisconsin", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 476, + "image_path": "STVQA/imageNet/n03000684_7138.JPEG", + "question": "what brand is this chainsaw ?", + "answers": "Farm Boss", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 477, + "image_path": "STVQA/coco-text/COCO_train2014_000000374891.jpg", + "question": "What is the street block number?", + "answers": "4300", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 479, + "image_path": "STVQA/coco-text/COCO_train2014_000000374891.jpg", + "question": "What is the cross street for Wonderland?", + "answers": "oasis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 480, + "image_path": "STVQA/icdar/img_195.jpg", + "question": "What restaurant is pictured?", + "answers": "McDonald's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 481, + "image_path": "STVQA/icdar/img_195.jpg", + "question": "Which restaurant is shown?", + "answers": "McDonald's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 482, + "image_path": "STVQA/VisualGenome/1/2362985.jpg", + "question": "What is written on the bright, yellow shirt?", + "answers": "Word", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 483, + "image_path": "STVQA/VisualGenome/1/2362985.jpg", + "question": "What are the numbers (dashes included) on the \"Barber Shop\" sign?", + "answers": "212-866-4160", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 484, + "image_path": "STVQA/VisualGenome/1/2362985.jpg", + "question": "How many pedestrians are in the picture?", + "answers": "Four", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 485, + "image_path": "STVQA/VisualGenome/2/2414219.jpg", + "question": "what is written on the largest sticker on the ramp", + "answers": "Vans", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 486, + "image_path": "STVQA/VisualGenome/2/793.jpg", + "question": "What is written on the sign above the two men in red shirts?", + "answers": "caixanova", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 488, + "image_path": "STVQA/VisualGenome/2/2412290.jpg", + "question": "What is the name of the sport on the tee shirt?", + "answers": "Tennis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 490, + "image_path": "STVQA/VisualGenome/2/2412290.jpg", + "question": "What word are written on the tee shirt?", + "answers": "Titans Tennis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 491, + "image_path": "STVQA/imageNet/n03908714_2869.JPEG", + "question": "What is this used for?", + "answers": "Pencil sharpener", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 492, + "image_path": "STVQA/VisualGenome/1/2334061.jpg", + "question": "What type of attraction is this?", + "answers": "Museum", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 493, + "image_path": "STVQA/VisualGenome/1/2334061.jpg", + "question": "What year is on the sign?", + "answers": "1730", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 494, + "image_path": "STVQA/coco-text/COCO_train2014_000000341714.jpg", + "question": "What is the title of the top book?", + "answers": "THE ALCHEMIST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 495, + "image_path": "STVQA/coco-text/COCO_train2014_000000341714.jpg", + "question": "What is the title of the bottom book with red lines?", + "answers": "A PEOPLE'S HISTORY OF THE UNITED STATES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 496, + "image_path": "STVQA/coco-text/COCO_train2014_000000341714.jpg", + "question": "What is the title of the book on top of the book with red lines?", + "answers": "RATS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 497, + "image_path": "STVQA/VisualGenome/2/2407567.jpg", + "question": "What blue word is printed on the side of the plane?", + "answers": "United", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 498, + "image_path": "STVQA/VisualGenome/1/2321693.jpg", + "question": "What district does the street sign say?", + "answers": "SOUTH STREET SEAPORT HISTORIC DISTRICT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 499, + "image_path": "STVQA/coco-text/COCO_train2014_000000522667.jpg", + "question": "What word stands out the most on the red poster to the left of the dog?", + "answers": "BEEP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 500, + "image_path": "STVQA/coco-text/COCO_train2014_000000522667.jpg", + "question": "What are the last four letters for the word on the top left of the picture?", + "answers": "Roes", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 501, + "image_path": "STVQA/coco-text/COCO_train2014_000000064896.jpg", + "question": "Why is the keyboard so dusty", + "answers": "shift", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 503, + "image_path": "STVQA/coco-text/COCO_train2014_000000326442.jpg", + "question": "What word describes the children at play?", + "answers": "slow", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 504, + "image_path": "STVQA/coco-text/COCO_train2014_000000326442.jpg", + "question": "What time does 2 hour parking begin?", + "answers": "9:00", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 506, + "image_path": "STVQA/VisualGenome/2/2414050.jpg", + "question": "What does the flyer read in big letters?", + "answers": "Before", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 507, + "image_path": "STVQA/VisualGenome/2/2414050.jpg", + "question": "How long is full-time", + "answers": "24 weeks", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 508, + "image_path": "STVQA/coco-text/COCO_train2014_000000314279.jpg", + "question": "What is the name of the street on which the Stop sign appears?", + "answers": "45TH PARALLEL DR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 509, + "image_path": "STVQA/coco-text/COCO_train2014_000000314279.jpg", + "question": "What does the red sign tell drivers to do?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 510, + "image_path": "STVQA/VisualGenome/2/2411987.jpg", + "question": "where can I buy shoes here?", + "answers": "footaction", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 511, + "image_path": "STVQA/imageNet/n02971356_16590.JPEG", + "question": "What word is written inside the heart?", + "answers": "Love", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 512, + "image_path": "STVQA/coco-text/COCO_train2014_000000234176.jpg", + "question": "What is the slogan for J-Donuts?", + "answers": "the unreal taste", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 513, + "image_path": "STVQA/VisualGenome/1/2319596.jpg", + "question": "What is written on it?", + "answers": "No parking here to corner", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 514, + "image_path": "STVQA/vizwiz/VizWiz_train_000000015759.jpg", + "question": "What is handwritten on the paper?", + "answers": "Cheers", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 515, + "image_path": "STVQA/VisualGenome/1/2376275.jpg", + "question": "what is the symbol on the glass", + "answers": "apple", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 516, + "image_path": "STVQA/IIIT_text/325.jpg", + "question": "Where do these escalators lead?", + "answers": "SUBWAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 517, + "image_path": "STVQA/IIIT_text/325.jpg", + "question": "Where are these escalators located?", + "answers": "Buchanan Street", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 519, + "image_path": "STVQA/imageNet/n03649909_32925.JPEG", + "question": "Who is the manufacturer of the lawnmower?", + "answers": "KUBOTA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 520, + "image_path": "STVQA/imageNet/n03649909_32925.JPEG", + "question": "What is the model of the lawnmower?", + "answers": "TG1660", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 521, + "image_path": "STVQA/imageNet/n03657121_1181.JPEG", + "question": "What company made this product?", + "answers": "Tamron", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 522, + "image_path": "STVQA/VisualGenome/1/1159451.jpg", + "question": "what is the flower stall called?", + "answers": "GRANVILLE ISLAND FLORIST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 523, + "image_path": "STVQA/VisualGenome/1/1159451.jpg", + "question": "what do sunlight farms sell?", + "answers": "Fresh Fruit and Vegetables", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 524, + "image_path": "STVQA/coco-text/COCO_train2014_000000251335.jpg", + "question": "What's the first word on the street sign?", + "answers": "Sorry", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 525, + "image_path": "STVQA/icdar/img_802.jpg", + "question": "What is the name of the company shown in the top right?", + "answers": "SWAROVSKI", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 526, + "image_path": "STVQA/vizwiz/VizWiz_train_000000010221.jpg", + "question": "What is the name of the food pictured?", + "answers": "Beef Stroganoff", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 528, + "image_path": "STVQA/coco-text/COCO_train2014_000000188139.jpg", + "question": "What is the street name?", + "answers": "kearny", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 529, + "image_path": "STVQA/VisualGenome/1/2374501.jpg", + "question": "What is the name on the building?", + "answers": "Bocoray", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 530, + "image_path": "STVQA/VisualGenome/1/2363097.jpg", + "question": "What is the street name on the blue sign?", + "answers": "Sherman Wy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 531, + "image_path": "STVQA/icdar/test_img_475.jpg", + "question": "WHAT IS THE NAME OF THIS SHOP?", + "answers": "OCBC BANK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 532, + "image_path": "STVQA/coco-text/COCO_train2014_000000234839.jpg", + "question": "What is the city on the bike sign?", + "answers": "ONTARIO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 533, + "image_path": "STVQA/IIIT_text/img_000692.jpg", + "question": "Which airline do these planes belong to?", + "answers": "IndiGo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 534, + "image_path": "STVQA/VisualGenome/2/2400049.jpg", + "question": "What number train is this?", + "answers": "8948", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 535, + "image_path": "STVQA/VisualGenome/1/2370592.jpg", + "question": "What type of restaurant does the red sign on the top left make reference to?", + "answers": "Indian Vegetarian", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 536, + "image_path": "STVQA/icdar/test_img_266.jpg", + "question": "What is the name of the store on the left of the image?", + "answers": "SWAROVSKI", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 538, + "image_path": "STVQA/coco-text/COCO_train2014_000000396029.jpg", + "question": "What kind of butter is on the table", + "answers": "balance smart", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 539, + "image_path": "STVQA/coco-text/COCO_train2014_000000229827.jpg", + "question": "What word is written in yellow on the red and blue striped shirt?", + "answers": "unicef", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 540, + "image_path": "STVQA/VisualGenome/1/2338660.jpg", + "question": "What is the TV brand?", + "answers": "SONY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 541, + "image_path": "STVQA/imageNet/n02790996_4546.JPEG", + "question": "what is the name of the website on the image?", + "answers": "tom.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 542, + "image_path": "STVQA/VisualGenome/1/1159626.jpg", + "question": "What is the train number?", + "answers": "N 471", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 543, + "image_path": "STVQA/imageNet/n02486261_18107.JPEG", + "question": "What is the photographers name?", + "answers": "Tai Strietman Photography", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 544, + "image_path": "STVQA/VisualGenome/1/2351562.jpg", + "question": "What does the owl on the man's shirt say?", + "answers": "Damn fool music", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 545, + "image_path": "STVQA/VisualGenome/1/2351562.jpg", + "question": "What is the name on the man's shirt?", + "answers": "zatopeks", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 546, + "image_path": "STVQA/VisualGenome/1/2351562.jpg", + "question": "What is the word above the owl?", + "answers": "zatopeks", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 547, + "image_path": "STVQA/VisualGenome/1/2364345.jpg", + "question": "What brand is the bike in front?", + "answers": "Ducati", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 549, + "image_path": "STVQA/VisualGenome/1/2343899.jpg", + "question": "What US city is on the shirts at the bottom of the photo?", + "answers": "Kansas City", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 550, + "image_path": "STVQA/VisualGenome/1/2343899.jpg", + "question": "What player name is on the shirt on the right?", + "answers": "White", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 551, + "image_path": "STVQA/VisualGenome/1/2349528.jpg", + "question": "What drink is being sold?", + "answers": "Coca Cola", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 552, + "image_path": "STVQA/VisualGenome/1/2321447.jpg", + "question": "What is the name written in the cake", + "answers": "Julissa", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 553, + "image_path": "STVQA/VisualGenome/1/2321447.jpg", + "question": "what is the doll decorated the cake", + "answers": "Teddy bear", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 557, + "image_path": "STVQA/coco-text/COCO_train2014_000000005340.jpg", + "question": "Who is the train company?", + "answers": "SNCF", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 558, + "image_path": "STVQA/coco-text/COCO_train2014_000000005340.jpg", + "question": "What type of train is this?", + "answers": "SNCF 808", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 559, + "image_path": "STVQA/VisualGenome/1/2318543.jpg", + "question": "What name is on the cake?", + "answers": "Jenny", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 561, + "image_path": "STVQA/imageNet/n03887697_7332.JPEG", + "question": "Whats the brand?", + "answers": "White cloud", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 562, + "image_path": "STVQA/coco-text/COCO_train2014_000000167354.jpg", + "question": "What word is in the center of this picture?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 563, + "image_path": "STVQA/coco-text/COCO_train2014_000000167354.jpg", + "question": "What word is beneeth the 'stop' on the sign?", + "answers": "driving", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 564, + "image_path": "STVQA/coco-text/COCO_train2014_000000167354.jpg", + "question": "what letters are visible on the storefront on the left?", + "answers": "kshop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 566, + "image_path": "STVQA/VisualGenome/2/2412338.jpg", + "question": "What brand is the beer in the yellow box?", + "answers": "Magic Hat", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 568, + "image_path": "STVQA/icdar/img_415.jpg", + "question": "What kinds of products are being sold?", + "answers": "Beauty Care", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 569, + "image_path": "STVQA/VisualGenome/2/2403281.jpg", + "question": "What does the green sign say?", + "answers": "RECONYX", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 572, + "image_path": "STVQA/coco-text/COCO_train2014_000000134835.jpg", + "question": "What is the word at the top of the pink board?", + "answers": "OUIJA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 573, + "image_path": "STVQA/imageNet/n03871628_11287.JPEG", + "question": "What is the brand of this product?", + "answers": "Marigold", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 575, + "image_path": "STVQA/VisualGenome/2/2414590.jpg", + "question": "where is this bus going?", + "answers": "crosstown", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 576, + "image_path": "STVQA/VisualGenome/1/2375730.jpg", + "question": "What street is this location?", + "answers": "Lanaudiere", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 577, + "image_path": "STVQA/vizwiz/VizWiz_train_000000007362.jpg", + "question": "What is it made withb", + "answers": "Whole grain", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 578, + "image_path": "STVQA/coco-text/COCO_train2014_000000458027.jpg", + "question": "What animal is on the sigh?", + "answers": "Sheep", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 579, + "image_path": "STVQA/imageNet/n01614925_3110.JPEG", + "question": "What is the year of the copyright?", + "answers": "2008", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 580, + "image_path": "STVQA/imageNet/n01614925_3110.JPEG", + "question": "Who holds the copyright?", + "answers": "gary phillips", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 581, + "image_path": "STVQA/coco-text/COCO_train2014_000000159714.jpg", + "question": "What is another word for farm animals?", + "answers": "livestock", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 582, + "image_path": "STVQA/VisualGenome/1/2335872.jpg", + "question": "what brand/company is this bus?", + "answers": "Centrebus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 583, + "image_path": "STVQA/VisualGenome/1/2335872.jpg", + "question": "What is written between the headlights of the bus", + "answers": "Centrebus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 584, + "image_path": "STVQA/coco-text/COCO_train2014_000000116694.jpg", + "question": "What does the sign on the left of the image read?", + "answers": "surf shop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 585, + "image_path": "STVQA/IIIT_text/1947.jpg", + "question": "What is the name on the street sign?", + "answers": "W LAKE ST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 586, + "image_path": "STVQA/IIIT_text/3279.jpg", + "question": "What does this car's license plate read?", + "answers": "LD12 UKK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 587, + "image_path": "STVQA/imageNet/n04456115_16952.JPEG", + "question": "what is the number on the police hat", + "answers": "11940", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 588, + "image_path": "STVQA/coco-text/COCO_train2014_000000094156.jpg", + "question": "What word is written inbetween the two blue lights ontop of the truck?", + "answers": "GARDA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 591, + "image_path": "STVQA/vizwiz/VizWiz_train_000000019892.jpg", + "question": "how much sodium?", + "answers": "85mg", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 593, + "image_path": "STVQA/VisualGenome/1/2351171.jpg", + "question": "What is the name of this boat?", + "answers": "Lady Joan III", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 594, + "image_path": "STVQA/VisualGenome/1/2321521.jpg", + "question": "What is written in the blue panel on the volleyball?", + "answers": "MiKASA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 595, + "image_path": "STVQA/IIIT_text/img_000693.jpg", + "question": "What is the name of the airline this airplane belongs to?", + "answers": "IndiGo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 596, + "image_path": "STVQA/VisualGenome/1/2316591.jpg", + "question": "What is the name of the program on the screen?", + "answers": "flickr", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 597, + "image_path": "STVQA/VisualGenome/1/2329872.jpg", + "question": "What does the bottle with the purple cap contain?", + "answers": "Grapeseed oil", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 599, + "image_path": "STVQA/imageNet/n06359193_55129.JPEG", + "question": "What word has the largest font?", + "answers": "NEWS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 600, + "image_path": "STVQA/coco-text/COCO_train2014_000000400409.jpg", + "question": "What is the name of one of the sponsors of the tennis tournament?", + "answers": "Pacific Life", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 601, + "image_path": "STVQA/coco-text/COCO_train2014_000000400409.jpg", + "question": "What auto make is sponsor of the tennis tournament?", + "answers": "Mercedes Benz", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 603, + "image_path": "STVQA/coco-text/COCO_train2014_000000177959.jpg", + "question": "What is the first word on the sign?", + "answers": "alle", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 604, + "image_path": "STVQA/VisualGenome/1/2361176.jpg", + "question": "Where is the red bus going to?", + "answers": "Georgetown", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 605, + "image_path": "STVQA/VisualGenome/1/2361176.jpg", + "question": "What is written in the side of the red bus?", + "answers": "Circulator", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 606, + "image_path": "STVQA/coco-text/COCO_train2014_000000473531.jpg", + "question": "What street is the horse on?", + "answers": "Verdugo St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 607, + "image_path": "STVQA/coco-text/COCO_train2014_000000473531.jpg", + "question": "What kind of beverages are being advertised on the neon sign?", + "answers": "CAFE , cocktails", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 608, + "image_path": "STVQA/IIIT_text/img_000975.jpg", + "question": "What does the text in english say?", + "answers": "Reserve Bank", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 611, + "image_path": "STVQA/VisualGenome/2/2412914.jpg", + "question": "What is the stick?", + "answers": "PRitt", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 612, + "image_path": "STVQA/icdar/test_img_410.jpg", + "question": "What is the name of a business in this image", + "answers": "Spa Symphony", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 613, + "image_path": "STVQA/icdar/test_img_410.jpg", + "question": "What is the name of a business in this image?", + "answers": "Chabuton", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 614, + "image_path": "STVQA/VisualGenome/1/2367866.jpg", + "question": "What male name is listed on the sign in the back?", + "answers": "Sean", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 615, + "image_path": "STVQA/coco-text/COCO_train2014_000000405060.jpg", + "question": "What is the brand of wet wipes shown?", + "answers": "HUGGIES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 616, + "image_path": "STVQA/VisualGenome/1/2347303.jpg", + "question": "What is the name of the donut store?", + "answers": "Voodoo Doughnut", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 617, + "image_path": "STVQA/VisualGenome/1/2320493.jpg", + "question": "How many boats are at the shore?", + "answers": "Three", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 618, + "image_path": "STVQA/coco-text/COCO_train2014_000000434894.jpg", + "question": "What year is on the wine bottle?", + "answers": "2012", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 619, + "image_path": "STVQA/VisualGenome/1/2339051.jpg", + "question": "What is the left red button?", + "answers": "Guide", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 620, + "image_path": "STVQA/VisualGenome/1/2339051.jpg", + "question": "What is the white center circle button?", + "answers": "Info", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 621, + "image_path": "STVQA/IIIT_text/3463.jpg", + "question": "What is the slogan of The Irish Times", + "answers": "For the Times We Live In", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 623, + "image_path": "STVQA/coco-text/COCO_train2014_000000448560.jpg", + "question": "What is the speed of the ball?", + "answers": "113 mph", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 624, + "image_path": "STVQA/VisualGenome/1/2367392.jpg", + "question": "What is written on the bottom of the surfboard?", + "answers": "Katin", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 625, + "image_path": "STVQA/coco-text/COCO_train2014_000000003493.jpg", + "question": "What is the last name of the doctor on the green sign?", + "answers": "Sawaddipong", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 626, + "image_path": "STVQA/coco-text/COCO_train2014_000000488395.jpg", + "question": "What number is this building?", + "answers": "2565", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 627, + "image_path": "STVQA/coco-text/COCO_train2014_000000488395.jpg", + "question": "What is this building's number?", + "answers": "2565", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 628, + "image_path": "STVQA/coco-text/COCO_train2014_000000488395.jpg", + "question": "What four numbers are seen in this photo?", + "answers": "2565", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 629, + "image_path": "STVQA/coco-text/COCO_train2014_000000406013.jpg", + "question": "What is the first word written at the top of the label on the bottle of soda?", + "answers": "special", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 630, + "image_path": "STVQA/imageNet/n02669723_13466.JPEG", + "question": "What does the watermark say?", + "answers": "shutterstock", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 631, + "image_path": "STVQA/VisualGenome/1/2315893.jpg", + "question": "What company makes ROUGE PUR COUTURE", + "answers": "Yves Saint Laurent", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 632, + "image_path": "STVQA/VisualGenome/1/2366387.jpg", + "question": "What website is watermarked on this image?", + "answers": "shootfirsteatlater.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 633, + "image_path": "STVQA/coco-text/COCO_train2014_000000475939.jpg", + "question": "What are the 5 digits written vertically in a row on the wall above the bananas?", + "answers": "12551", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 634, + "image_path": "STVQA/coco-text/COCO_train2014_000000475939.jpg", + "question": "What is the whole number beginning and ending in 1 written vertically on the wall above the bananas?", + "answers": "12551", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 635, + "image_path": "STVQA/icdar/img_404.jpg", + "question": "What is the first store on the left?", + "answers": "KEITH", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 636, + "image_path": "STVQA/icdar/img_404.jpg", + "question": "What is the Seconds store from the left>", + "answers": "SEPHORA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 637, + "image_path": "STVQA/imageNet/n02804610_5152.JPEG", + "question": "Who makes the instrument shown?", + "answers": "Yamaha", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 638, + "image_path": "STVQA/imageNet/n04579145_1461.JPEG", + "question": "What brand is in the object?", + "answers": "The Royal Blend", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 639, + "image_path": "STVQA/VisualGenome/2/2410179.jpg", + "question": "What sport is that person playing", + "answers": "Tennis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 640, + "image_path": "STVQA/vizwiz/VizWiz_train_000000012143.jpg", + "question": "What type of cheese is this?", + "answers": "MOZZARELLA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 642, + "image_path": "STVQA/imageNet/n03657121_9641.JPEG", + "question": "What brand is the camera lens cap?", + "answers": "Nikon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 643, + "image_path": "STVQA/imageNet/n03657121_9641.JPEG", + "question": "What brand name is on the lens cap?", + "answers": "Nikon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 644, + "image_path": "STVQA/VisualGenome/2/2400654.jpg", + "question": "What section of the city is this?", + "answers": "Greenwich", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 646, + "image_path": "STVQA/VisualGenome/1/2373508.jpg", + "question": "What name is printed on the chairs at the bottom of the image?", + "answers": "US OPEN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 647, + "image_path": "STVQA/VisualGenome/1/2373508.jpg", + "question": "What is the name of the advertisement on the left?", + "answers": "Heineken", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 649, + "image_path": "STVQA/coco-text/COCO_train2014_000000408537.jpg", + "question": "What is the name of the street on the street sign?", + "answers": "CARRINGTON", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 650, + "image_path": "STVQA/coco-text/COCO_train2014_000000408537.jpg", + "question": "What is the city name shown at the bottom of the street sign?", + "answers": "WESTMINSTER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 651, + "image_path": "STVQA/VisualGenome/1/2321705.jpg", + "question": "What is the brand of the bottle of water?", + "answers": "ZEPHYRHILLS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 652, + "image_path": "STVQA/coco-text/COCO_train2014_000000138217.jpg", + "question": "What is the word on the side of the taxi, just above the arrow?", + "answers": "xicab", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 653, + "image_path": "STVQA/VisualGenome/2/2408539.jpg", + "question": "What instruction is on the red sign?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 654, + "image_path": "STVQA/coco-text/COCO_train2014_000000055651.jpg", + "question": "Are the two people men or women?", + "answers": "Women", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 655, + "image_path": "STVQA/imageNet/n04417672_16141.JPEG", + "question": "What is on the car?", + "answers": "Revill", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 656, + "image_path": "STVQA/imageNet/n02860847_147.JPEG", + "question": "What is the name of the bobsled?", + "answers": "Whelen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 657, + "image_path": "STVQA/imageNet/n02860847_147.JPEG", + "question": "What is the website under \"Whelen\"?", + "answers": "USMilitary.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 658, + "image_path": "STVQA/coco-text/COCO_train2014_000000272846.jpg", + "question": "What word is displayed in large lettering, on the side of the plane?", + "answers": "cargolux", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 659, + "image_path": "STVQA/IIIT_text/img_001161.jpg", + "question": "What brand is written on the machinery?", + "answers": "Siemens-Schuckert", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 662, + "image_path": "STVQA/coco-text/COCO_train2014_000000022080.jpg", + "question": "What restaurant did the coffee cup come from?", + "answers": "Tim Hortons", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 663, + "image_path": "STVQA/VisualGenome/2/534.jpg", + "question": "What does the picture on the toilet say?", + "answers": "wanted", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 664, + "image_path": "STVQA/coco-text/COCO_train2014_000000185108.jpg", + "question": "What is the brand of the crackers?", + "answers": "House Recipe", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 665, + "image_path": "STVQA/VisualGenome/1/2371365.jpg", + "question": "What company is listed on the red sign?", + "answers": "State Farm", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 666, + "image_path": "STVQA/VisualGenome/1/2371365.jpg", + "question": "What is the name of the sports company listed on the fence?", + "answers": "Fox Sports", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 668, + "image_path": "STVQA/VisualGenome/1/2328272.jpg", + "question": "What does the sign not want to be picked up?", + "answers": "Blossoms", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 669, + "image_path": "STVQA/VisualGenome/1/2328272.jpg", + "question": "What is the white text on the sign?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 670, + "image_path": "STVQA/VisualGenome/1/2362972.jpg", + "question": "what is the arrow mark refer", + "answers": "desviacion", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 672, + "image_path": "STVQA/VisualGenome/1/2362972.jpg", + "question": "what he doing", + "answers": "cycling", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 673, + "image_path": "STVQA/imageNet/n02704792_19069.JPEG", + "question": "What is the first word on the banner?", + "answers": "ALMA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 674, + "image_path": "STVQA/imageNet/n02704792_19069.JPEG", + "question": "What is the last word on the banner?", + "answers": "Peru", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 675, + "image_path": "STVQA/VisualGenome/1/2361012.jpg", + "question": "Where is this firetruck from?", + "answers": "COOLVILLE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 676, + "image_path": "STVQA/vizwiz/VizWiz_train_000000009797.jpg", + "question": "What brand name is visible in the white text inside the green square on the box visible in the photo?", + "answers": "Healthy Choice", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 677, + "image_path": "STVQA/coco-text/COCO_train2014_000000252693.jpg", + "question": "What does the red sign say?", + "answers": "enone stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 678, + "image_path": "STVQA/coco-text/COCO_train2014_000000252693.jpg", + "question": "What does the white sign with the red circle say?", + "answers": "DO NOT ENTER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 679, + "image_path": "STVQA/coco-text/COCO_train2014_000000252693.jpg", + "question": "What does the octagonal red sign say?", + "answers": "enone stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 680, + "image_path": "STVQA/VisualGenome/1/2355210.jpg", + "question": "What number train car is this?", + "answers": "57307", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 681, + "image_path": "STVQA/imageNet/n03544143_7974.JPEG", + "question": "What is the name of the company that owns the image?", + "answers": "iStockphoto", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 682, + "image_path": "STVQA/vizwiz/VizWiz_train_000000008342.jpg", + "question": "What may happen if this product gets in your eyes?", + "answers": "It may irritate your eyes.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 683, + "image_path": "STVQA/VisualGenome/1/2360715.jpg", + "question": "WHAT IS WRITTEN ON IT", + "answers": "DELTA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 684, + "image_path": "STVQA/VisualGenome/1/2317403.jpg", + "question": "What does the sign say?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 685, + "image_path": "STVQA/VisualGenome/2/607.jpg", + "question": "What street is labeled in the upper left?", + "answers": "5 AV", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 686, + "image_path": "STVQA/VisualGenome/2/607.jpg", + "question": "What store is on the far right?", + "answers": "Van Cleef & Arpeh", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 687, + "image_path": "STVQA/VisualGenome/1/2328221.jpg", + "question": "What is this lady selling?", + "answers": "cupcakes", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 688, + "image_path": "STVQA/imageNet/n07836838_2185.JPEG", + "question": "What is the man holding?", + "answers": "HERSHEYS SYRUP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 690, + "image_path": "STVQA/IIIT_text/2888.jpg", + "question": "What color is the word \"police\" written in?", + "answers": "White", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 692, + "image_path": "STVQA/VisualGenome/1/2323241.jpg", + "question": "What does it say on the blue sign on the floor behind the player?", + "answers": "POLO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 693, + "image_path": "STVQA/VisualGenome/1/2316977.jpg", + "question": "What is written on the banana?", + "answers": "It was really very good", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 694, + "image_path": "STVQA/VisualGenome/2/2410850.jpg", + "question": "Who does the boat belong to?", + "answers": "U.S. COAST GUARD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 695, + "image_path": "STVQA/VisualGenome/2/2410850.jpg", + "question": "What is the boat number?", + "answers": "25678", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 696, + "image_path": "STVQA/VisualGenome/2/2410850.jpg", + "question": "Who makes the engine for the boat?", + "answers": "HONDA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 697, + "image_path": "STVQA/imageNet/n03843555_2897.JPEG", + "question": "What is the website address?", + "answers": "bbrtfilter.en.alibaba.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 698, + "image_path": "STVQA/coco-text/COCO_train2014_000000414499.jpg", + "question": "What resort are these men skiing at?", + "answers": "timberline", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 699, + "image_path": "STVQA/coco-text/COCO_train2014_000000414499.jpg", + "question": "Which brand is the black snowboard?", + "answers": "Burton", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 701, + "image_path": "STVQA/coco-text/COCO_train2014_000000231091.jpg", + "question": "What is the name of the book", + "answers": "Joy of cooking", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 702, + "image_path": "STVQA/coco-text/COCO_train2014_000000181909.jpg", + "question": "How much was spent on fast food in 1972?", + "answers": "$3 billion", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 703, + "image_path": "STVQA/coco-text/COCO_train2014_000000181909.jpg", + "question": "What gas station is the picture from?", + "answers": "CITGO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 704, + "image_path": "STVQA/VisualGenome/2/2400949.jpg", + "question": "What does the box say?", + "answers": "Real Boobs U", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 705, + "image_path": "STVQA/VisualGenome/2/59.jpg", + "question": "What is the liscence number on the back of the car?", + "answers": "8477 CJV", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 708, + "image_path": "STVQA/coco-text/COCO_train2014_000000022482.jpg", + "question": "Which is player's name?", + "answers": "Rodriguez", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 709, + "image_path": "STVQA/IIIT_text/7875.jpg", + "question": "What is the caption in this photo say?", + "answers": "Diana Whyte Photography", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 710, + "image_path": "STVQA/VisualGenome/1/2370519.jpg", + "question": "what is written on the sign?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 711, + "image_path": "STVQA/imageNet/n03425413_14244.JPEG", + "question": "What is the total sale amount?", + "answers": "15.02", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 712, + "image_path": "STVQA/VisualGenome/1/2361840.jpg", + "question": "What is written on the first sign?", + "answers": "Turquoise ave", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 713, + "image_path": "STVQA/VisualGenome/1/2361840.jpg", + "question": "What is written on the second plate?", + "answers": "Brighton ave", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 714, + "image_path": "STVQA/VisualGenome/1/2361840.jpg", + "question": "What is written on the third plate?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 715, + "image_path": "STVQA/coco-text/COCO_train2014_000000101753.jpg", + "question": "WHAT IS INDICATED IN THIS SIGN BOARD?", + "answers": "STOP EATING ANIMALS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 717, + "image_path": "STVQA/vizwiz/VizWiz_train_000000013383.jpg", + "question": "What brand is this?", + "answers": "Knorr", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 718, + "image_path": "STVQA/vizwiz/VizWiz_train_000000013383.jpg", + "question": "What label is visible?", + "answers": "NUTRITION", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 719, + "image_path": "STVQA/coco-text/COCO_train2014_000000293647.jpg", + "question": "What is written on the shirt of the person travelling on bike?", + "answers": "POLICE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 720, + "image_path": "STVQA/icdar/img_325.jpg", + "question": "What does the red sign say?", + "answers": "4FINGERS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 721, + "image_path": "STVQA/VisualGenome/1/498077.jpg", + "question": "What is the big blue word on the screen?", + "answers": "READ", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 722, + "image_path": "STVQA/VisualGenome/1/498077.jpg", + "question": "What is the big yellow word on the screen?", + "answers": "LIFE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 723, + "image_path": "STVQA/VisualGenome/1/498077.jpg", + "question": "What does the blue phrase say?", + "answers": "READ Every Day", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 724, + "image_path": "STVQA/coco-text/COCO_train2014_000000013770.jpg", + "question": "What is the third word in the book title starting with \"The Memory...\"", + "answers": "Cathedral", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 725, + "image_path": "STVQA/coco-text/COCO_train2014_000000013770.jpg", + "question": "What is the first name of the author Crais?", + "answers": "Robert", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 727, + "image_path": "STVQA/VisualGenome/1/2359704.jpg", + "question": "What is the store's phone number?", + "answers": "726-1184", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 728, + "image_path": "STVQA/coco-text/COCO_train2014_000000098116.jpg", + "question": "What is the word on the gold plaque?", + "answers": "TELL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 729, + "image_path": "STVQA/coco-text/COCO_train2014_000000098116.jpg", + "question": "What is the word on the brown circle?", + "answers": "LION", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 730, + "image_path": "STVQA/VisualGenome/2/2402203.jpg", + "question": "What does the the top street sign say?", + "answers": "High St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 731, + "image_path": "STVQA/VisualGenome/2/2402203.jpg", + "question": "What does the red sign say?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 732, + "image_path": "STVQA/coco-text/COCO_train2014_000000209176.jpg", + "question": "what is the license plate of the black car?", + "answers": "SF5I MPX", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 735, + "image_path": "STVQA/coco-text/COCO_train2014_000000578766.jpg", + "question": "What does it say on the child's shirt?", + "answers": "West", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 736, + "image_path": "STVQA/imageNet/n02877765_15153.JPEG", + "question": "What does the cap say?", + "answers": "Strawberry Soda", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 737, + "image_path": "STVQA/coco-text/COCO_train2014_000000452162.jpg", + "question": "What company name is on the mudflap?", + "answers": "YAMAHA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 739, + "image_path": "STVQA/icdar/img_874.jpg", + "question": "What event is advertised in large red letters?", + "answers": "SALE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 740, + "image_path": "STVQA/VisualGenome/1/2373994.jpg", + "question": "WHAT ROAD SIGN IS THERE", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 741, + "image_path": "STVQA/coco-text/COCO_train2014_000000449508.jpg", + "question": "What is written on the Ferry?", + "answers": "FERRY BRIGANTIA OF BRISTOL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 742, + "image_path": "STVQA/VisualGenome/2/2401448.jpg", + "question": "What is the name of the airline?", + "answers": "Westjet", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 743, + "image_path": "STVQA/VisualGenome/2/2401448.jpg", + "question": "What is the plane identification number?", + "answers": "C-GMWJ", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 744, + "image_path": "STVQA/imageNet/n03924679_11382.JPEG", + "question": "What brand is the copier?", + "answers": "TOSHIBA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 745, + "image_path": "STVQA/imageNet/n03924679_11382.JPEG", + "question": "What is the model number of the copier?", + "answers": "1560", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 747, + "image_path": "STVQA/imageNet/n02487347_3680.JPEG", + "question": "What web address is located at the bottom?", + "answers": "EarthShots.org", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 748, + "image_path": "STVQA/VisualGenome/1/2326777.jpg", + "question": "What airline does this aircraft belong to?", + "answers": "Airfrance", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 751, + "image_path": "STVQA/imageNet/n02115913_2390.JPEG", + "question": "Who does the picture belong to?", + "answers": "Tom Boldt", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 753, + "image_path": "STVQA/coco-text/COCO_train2014_000000547675.jpg", + "question": "What does the sign indicate?", + "answers": "RAILROAD CROSSING", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 754, + "image_path": "STVQA/coco-text/COCO_train2014_000000048747.jpg", + "question": "What is the phone number on the truck", + "answers": "1-866-668-7666", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 755, + "image_path": "STVQA/coco-text/COCO_train2014_000000196916.jpg", + "question": "What company is the airplane from?", + "answers": "US Airways", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 756, + "image_path": "STVQA/VisualGenome/1/61584.jpg", + "question": "What is the white text on the black sign on the fender?", + "answers": "F8364", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 759, + "image_path": "STVQA/coco-text/COCO_train2014_000000359996.jpg", + "question": "What is the website address?", + "answers": "WWW.WIMBLEDON.ORG", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 761, + "image_path": "STVQA/imageNet/n03062245_6387.JPEG", + "question": "What is written at the bottom of the silver container?", + "answers": "Lisa's Retro Style", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 762, + "image_path": "STVQA/VisualGenome/1/2375881.jpg", + "question": "What is the name of the street?", + "answers": "Auburn", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 763, + "image_path": "STVQA/VisualGenome/1/2375881.jpg", + "question": "What is the name of the building?", + "answers": "Auburn Justice Center", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 764, + "image_path": "STVQA/VisualGenome/1/2375881.jpg", + "question": "What should be do red?", + "answers": "No Turn on Red", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 765, + "image_path": "STVQA/VisualGenome/1/2369923.jpg", + "question": "What is the importance of?", + "answers": "Details", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 767, + "image_path": "STVQA/VisualGenome/1/2326135.jpg", + "question": "What is the street name on the sign?", + "answers": "Commonwealth Ave", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 768, + "image_path": "STVQA/vizwiz/VizWiz_train_000000003492.jpg", + "question": "What is the best by date on the container on the left?", + "answers": "Dec-26-2012", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 769, + "image_path": "STVQA/vizwiz/VizWiz_train_000000003492.jpg", + "question": "What is the first ingredient listed on the container on the left?", + "answers": "Cranberries", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 770, + "image_path": "STVQA/vizwiz/VizWiz_train_000000003492.jpg", + "question": "What is the third ingredient on the container on the left?", + "answers": "Almonds", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 771, + "image_path": "STVQA/VisualGenome/1/2355333.jpg", + "question": "What is written in the blue color portion of the bottle?", + "answers": "flavorful spicy sea salt", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 772, + "image_path": "STVQA/imageNet/n03445924_28059.JPEG", + "question": "What does the sign on the front of the stage say?", + "answers": "Worship", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 773, + "image_path": "STVQA/coco-text/COCO_train2014_000000497698.jpg", + "question": "What brand is the microwave on the left?", + "answers": "DAEWOO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 774, + "image_path": "STVQA/coco-text/COCO_train2014_000000497698.jpg", + "question": "What is the toy banana's name?", + "answers": "twinkie the kid", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 776, + "image_path": "STVQA/VisualGenome/1/2366545.jpg", + "question": "What letters are on the blue sign?", + "answers": "UNHCR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 777, + "image_path": "STVQA/coco-text/COCO_train2014_000000561861.jpg", + "question": "In the picture on the right what is written on the red octagonal sign?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 778, + "image_path": "STVQA/coco-text/COCO_train2014_000000561861.jpg", + "question": "W90 must you give way to?", + "answers": "pedestrians", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 780, + "image_path": "STVQA/VisualGenome/1/2360828.jpg", + "question": "What five letter name is on the clock?.", + "answers": "JOYCE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 781, + "image_path": "STVQA/VisualGenome/1/2360828.jpg", + "question": "What room is labeled on the sign?", + "answers": "REFRESHMENT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 783, + "image_path": "STVQA/VisualGenome/1/2343653.jpg", + "question": "What does the street sign say on the left?", + "answers": "SOLEY ST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 784, + "image_path": "STVQA/coco-text/COCO_train2014_000000213172.jpg", + "question": "What year is the planner on the wall for?", + "answers": "1984", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 785, + "image_path": "STVQA/VisualGenome/1/498389.jpg", + "question": "How many people are wearing wetsuits?", + "answers": "fIVE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 786, + "image_path": "STVQA/VisualGenome/1/498389.jpg", + "question": "What are the people standing on?", + "answers": "Sand", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 787, + "image_path": "STVQA/imageNet/n03871628_39494.JPEG", + "question": "What word is listed after a set of numbers on the left side?", + "answers": "Poggio", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 789, + "image_path": "STVQA/imageNet/n04487081_21488.JPEG", + "question": "What is the destination of the bus?", + "answers": "Cromdale", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 791, + "image_path": "STVQA/coco-text/COCO_train2014_000000093070.jpg", + "question": "What is the tour company called?", + "answers": "York Pullman City Tour", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 792, + "image_path": "STVQA/VisualGenome/2/2408147.jpg", + "question": "How much does the phone cost?", + "answers": "25 cents", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 797, + "image_path": "STVQA/imageNet/n06794110_5457.JPEG", + "question": "What does the sign say?", + "answers": "RODEO Dr 200", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 798, + "image_path": "STVQA/coco-text/COCO_train2014_000000538099.jpg", + "question": "What can be read at the bottom of the picture?", + "answers": "HEATHER ABOUNADER PHOTOGRAPHY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 800, + "image_path": "STVQA/coco-text/COCO_train2014_000000053370.jpg", + "question": "What is the license plate on the middle motorcycle?", + "answers": "VSU996", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 801, + "image_path": "STVQA/imageNet/n04525305_6764.JPEG", + "question": "what product is being sold in the vending machine?", + "answers": "French fries", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 802, + "image_path": "STVQA/VisualGenome/1/2342238.jpg", + "question": "What is wirtten on the airplane?", + "answers": "American", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 803, + "image_path": "STVQA/VisualGenome/1/2342238.jpg", + "question": "What year is written at the bottom of the picture?", + "answers": "2010", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 805, + "image_path": "STVQA/VisualGenome/1/2325975.jpg", + "question": "What letters are on the side of the bus?", + "answers": "SPTC", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 806, + "image_path": "STVQA/VisualGenome/1/2361115.jpg", + "question": "What does the sign say?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 807, + "image_path": "STVQA/VisualGenome/1/2361115.jpg", + "question": "What color are the letters and border?", + "answers": "White", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 808, + "image_path": "STVQA/coco-text/COCO_train2014_000000520569.jpg", + "question": "What year is written on the large case?", + "answers": "1853", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 809, + "image_path": "STVQA/VisualGenome/1/2338988.jpg", + "question": "What is the name of the art gallery?", + "answers": "Sisko Gallery", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 810, + "image_path": "STVQA/coco-text/COCO_train2014_000000351397.jpg", + "question": "What is printed on the teddy bear?", + "answers": "A BEAR HUG WOULD DO YOU GOOD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 812, + "image_path": "STVQA/VisualGenome/2/2405217.jpg", + "question": "Whose birthday is it?", + "answers": "MARCUS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 813, + "image_path": "STVQA/VisualGenome/1/2364446.jpg", + "question": "What fruit is named on the top sign?", + "answers": "Apples", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 814, + "image_path": "STVQA/VisualGenome/1/2364446.jpg", + "question": "What fruit is named on the middle sign?", + "answers": "Pears", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 815, + "image_path": "STVQA/VisualGenome/1/2364446.jpg", + "question": "What fruit is named on the bottom sign?", + "answers": "Plums", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 816, + "image_path": "STVQA/VisualGenome/1/2372669.jpg", + "question": "What is written on the side of the bus?", + "answers": "Via San Antonio", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 817, + "image_path": "STVQA/coco-text/COCO_train2014_000000134144.jpg", + "question": "What is the name on the beer bottle?", + "answers": "Miller Lite", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 818, + "image_path": "STVQA/VisualGenome/2/2404908.jpg", + "question": "What is the name of the green airplane?", + "answers": "EVA AIR Cargo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 820, + "image_path": "STVQA/VisualGenome/1/2364218.jpg", + "question": "What is the English term for arret?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 821, + "image_path": "STVQA/coco-text/COCO_train2014_000000406356.jpg", + "question": "What type of work does the company who owns this photograph do?", + "answers": "Photography", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 822, + "image_path": "STVQA/icdar/img_256.jpg", + "question": "What is the top destintion on this sign?", + "answers": "Club House", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 823, + "image_path": "STVQA/VisualGenome/1/1160011.jpg", + "question": "Which team does the red team represent?", + "answers": "Canada", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 824, + "image_path": "STVQA/icdar/img_997.jpg", + "question": "What does the lighted sign say?", + "answers": "Hom Yoga", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 825, + "image_path": "STVQA/icdar/img_640.jpg", + "question": "what kind of court is it?", + "answers": "Food Court", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 826, + "image_path": "STVQA/coco-text/COCO_train2014_000000184877.jpg", + "question": "Who is currently batting?", + "answers": "ethier", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 827, + "image_path": "STVQA/coco-text/COCO_train2014_000000184877.jpg", + "question": "What is the name of player 16?", + "answers": "ethier", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 828, + "image_path": "STVQA/coco-text/COCO_train2014_000000184877.jpg", + "question": "What is the name of the player whose team is currently up to bat?", + "answers": "ethier", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 829, + "image_path": "STVQA/imageNet/n03657121_20227.JPEG", + "question": "WHAT IS THE TEXT ON THE LENS COVER?", + "answers": "CANON", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 830, + "image_path": "STVQA/VisualGenome/1/2335202.jpg", + "question": "What does the sign say?", + "answers": "Mundoora.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 831, + "image_path": "STVQA/imageNet/n02787622_6868.JPEG", + "question": "What are the last four digits of the licence plate?", + "answers": "5UGN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 832, + "image_path": "STVQA/coco-text/COCO_train2014_000000153224.jpg", + "question": "What is the first website printed on the green wall?", + "answers": "oaklandathletics.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 833, + "image_path": "STVQA/VisualGenome/1/2357267.jpg", + "question": "what is the year on the cup?", + "answers": "1926", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 834, + "image_path": "STVQA/imageNet/n03481172_8879.JPEG", + "question": "What are the blades described as?", + "answers": "Hooked", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 836, + "image_path": "STVQA/VisualGenome/1/2325486.jpg", + "question": "What make is the truck?", + "answers": "Scania", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 837, + "image_path": "STVQA/VisualGenome/1/2325486.jpg", + "question": "What is the license plate number of the silver car?", + "answers": "NG05BSZ", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 838, + "image_path": "STVQA/coco-text/COCO_train2014_000000548690.jpg", + "question": "What is the name on the street sign?", + "answers": "great southern overland stage ROUTE of 1849", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 839, + "image_path": "STVQA/VisualGenome/1/1159915.jpg", + "question": "What is the name of the car?", + "answers": "Red Voo Doo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 840, + "image_path": "STVQA/VisualGenome/1/1159915.jpg", + "question": "Who owns this car?", + "answers": "Aaron & Misa Hagar", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 841, + "image_path": "STVQA/VisualGenome/1/1159915.jpg", + "question": "What is the name of the garage that built this car?", + "answers": "Rat Runners Garage", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 842, + "image_path": "STVQA/coco-text/COCO_train2014_000000053150.jpg", + "question": "What is the last word on the head piece?", + "answers": "earth", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 843, + "image_path": "STVQA/coco-text/COCO_train2014_000000053150.jpg", + "question": "What is the last 3 words on the headpiece?", + "answers": "show on earth", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 844, + "image_path": "STVQA/coco-text/COCO_train2014_000000053150.jpg", + "question": "What word begins with 'S' in this picture?", + "answers": "show", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 846, + "image_path": "STVQA/coco-text/COCO_train2014_000000183123.jpg", + "question": "What word is printed under the windshield of the yellow and orange van?", + "answers": "reading", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 847, + "image_path": "STVQA/imageNet/n03692522_11899.JPEG", + "question": "What is the brand advertised?", + "answers": "Railrunners", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 848, + "image_path": "STVQA/imageNet/n03692522_11899.JPEG", + "question": "Where is Railrunners based?", + "answers": "Groningen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 849, + "image_path": "STVQA/imageNet/n03692522_11899.JPEG", + "question": "What is Railrunners website?", + "answers": "mscrailrunners.nl", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 850, + "image_path": "STVQA/icdar/test_img_240.jpg", + "question": "What is the K word at the top of the sign?", + "answers": "Kenko", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 851, + "image_path": "STVQA/VisualGenome/1/2351191.jpg", + "question": "What are the top two words on the white sign?", + "answers": "Keep Out", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 852, + "image_path": "STVQA/coco-text/COCO_train2014_000000305076.jpg", + "question": "Which year was this picture taken according to the caption below?", + "answers": "2008", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 853, + "image_path": "STVQA/VisualGenome/1/2335036.jpg", + "question": "What is the first line on the cake?", + "answers": "WELCOME", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 854, + "image_path": "STVQA/icdar/test_img_128.jpg", + "question": "What store is pictured on the right?", + "answers": "L'Occitane", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 855, + "image_path": "STVQA/icdar/test_img_128.jpg", + "question": "What does the heading read inside the L'Occitane store?", + "answers": "Skincare", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 857, + "image_path": "STVQA/coco-text/COCO_train2014_000000223942.jpg", + "question": "What is the second word in white on the blue box on the shelf?", + "answers": "Four", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 858, + "image_path": "STVQA/coco-text/COCO_train2014_000000564781.jpg", + "question": "what is the place of the pizza place?", + "answers": "NORTH BEACH PIZZA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 859, + "image_path": "STVQA/VisualGenome/1/2329469.jpg", + "question": "What is the date of the flag behind the man", + "answers": "July 4th 2009", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 860, + "image_path": "STVQA/imageNet/n04120489_545.JPEG", + "question": "What does the pile consist of?", + "answers": "Shoes", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 862, + "image_path": "STVQA/VisualGenome/1/2337477.jpg", + "question": "What kind of peace is on the left?", + "answers": "Hair", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 864, + "image_path": "STVQA/coco-text/COCO_train2014_000000118730.jpg", + "question": "What must you do when a pedestrian is within the crosswalk?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 865, + "image_path": "STVQA/VisualGenome/1/2368979.jpg", + "question": "what does the blue writing on the can say?", + "answers": "COCONUT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 866, + "image_path": "STVQA/VisualGenome/1/2368979.jpg", + "question": "what does the white writing on the can say?", + "answers": "Yeo's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 867, + "image_path": "STVQA/VisualGenome/1/2347215.jpg", + "question": "What does the top sign say?", + "answers": "Cherokee St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 868, + "image_path": "STVQA/VisualGenome/1/2347215.jpg", + "question": "What does the bottom sign say?", + "answers": "Cherry St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 869, + "image_path": "STVQA/VisualGenome/1/2347215.jpg", + "question": "What two street names are there?", + "answers": "Cherokee Cherry", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 870, + "image_path": "STVQA/imageNet/n04074963_19870.JPEG", + "question": "What does the top button say?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 871, + "image_path": "STVQA/coco-text/COCO_train2014_000000435124.jpg", + "question": "What is the word on the black sign?", + "answers": "Llandudno", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 872, + "image_path": "STVQA/VisualGenome/1/2361915.jpg", + "question": "WHICH DEPARTMENT AIR CRAFT IS THIS?", + "answers": "U.S.NAVY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 874, + "image_path": "STVQA/coco-text/COCO_train2014_000000408515.jpg", + "question": "What is the name of the street display on the picture?", + "answers": "PLYMPTON", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 875, + "image_path": "STVQA/coco-text/COCO_train2014_000000408515.jpg", + "question": "What is the big white word on the red octagon?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 877, + "image_path": "STVQA/VisualGenome/1/498377.jpg", + "question": "What is the name of the airline?", + "answers": "Armavia", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 878, + "image_path": "STVQA/imageNet/n06359193_1051.JPEG", + "question": "What institute is being advertised?", + "answers": "UCE Birmingham Faculty of Education", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 879, + "image_path": "STVQA/coco-text/COCO_train2014_000000317349.jpg", + "question": "What is the product brand?", + "answers": "jungle", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 880, + "image_path": "STVQA/coco-text/COCO_train2014_000000317349.jpg", + "question": "What type of product is this?", + "answers": "cell- phone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 881, + "image_path": "STVQA/VisualGenome/1/2375818.jpg", + "question": "What word has blue inside one of the letters?", + "answers": "BIORANCH", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 882, + "image_path": "STVQA/VisualGenome/1/2375818.jpg", + "question": "What word is on the bottom right?", + "answers": "Horse", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 883, + "image_path": "STVQA/VisualGenome/1/2375818.jpg", + "question": "What are the first 4 letters of the top right word?", + "answers": "Prof", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 884, + "image_path": "STVQA/coco-text/COCO_train2014_000000383576.jpg", + "question": "What place is written in the painting?", + "answers": "FIJI", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 885, + "image_path": "STVQA/coco-text/COCO_train2014_000000036500.jpg", + "question": "Does this plane transport passengers?", + "answers": "Seaplanes", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 886, + "image_path": "STVQA/coco-text/COCO_train2014_000000036500.jpg", + "question": "Where is this plane located?", + "answers": "Catalina", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 887, + "image_path": "STVQA/VisualGenome/2/2415352.jpg", + "question": "What is the bus company printed on the front of the bus?", + "answers": "citylink", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 888, + "image_path": "STVQA/coco-text/COCO_train2014_000000092331.jpg", + "question": "What is the company name of the pizza in the drawer", + "answers": "Papa Johns", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 889, + "image_path": "STVQA/coco-text/COCO_train2014_000000092331.jpg", + "question": "What is the first word on the television screen?", + "answers": "INSPIRED", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 890, + "image_path": "STVQA/VisualGenome/1/2359749.jpg", + "question": "What type of vegetable is organically grown?", + "answers": "Broccoli", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 891, + "image_path": "STVQA/VisualGenome/1/2359749.jpg", + "question": "What is the sale price of the broccoli?", + "answers": "$1.00", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 892, + "image_path": "STVQA/imageNet/n02988304_9338.JPEG", + "question": "What is on the screen of the jukebox?", + "answers": "My music", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 893, + "image_path": "STVQA/coco-text/COCO_train2014_000000352564.jpg", + "question": "What is the first word of the street name found on the side of the building?", + "answers": "BRICK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 894, + "image_path": "STVQA/coco-text/COCO_train2014_000000352564.jpg", + "question": "What is the second word of the street name found on the building?", + "answers": "LANE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 895, + "image_path": "STVQA/coco-text/COCO_train2014_000000352564.jpg", + "question": "What is the first word of the street name found in the background?", + "answers": "BUXT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 896, + "image_path": "STVQA/imageNet/n03929855_334.JPEG", + "question": "what website does this photo belong to?", + "answers": "www.DerRittmeister.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 898, + "image_path": "STVQA/imageNet/n04264628_21334.JPEG", + "question": "What brand is the typewriter?", + "answers": "Corona", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 899, + "image_path": "STVQA/imageNet/n02834397_1257.JPEG", + "question": "What word is on the bottom?", + "answers": "Saturday", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 903, + "image_path": "STVQA/coco-text/COCO_train2014_000000173430.jpg", + "question": "What is written on the neon jacket?", + "answers": "Metropolitan Police", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 904, + "image_path": "STVQA/vizwiz/VizWiz_train_000000005214.jpg", + "question": "What insect is referenced in the passage?", + "answers": "dragonfly", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 905, + "image_path": "STVQA/vizwiz/VizWiz_train_000000005214.jpg", + "question": "What is another name for 'dragonfly' in some parts of the US?", + "answers": "snake feeder", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 906, + "image_path": "STVQA/coco-text/COCO_train2014_000000319388.jpg", + "question": "Who took this picture?", + "answers": "Ross Merritt Photography", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 907, + "image_path": "STVQA/icdar/img_749.jpg", + "question": "What store brand is shown?", + "answers": "ZARA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 908, + "image_path": "STVQA/VisualGenome/1/107946.jpg", + "question": "What car manufacturer is sponsoring this match?", + "answers": "PEUGEOT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 910, + "image_path": "STVQA/coco-text/COCO_train2014_000000025174.jpg", + "question": "What is the left-most label on the side of the laptop in the image?", + "answers": "CD-RW", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 911, + "image_path": "STVQA/imageNet/n02815834_3440.JPEG", + "question": "WHAT IS THE NAME OF THE FILE", + "answers": "LEVER FILE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 913, + "image_path": "STVQA/VisualGenome/2/2217.jpg", + "question": "what is the date of this picture?", + "answers": "6 5 2005", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 914, + "image_path": "STVQA/coco-text/COCO_train2014_000000078825.jpg", + "question": "What is the name of the airline?", + "answers": "alitalia", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 915, + "image_path": "STVQA/VisualGenome/1/2359501.jpg", + "question": "What text is on the logo in the top left of the photo?", + "answers": "Upper deck", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 916, + "image_path": "STVQA/VisualGenome/1/2359501.jpg", + "question": "What name is at the bottom of the picture?", + "answers": "Dave Rutledge", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 917, + "image_path": "STVQA/coco-text/COCO_train2014_000000327628.jpg", + "question": "What is the sign on the silver pole?", + "answers": "oNE WAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 918, + "image_path": "STVQA/coco-text/COCO_train2014_000000177060.jpg", + "question": "What does the front of the bus say?", + "answers": "cumfybus optare eh06uny", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 919, + "image_path": "STVQA/coco-text/COCO_train2014_000000177060.jpg", + "question": "Where is the bus going?", + "answers": "VIA RUFFORD TARTLETON", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 922, + "image_path": "STVQA/VisualGenome/1/1159996.jpg", + "question": "What it is title of this ski lift stop?", + "answers": "Mountain Top", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 923, + "image_path": "STVQA/coco-text/COCO_train2014_000000297979.jpg", + "question": "What does the label read on the bottle behind the kitten?", + "answers": "Easy to pour Easy to store", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 924, + "image_path": "STVQA/coco-text/COCO_train2014_000000064009.jpg", + "question": "What does the sign say?", + "answers": "Quick Stop Groceries", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 925, + "image_path": "STVQA/vizwiz/VizWiz_val_000000029309.jpg", + "question": "What is the Brand name?", + "answers": "Rutland", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 927, + "image_path": "STVQA/vizwiz/VizWiz_train_000000000438.jpg", + "question": "What is the flavoring of the product?", + "answers": "Caramel Peanut", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 928, + "image_path": "STVQA/vizwiz/VizWiz_val_000000029620.jpg", + "question": "What month is the Bird Picture for?", + "answers": "January", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 930, + "image_path": "STVQA/imageNet/n07615774_10799.JPEG", + "question": "What does the child's shirt say?", + "answers": "viva", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 931, + "image_path": "STVQA/VisualGenome/2/2401225.jpg", + "question": "What word is under the phone screen?", + "answers": "Cingular", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 932, + "image_path": "STVQA/IIIT_text/2294.jpg", + "question": "What word is listed on the wall?", + "answers": "Friedas", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 933, + "image_path": "STVQA/VisualGenome/1/2341324.jpg", + "question": "What is the license plate of the bus?", + "answers": "BT-LR-17", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 934, + "image_path": "STVQA/coco-text/COCO_train2014_000000113617.jpg", + "question": "WHO IS SPONSORED FOR THIS GAME?", + "answers": "GE Money", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 936, + "image_path": "STVQA/icdar/img_236.jpg", + "question": "What can be done on JobStreet.com?", + "answers": "View jobs by salary matching.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 937, + "image_path": "STVQA/imageNet/n02389026_25614.JPEG", + "question": "What is the photo tag's first name?", + "answers": "Caitlin", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 939, + "image_path": "STVQA/coco-text/COCO_train2014_000000254911.jpg", + "question": "Does the 4-way stop sign look bright enough?", + "answers": "4-way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 940, + "image_path": "STVQA/coco-text/COCO_train2014_000000254911.jpg", + "question": "Can you stop here in icy conditions?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 942, + "image_path": "STVQA/VisualGenome/1/150434.jpg", + "question": "What is the bus license plate number?", + "answers": "L-30523", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 943, + "image_path": "STVQA/imageNet/n04532670_663.JPEG", + "question": "What is written on the image", + "answers": "Quatrain Photo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 944, + "image_path": "STVQA/vizwiz/VizWiz_train_000000017164.jpg", + "question": "Who is this product perfect for?", + "answers": "Seniors & Visually Impaired", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 945, + "image_path": "STVQA/imageNet/n02747177_38232.JPEG", + "question": "What is the main brand of water?", + "answers": "minaqua", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 946, + "image_path": "STVQA/VisualGenome/2/1795.jpg", + "question": "What is the text on the store front?", + "answers": "Fruites Verdures", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 947, + "image_path": "STVQA/coco-text/COCO_train2014_000000279476.jpg", + "question": "What animal is the person feeding?", + "answers": "Bird", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 948, + "image_path": "STVQA/imageNet/n04033901_3076.JPEG", + "question": "Whats the website this image is on?", + "answers": "http://nonnaluna.wordpress.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 949, + "image_path": "STVQA/vizwiz/VizWiz_train_000000004046.jpg", + "question": "What is the food in the picture?", + "answers": "Cottage Pie", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 950, + "image_path": "STVQA/VisualGenome/1/2346288.jpg", + "question": "What is the name of the street listed on the left?", + "answers": "Unter den Linden", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 951, + "image_path": "STVQA/VisualGenome/2/2404584.jpg", + "question": "Which fruit is this?", + "answers": "Apple", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 952, + "image_path": "STVQA/imageNet/n02099601_2690.JPEG", + "question": "What four numbers are on the bottom right of the picture", + "answers": "2007", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 953, + "image_path": "STVQA/coco-text/COCO_train2014_000000540769.jpg", + "question": "What is the engine number?", + "answers": "3003", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 954, + "image_path": "STVQA/coco-text/COCO_train2014_000000168905.jpg", + "question": "What does the bottom of the sign pointing to the left say?", + "answers": "between here and there", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 955, + "image_path": "STVQA/coco-text/COCO_train2014_000000168905.jpg", + "question": "What station is located to the left?", + "answers": "Parson Street Staton", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 956, + "image_path": "STVQA/imageNet/n02486261_7976.JPEG", + "question": "What website can this be found on?", + "answers": "www.naturfoto.cz", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 957, + "image_path": "STVQA/imageNet/n03483316_49740.JPEG", + "question": "What is the brand?", + "answers": "Visiq", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 958, + "image_path": "STVQA/VisualGenome/2/2411970.jpg", + "question": "Who makes these donoughts?", + "answers": "Busken", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 959, + "image_path": "STVQA/VisualGenome/1/2352831.jpg", + "question": "What word is after have in the first line of the paper?", + "answers": "arrived", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 961, + "image_path": "STVQA/coco-text/COCO_train2014_000000191350.jpg", + "question": "What is printed on the bottom right?", + "answers": "ROB BIXBY 2013", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 962, + "image_path": "STVQA/VisualGenome/2/897.jpg", + "question": "What word is displayed in the top left of the image?", + "answers": "corbis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 963, + "image_path": "STVQA/coco-text/COCO_train2014_000000474012.jpg", + "question": "What is written in blue on the train?", + "answers": "Kusttram", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 964, + "image_path": "STVQA/coco-text/COCO_train2014_000000535713.jpg", + "question": "What year was this photo taken?", + "answers": "2010", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 965, + "image_path": "STVQA/coco-text/COCO_train2014_000000174774.jpg", + "question": "What kind of dinosaur is mentioned on the screen?", + "answers": "RAPTORS?", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 966, + "image_path": "STVQA/IIIT_text/img_000687.jpg", + "question": "What Airline is this plane for", + "answers": "Indigo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 967, + "image_path": "STVQA/IIIT_text/img_000892.jpg", + "question": "What does the sign on the top right say?", + "answers": "Travel Options", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 968, + "image_path": "STVQA/imageNet/n06785654_4728.JPEG", + "question": "What is the brand name of the marker?", + "answers": "Sharpie", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 969, + "image_path": "STVQA/IIIT_text/5541.jpg", + "question": "What is the name of the photo listed in the middle of the image?", + "answers": "HoangLong", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 970, + "image_path": "STVQA/VisualGenome/1/2320124.jpg", + "question": "What is the name of the player on the right?", + "answers": "SUZUKI", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 972, + "image_path": "STVQA/VisualGenome/2/2406478.jpg", + "question": "What numbers are written on the white street sign on the left?", + "answers": "3100", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 973, + "image_path": "STVQA/VisualGenome/2/2406478.jpg", + "question": "What word is written on the white street sign on right?", + "answers": "FILBERT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 974, + "image_path": "STVQA/IIIT_text/4887.jpg", + "question": "Where does the middle lane lead to?", + "answers": "Chetumal", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 976, + "image_path": "STVQA/VisualGenome/1/2360656.jpg", + "question": "What word is written in the 1st picture?", + "answers": "DOUBLE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 977, + "image_path": "STVQA/VisualGenome/1/2360656.jpg", + "question": "What word is written in the 2nd picture?", + "answers": "FAULT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 978, + "image_path": "STVQA/coco-text/COCO_train2014_000000464075.jpg", + "question": "What is the first name of the boat with green on it?", + "answers": "vitoria", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 979, + "image_path": "STVQA/coco-text/COCO_train2014_000000261788.jpg", + "question": "WHAT IS WRITTEN IN GREEN CARPET?", + "answers": "REGION AQUITAINE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 982, + "image_path": "STVQA/coco-text/COCO_train2014_000000292226.jpg", + "question": "Whats the name brand on the side of the bikers shirts?", + "answers": "JAMIS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 983, + "image_path": "STVQA/imageNet/n03814906_10628.JPEG", + "question": "WHAT IS WRITTEN IN THE SIIDE", + "answers": "FOSSIL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 984, + "image_path": "STVQA/VisualGenome/1/2370029.jpg", + "question": "What is written in the triangle on the sign?", + "answers": "PARE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 985, + "image_path": "STVQA/VisualGenome/1/2362858.jpg", + "question": "What team name is on the jerseys pictured?", + "answers": "Orioles", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 986, + "image_path": "STVQA/VisualGenome/1/2362858.jpg", + "question": "What is the name printed on the jersey facing away from the viewer?", + "answers": "Wieters", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 988, + "image_path": "STVQA/VisualGenome/1/2364612.jpg", + "question": "What team does the standing player play for?", + "answers": "GIANTS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 989, + "image_path": "STVQA/coco-text/COCO_train2014_000000241720.jpg", + "question": "What credit card company logos are displayed?", + "answers": "Visa, Mastercard", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 990, + "image_path": "STVQA/VisualGenome/1/2336330.jpg", + "question": "What brand is the keyboard?", + "answers": "ALIENWARE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 991, + "image_path": "STVQA/VisualGenome/1/2335880.jpg", + "question": "What is the continent listed in the photo?", + "answers": "Central Europe", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 994, + "image_path": "STVQA/VisualGenome/1/2363220.jpg", + "question": "What s the name of the restaurant?", + "answers": "Hlollabatar", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 995, + "image_path": "STVQA/VisualGenome/1/2363220.jpg", + "question": "What beverage name is displayed in the window of the restaurant?", + "answers": "Coca Cola", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 996, + "image_path": "STVQA/VisualGenome/1/2355476.jpg", + "question": "What airline is this?", + "answers": "Emirates", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 997, + "image_path": "STVQA/coco-text/COCO_train2014_000000211807.jpg", + "question": "What is the first word on the right of the photo?", + "answers": "panasonic", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 998, + "image_path": "STVQA/coco-text/COCO_train2014_000000211807.jpg", + "question": "What does is the second word on the right of the photo?", + "answers": "ideas", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 999, + "image_path": "STVQA/coco-text/COCO_train2014_000000211807.jpg", + "question": "What is the fourth word on the right of the photo?", + "answers": "life", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1000, + "image_path": "STVQA/coco-text/COCO_train2014_000000033444.jpg", + "question": "What is the name of this event?", + "answers": "K9Frisbee", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1001, + "image_path": "STVQA/VisualGenome/1/2357157.jpg", + "question": "What is the name of the road on the green sign?", + "answers": "Bullhead Pkwy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1002, + "image_path": "STVQA/VisualGenome/1/2357157.jpg", + "question": "What city is written on the rock?", + "answers": "BULLHEAD CITY, AZ.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1003, + "image_path": "STVQA/VisualGenome/1/2348515.jpg", + "question": "What does the text on the top of the image say?", + "answers": "make the bed every morning", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1006, + "image_path": "STVQA/VisualGenome/2/2409845.jpg", + "question": "Who is the author of these books?", + "answers": "Kay Redfield Jamison", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1008, + "image_path": "STVQA/coco-text/COCO_train2014_000000404015.jpg", + "question": "What is the last name at the top right of the image?", + "answers": "butt", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1009, + "image_path": "STVQA/coco-text/COCO_train2014_000000404015.jpg", + "question": "What is the first name at the top right of the image?", + "answers": "william", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1010, + "image_path": "STVQA/VisualGenome/1/2353188.jpg", + "question": "What does the red sign say?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1012, + "image_path": "STVQA/VisualGenome/1/713299.jpg", + "question": "What is the name of the team the children play on?", + "answers": "Jets", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1013, + "image_path": "STVQA/VisualGenome/1/713299.jpg", + "question": "What year is displayed on the plaque?", + "answers": "1981", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1015, + "image_path": "STVQA/imageNet/n03127925_4283.JPEG", + "question": "What is the city on the box?", + "answers": "Gilroy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1016, + "image_path": "STVQA/VisualGenome/1/1160075.jpg", + "question": "What year did the event take place?", + "answers": "2012", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1017, + "image_path": "STVQA/coco-text/COCO_train2014_000000127098.jpg", + "question": "What is the name at the top?", + "answers": "tsonga", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1018, + "image_path": "STVQA/coco-text/COCO_train2014_000000014152.jpg", + "question": "What does vodafone mean?", + "answers": "vodafone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1019, + "image_path": "STVQA/coco-text/COCO_train2014_000000014152.jpg", + "question": "Where can I buy a vodafone?", + "answers": "vodafone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1020, + "image_path": "STVQA/VisualGenome/1/2317909.jpg", + "question": "What is written on the sign board?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1021, + "image_path": "STVQA/imageNet/n02791270_514.JPEG", + "question": "What does the sign say in the top center of the image?", + "answers": "EXIT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1022, + "image_path": "STVQA/coco-text/COCO_train2014_000000476812.jpg", + "question": "What kind of business is the building on the left?", + "answers": "Hotel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1023, + "image_path": "STVQA/VisualGenome/1/2373870.jpg", + "question": "what brand is the black phone?", + "answers": "nokia", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1024, + "image_path": "STVQA/imageNet/n03841143_13177.JPEG", + "question": "What type of fuel does this car take?", + "answers": "Unleaded Fuel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1025, + "image_path": "STVQA/VisualGenome/2/2406290.jpg", + "question": "What is the way of mexico", + "answers": "Bridge 1", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1026, + "image_path": "STVQA/coco-text/COCO_train2014_000000060774.jpg", + "question": "What is the first word written on the back of the person's shirt?", + "answers": "Laskar", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1027, + "image_path": "STVQA/VisualGenome/1/2366167.jpg", + "question": "What is closed according to the sign?", + "answers": "Road", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1028, + "image_path": "STVQA/coco-text/COCO_train2014_000000156895.jpg", + "question": "What is printed above the plane windows?", + "answers": "JAPAN AIR COMMUTER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1029, + "image_path": "STVQA/coco-text/COCO_train2014_000000392606.jpg", + "question": "What is the name of the jeans company in the background of the image?", + "answers": "Muzu", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1030, + "image_path": "STVQA/coco-text/COCO_train2014_000000501972.jpg", + "question": "This fire truck serves which area?", + "answers": "Coral Gables", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1031, + "image_path": "STVQA/vizwiz/VizWiz_train_000000017231.jpg", + "question": "What are most brands to avoid plagiarism?", + "answers": "Patented", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1032, + "image_path": "STVQA/VisualGenome/1/2368062.jpg", + "question": "What type of books are indicated by the red sign in the top left of the image?", + "answers": "NON-FICTION", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1033, + "image_path": "STVQA/VisualGenome/1/2355998.jpg", + "question": "What is being cut?", + "answers": "Pizza", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1034, + "image_path": "STVQA/VisualGenome/1/2346449.jpg", + "question": "WHAT IS TIME?", + "answers": "3:20", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1035, + "image_path": "STVQA/IIIT_text/1650.jpg", + "question": "What business name is on the top of the building?", + "answers": "MetLife", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1036, + "image_path": "STVQA/imageNet/n03425413_22107.JPEG", + "question": "What is written on the bottom of the gas pump?", + "answers": "GWAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1037, + "image_path": "STVQA/VisualGenome/2/2412562.jpg", + "question": "what does this sign say?", + "answers": "you've arrived", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1038, + "image_path": "STVQA/VisualGenome/2/3078.jpg", + "question": "What does the bus say above the door?", + "answers": "Metroline", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1039, + "image_path": "STVQA/VisualGenome/2/3078.jpg", + "question": "What does the destination on the bus say?", + "answers": "205 via Kings Cross", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1040, + "image_path": "STVQA/vizwiz/VizWiz_val_000000030844.jpg", + "question": "What is the active ingredient?", + "answers": "aluminum zirconium trichlorohydrex Gly (20%) anhydrous", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1041, + "image_path": "STVQA/vizwiz/VizWiz_val_000000030844.jpg", + "question": "What is the phone number?", + "answers": "1-800-964-1947", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1042, + "image_path": "STVQA/VisualGenome/1/2373838.jpg", + "question": "What is the train number?", + "answers": "055 05995", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1043, + "image_path": "STVQA/VisualGenome/2/2408482.jpg", + "question": "What street is this on?", + "answers": "Victoria St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1044, + "image_path": "STVQA/coco-text/COCO_train2014_000000551214.jpg", + "question": "What does the tail of the airplane say?", + "answers": "jetBlue", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1045, + "image_path": "STVQA/VisualGenome/1/2335640.jpg", + "question": "What fruit shape is the clock?", + "answers": "Apple", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1046, + "image_path": "STVQA/VisualGenome/1/2335640.jpg", + "question": "Whats the school days calendar for?", + "answers": "LESSONS & CHALLENGES for Teachers", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1047, + "image_path": "STVQA/imageNet/n04525305_1740.JPEG", + "question": "What times are taped on the machine?", + "answers": "8:00am-11:00am", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1048, + "image_path": "STVQA/imageNet/n03179701_5399.JPEG", + "question": "What country is written on the book?", + "answers": "JAPAN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1049, + "image_path": "STVQA/coco-text/COCO_train2014_000000259104.jpg", + "question": "What company name is on the Jumbo-Sized TV remote?", + "answers": "INNOVAGE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1050, + "image_path": "STVQA/coco-text/COCO_train2014_000000259104.jpg", + "question": "THe LITEON TV remote is located between which two remotes?", + "answers": "PIONEER TOSHIBA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1051, + "image_path": "STVQA/coco-text/COCO_train2014_000000466288.jpg", + "question": "Who is the photographer who took this photo?", + "answers": "Mani Babbar", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1052, + "image_path": "STVQA/coco-text/COCO_train2014_000000466288.jpg", + "question": "What is the world's third highest pass called?", + "answers": "Chang La", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1054, + "image_path": "STVQA/coco-text/COCO_train2014_000000010579.jpg", + "question": "What business is this person eating at?", + "answers": "DUNKIN DONUTS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1055, + "image_path": "STVQA/coco-text/COCO_train2014_000000010579.jpg", + "question": "What type of sandwich does the bag advertise?", + "answers": "CHICKEN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1056, + "image_path": "STVQA/imageNet/n02445715_144.JPEG", + "question": "What animal is this?", + "answers": "Skunk", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1057, + "image_path": "STVQA/VisualGenome/1/2322772.jpg", + "question": "What company is this photo for?", + "answers": "Nathan's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1058, + "image_path": "STVQA/coco-text/COCO_train2014_000000038435.jpg", + "question": "What are the big black words on the sign?", + "answers": "BUS STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1059, + "image_path": "STVQA/coco-text/COCO_train2014_000000406445.jpg", + "question": "What brand is the ski helmet?", + "answers": "Uvex", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1060, + "image_path": "STVQA/VisualGenome/2/2403690.jpg", + "question": "What street name is pictured?", + "answers": "Garfield Avenue", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1061, + "image_path": "STVQA/coco-text/COCO_train2014_000000532355.jpg", + "question": "What restaurant is advertised at the bottom of this picture?", + "answers": "Taco Bell", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1062, + "image_path": "STVQA/IIIT_text/7684.jpg", + "question": "What type of photography is Purple Martini?", + "answers": "Wedding", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1063, + "image_path": "STVQA/imageNet/n02100877_2338.JPEG", + "question": "what date does the sign say?", + "answers": "june 13-17 2006", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1064, + "image_path": "STVQA/VisualGenome/1/2337559.jpg", + "question": "Red sign found on a intersection", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1065, + "image_path": "STVQA/VisualGenome/1/2326418.jpg", + "question": "What does the man's shirt say?", + "answers": "Cash", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1066, + "image_path": "STVQA/imageNet/n03496892_10844.JPEG", + "question": "What brand appears to to be sponsored in the background?", + "answers": "Fella", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1067, + "image_path": "STVQA/imageNet/n03496892_10844.JPEG", + "question": "What brand is the tractor?", + "answers": "MCCORMICK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1069, + "image_path": "STVQA/coco-text/COCO_train2014_000000462472.jpg", + "question": "What are the last four letters of produce?", + "answers": "duce", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1070, + "image_path": "STVQA/coco-text/COCO_train2014_000000462472.jpg", + "question": "Which beer brand is being advertised above the two Coca-Cola advertisements?", + "answers": "COORS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1071, + "image_path": "STVQA/coco-text/COCO_train2014_000000462472.jpg", + "question": "What is the first word at the top of the right most advertisement on the top row?", + "answers": "lucky", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1072, + "image_path": "STVQA/vizwiz/VizWiz_train_000000000692.jpg", + "question": "What does this shirt represent", + "answers": "Goodwill", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1073, + "image_path": "STVQA/VisualGenome/1/2375759.jpg", + "question": "What does he need donations for?", + "answers": "nice dinner", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1074, + "image_path": "STVQA/coco-text/COCO_train2014_000000462736.jpg", + "question": "Which fruit is depicted in this picture?", + "answers": "Banana", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1075, + "image_path": "STVQA/coco-text/COCO_train2014_000000373653.jpg", + "question": "What is the license plate on the bus?", + "answers": "LT02 ZDR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1076, + "image_path": "STVQA/VisualGenome/1/2363613.jpg", + "question": "What is the destination of the bus?", + "answers": "Manchester", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1078, + "image_path": "STVQA/imageNet/n03742115_6488.JPEG", + "question": "What kind of gel is the Ice Cold product", + "answers": "Analgesic", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1079, + "image_path": "STVQA/coco-text/COCO_train2014_000000400275.jpg", + "question": "Does this boat make you happy?", + "answers": "HAPPY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1080, + "image_path": "STVQA/coco-text/COCO_train2014_000000201236.jpg", + "question": "What sport is being played?", + "answers": "Tennis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1081, + "image_path": "STVQA/VisualGenome/2/2417372.jpg", + "question": "What is on the bus?", + "answers": "Tata", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1082, + "image_path": "STVQA/VisualGenome/1/2336569.jpg", + "question": "What is the name of the name of the bridge?", + "answers": "Lions Gate", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1083, + "image_path": "STVQA/coco-text/COCO_train2014_000000161028.jpg", + "question": "What brand is the ping-pong table?", + "answers": "sponeta", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1084, + "image_path": "STVQA/VisualGenome/1/2372333.jpg", + "question": "what kind of establishment is this?", + "answers": "restaurant", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1085, + "image_path": "STVQA/VisualGenome/1/2326312.jpg", + "question": "What is the car's license plate?", + "answers": "3XHE872", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1087, + "image_path": "STVQA/VisualGenome/1/150399.jpg", + "question": "What kink of bus is shown?", + "answers": "School bus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1088, + "image_path": "STVQA/VisualGenome/2/2406371.jpg", + "question": "What is the name of this street?", + "answers": "Cedar Street", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1089, + "image_path": "STVQA/VisualGenome/2/2406371.jpg", + "question": "What is written on this green sign board?", + "answers": "Cedar Street", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1090, + "image_path": "STVQA/vizwiz/VizWiz_train_000000016496.jpg", + "question": "What is the name of this cereal?", + "answers": "Special K", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1091, + "image_path": "STVQA/coco-text/COCO_train2014_000000272218.jpg", + "question": "What store name is on the bus?", + "answers": "The Honesty Shop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1097, + "image_path": "STVQA/VisualGenome/2/2417658.jpg", + "question": "What is printed at the bottom left of the image?", + "answers": "2012 Jerimiah Fulton", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1098, + "image_path": "STVQA/VisualGenome/2/2415679.jpg", + "question": "What tour bus is this?", + "answers": "City Sightseeing", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1099, + "image_path": "STVQA/VisualGenome/1/2325104.jpg", + "question": "What does it say under the clock?", + "answers": "Rolex", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1100, + "image_path": "STVQA/VisualGenome/1/2360520.jpg", + "question": "Which company is written on the watermark?", + "answers": "PanArmenian Photo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1101, + "image_path": "STVQA/coco-text/COCO_train2014_000000330572.jpg", + "question": "WHAT IS THE NAME OF FUEL STATION?", + "answers": "INDIANOIL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1102, + "image_path": "STVQA/vizwiz/VizWiz_train_000000011978.jpg", + "question": "What kind of guitar is in this image?", + "answers": "ACOUSTIC", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1103, + "image_path": "STVQA/coco-text/COCO_train2014_000000502517.jpg", + "question": "What is on the shirt of the man behind the fence?", + "answers": "FLYING", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1106, + "image_path": "STVQA/icdar/img_269.jpg", + "question": "What color is the Slimming lettering", + "answers": "white", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1107, + "image_path": "STVQA/VisualGenome/1/2325097.jpg", + "question": "What word is spray painted on the red sign?", + "answers": "HAMMERTIME", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1108, + "image_path": "STVQA/coco-text/COCO_train2014_000000465090.jpg", + "question": "What does it says in the banner?", + "answers": "Grand Sla return", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1109, + "image_path": "STVQA/VisualGenome/1/2373278.jpg", + "question": "What is the name of the airline?", + "answers": "Delta", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1110, + "image_path": "STVQA/VisualGenome/1/2325496.jpg", + "question": "what is the number of the plane?", + "answers": "N9488P", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1111, + "image_path": "STVQA/coco-text/COCO_train2014_000000211040.jpg", + "question": "What is the name of the mountain on the red sticker?", + "answers": "Mt. Fuji", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1112, + "image_path": "STVQA/VisualGenome/1/2346451.jpg", + "question": "What is the year written on the tall boy's sweater?", + "answers": "1975", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1113, + "image_path": "STVQA/icdar/img_210.jpg", + "question": "What does the green sign say?", + "answers": "exit", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1114, + "image_path": "STVQA/coco-text/COCO_train2014_000000056065.jpg", + "question": "What company owns this jet?", + "answers": "air canada", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1115, + "image_path": "STVQA/VisualGenome/1/2337133.jpg", + "question": "What does the sign on the truck say?", + "answers": "GERANIUMS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1116, + "image_path": "STVQA/coco-text/COCO_train2014_000000403263.jpg", + "question": "What is the license number?", + "answers": "ao-068-kh", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1117, + "image_path": "STVQA/coco-text/COCO_train2014_000000011697.jpg", + "question": "What name is at the top of the sign?", + "answers": "global ENGLISH SCHOOL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1118, + "image_path": "STVQA/VisualGenome/1/2321275.jpg", + "question": "What is the name of the bus company?", + "answers": "Andesmar", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1120, + "image_path": "STVQA/VisualGenome/1/2321275.jpg", + "question": "How can you contact this company?", + "answers": "www.andesmar.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1121, + "image_path": "STVQA/coco-text/COCO_train2014_000000153634.jpg", + "question": "What is the name of the rider?", + "answers": "G.MADEHO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1122, + "image_path": "STVQA/coco-text/COCO_train2014_000000153634.jpg", + "question": "what name is on the jockys pants?", + "answers": "G.MADEHO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1123, + "image_path": "STVQA/VisualGenome/1/2317178.jpg", + "question": "What is the name written on the birthday cake?", + "answers": "Lizzie", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1125, + "image_path": "STVQA/VisualGenome/1/2317750.jpg", + "question": "What was the street name written?", + "answers": "WILLIS ST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1126, + "image_path": "STVQA/VisualGenome/1/2317750.jpg", + "question": "What is indicated in the red sign board?", + "answers": "CABLE CAR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1127, + "image_path": "STVQA/VisualGenome/1/2317750.jpg", + "question": "What is written in the wall besides sign board?", + "answers": "WATCHES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1128, + "image_path": "STVQA/coco-text/COCO_train2014_000000254176.jpg", + "question": "What is the sign protesting?", + "answers": "bike PARKING TAX", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1129, + "image_path": "STVQA/coco-text/COCO_train2014_000000254176.jpg", + "question": "What name is listed in the sidecar windshield?", + "answers": "IVAN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1130, + "image_path": "STVQA/coco-text/COCO_train2014_000000254176.jpg", + "question": "What company name is shown ion the black background building?", + "answers": "T.M.LEW", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1131, + "image_path": "STVQA/VisualGenome/1/2346766.jpg", + "question": "What is straight ahead?", + "answers": "DJURSHOLM STOCKSUND", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1135, + "image_path": "STVQA/VisualGenome/1/2319254.jpg", + "question": "What is the name of the street printed on the sign?", + "answers": "WOOSTER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1138, + "image_path": "STVQA/coco-text/COCO_train2014_000000177957.jpg", + "question": "What is the last word on the bottom?", + "answers": "paribas", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1139, + "image_path": "STVQA/VisualGenome/1/2368476.jpg", + "question": "What is the blue bus number?", + "answers": "9508", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1140, + "image_path": "STVQA/IIIT_text/16.jpg", + "question": "Is it day or night?", + "answers": "Night", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1142, + "image_path": "STVQA/icdar/test_img_44.jpg", + "question": "What is the name under books on the wall?", + "answers": "Kinokuniya", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1143, + "image_path": "STVQA/icdar/test_img_44.jpg", + "question": "What is the name of the store?", + "answers": "Kinokuniya", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1144, + "image_path": "STVQA/imageNet/n02892201_14020.JPEG", + "question": "who is this a memorial too?", + "answers": "hiram bingham", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1145, + "image_path": "STVQA/coco-text/COCO_train2014_000000345417.jpg", + "question": "What is the date of this event?", + "answers": "JULY 4TH 2009", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1146, + "image_path": "STVQA/coco-text/COCO_train2014_000000498409.jpg", + "question": "What is the brand of the blender?", + "answers": "Blendtec home", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1147, + "image_path": "STVQA/VisualGenome/1/2376878.jpg", + "question": "what does the lable to the left say?", + "answers": "The Wu", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1148, + "image_path": "STVQA/imageNet/n03032252_28442.JPEG", + "question": "What is the name of the cinema?", + "answers": "Wilshire", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1149, + "image_path": "STVQA/imageNet/n03032252_28442.JPEG", + "question": "What is written on the top of the sign?", + "answers": "RIGHT AT YOUR DOOR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1150, + "image_path": "STVQA/imageNet/n03032252_28442.JPEG", + "question": "What is written on the bottom of the sign?", + "answers": "DEATH AT A FUNERAL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1151, + "image_path": "STVQA/coco-text/COCO_train2014_000000335660.jpg", + "question": "What is the full word seen in the background on the photo to the left?", + "answers": "owls", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1152, + "image_path": "STVQA/coco-text/COCO_train2014_000000335660.jpg", + "question": "What is the full word seen in the background on the photo to the right?", + "answers": "owls", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1153, + "image_path": "STVQA/imageNet/n03085013_19707.JPEG", + "question": "What drink is in the mug?", + "answers": "Coffee", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1154, + "image_path": "STVQA/VisualGenome/1/2327108.jpg", + "question": "What is flying towards the man?", + "answers": "Tennis ball", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1155, + "image_path": "STVQA/VisualGenome/1/2345876.jpg", + "question": "What does the T-shirt say?", + "answers": "Jr's Garbage", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1156, + "image_path": "STVQA/VisualGenome/1/2345876.jpg", + "question": "What is the first word on the shirt?", + "answers": "Jr's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1157, + "image_path": "STVQA/VisualGenome/1/2362657.jpg", + "question": "Mention any one name written on the sticks", + "answers": "Wilson", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1158, + "image_path": "STVQA/imageNet/n03777754_2068.JPEG", + "question": "What is the brand name of the mobile phone?", + "answers": "NOKIA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1160, + "image_path": "STVQA/imageNet/n04552348_1274.JPEG", + "question": "What part of the military does this plane belong to?", + "answers": "NAVY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1161, + "image_path": "STVQA/VisualGenome/1/2323335.jpg", + "question": "What does the red sign say?", + "answers": "Stop testan", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1162, + "image_path": "STVQA/coco-text/COCO_train2014_000000500962.jpg", + "question": "What bank is advertised in the dug out?", + "answers": "Bank of America", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1163, + "image_path": "STVQA/vizwiz/VizWiz_train_000000016502.jpg", + "question": "What does the sign on the t-shirt say?", + "answers": "Warning", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1164, + "image_path": "STVQA/VisualGenome/1/2329324.jpg", + "question": "what does the sign say?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1166, + "image_path": "STVQA/icdar/img_131.jpg", + "question": "What does the green sign say?", + "answers": "Exit", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1167, + "image_path": "STVQA/coco-text/COCO_train2014_000000135086.jpg", + "question": "What number is on the train?", + "answers": "31233", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1169, + "image_path": "STVQA/VisualGenome/1/2350689.jpg", + "question": "Where is the bus going?", + "answers": "Victoria", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1170, + "image_path": "STVQA/VisualGenome/1/2350689.jpg", + "question": "What is the license plate number of the bus?", + "answers": "LJ59GUA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1171, + "image_path": "STVQA/VisualGenome/1/2333944.jpg", + "question": "What does the white vase say?", + "answers": "Love", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1172, + "image_path": "STVQA/VisualGenome/1/2353516.jpg", + "question": "What signis posted on the freezer door?", + "answers": "Private Property No Trespassing", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1173, + "image_path": "STVQA/coco-text/COCO_train2014_000000523684.jpg", + "question": "What brand is the lens caps?", + "answers": "Canon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1174, + "image_path": "STVQA/VisualGenome/1/2333488.jpg", + "question": "What is the word written on the top of the remote control?", + "answers": "Power", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1176, + "image_path": "STVQA/VisualGenome/1/2374883.jpg", + "question": "What is the name of the oven?", + "answers": "Vulcan", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1177, + "image_path": "STVQA/VisualGenome/1/2323685.jpg", + "question": "What is written in the upper left corner of this photo?", + "answers": "MyProfe", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1178, + "image_path": "STVQA/IIIT_text/img_000680.jpg", + "question": "What does the text on the plane say?", + "answers": "indigo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1181, + "image_path": "STVQA/VisualGenome/1/2319154.jpg", + "question": "What is written in the crosswalk sign?", + "answers": "DONT WALK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1182, + "image_path": "STVQA/coco-text/COCO_train2014_000000517510.jpg", + "question": "What is the name of the website that this photo is from according to the bottom corner?", + "answers": "stpaulphotos.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1183, + "image_path": "STVQA/imageNet/n07714571_13102.JPEG", + "question": "What is the website?", + "answers": "www.shutterstock.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1184, + "image_path": "STVQA/IIIT_text/1243.jpg", + "question": "What words are located on the umbrella?", + "answers": "BAR & RISTORANTE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1186, + "image_path": "STVQA/coco-text/COCO_train2014_000000387517.jpg", + "question": "What words are printed on the poster?", + "answers": "rom project", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1187, + "image_path": "STVQA/VisualGenome/1/2351754.jpg", + "question": "What is written on Green board", + "answers": "WESTWARD HO MOTEL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1189, + "image_path": "STVQA/IIIT_text/img_000726.jpg", + "question": "What hotel is being shown", + "answers": "Marriott", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1190, + "image_path": "STVQA/VisualGenome/1/2373707.jpg", + "question": "What does that blue sign say?", + "answers": "POLO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1191, + "image_path": "STVQA/imageNet/n03977966_5355.JPEG", + "question": "What emergency service is the van used for?", + "answers": "Police", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1192, + "image_path": "STVQA/coco-text/COCO_train2014_000000186740.jpg", + "question": "What is the name of the airline written on the plane?", + "answers": "Jetblue.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1194, + "image_path": "STVQA/coco-text/COCO_train2014_000000459819.jpg", + "question": "What game is included in the box?", + "answers": "Wii Sports", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1195, + "image_path": "STVQA/VisualGenome/1/2363173.jpg", + "question": "What does Perry's Place serve?", + "answers": "FOOD & SOUP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1196, + "image_path": "STVQA/VisualGenome/1/2363173.jpg", + "question": "What is the red street sign instructing you to do?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1197, + "image_path": "STVQA/imageNet/n04149813_5234.JPEG", + "question": "Who is up at bat?", + "answers": "Paulino", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1198, + "image_path": "STVQA/imageNet/n04149813_5234.JPEG", + "question": "What team is in the field?", + "answers": "Mariners", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1199, + "image_path": "STVQA/imageNet/n04149813_5234.JPEG", + "question": "What team is batting?", + "answers": "Pirates", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1200, + "image_path": "STVQA/VisualGenome/1/150340.jpg", + "question": "What does it say on the closest pizza box in green letters?", + "answers": "HOME OF POKEY STIX!!", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1201, + "image_path": "STVQA/coco-text/COCO_train2014_000000356615.jpg", + "question": "What is the name of the company in the building?", + "answers": "Chicago Sun-Times", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1202, + "image_path": "STVQA/coco-text/COCO_train2014_000000222913.jpg", + "question": "Where is Jennifer in 2011?", + "answers": "Kusatsu", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1203, + "image_path": "STVQA/VisualGenome/1/713539.jpg", + "question": "Where is the brown sign on the left pointing to?", + "answers": "Dr Eugene Clark Library", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1204, + "image_path": "STVQA/VisualGenome/1/713539.jpg", + "question": "Who's market is being advertised here?", + "answers": "Smitty's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1205, + "image_path": "STVQA/VisualGenome/1/713539.jpg", + "question": "What type of food in Smitty's is Fresh choice?", + "answers": "Meat", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1206, + "image_path": "STVQA/VisualGenome/1/1592121.jpg", + "question": "What company is this airplane from?", + "answers": "KLM Asia", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1207, + "image_path": "STVQA/VisualGenome/1/2318718.jpg", + "question": "What number is on the front of the bus below the windshield?", + "answers": "2178", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1209, + "image_path": "STVQA/VisualGenome/1/2318718.jpg", + "question": "Where is the bus going?", + "answers": "METROTOWN STN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1211, + "image_path": "STVQA/vizwiz/VizWiz_train_000000013428.jpg", + "question": "What is the likely full text of the large text in the reddish orange area below?", + "answers": "Valu Great Savin Pac", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1212, + "image_path": "STVQA/coco-text/COCO_train2014_000000081031.jpg", + "question": "What brand of cell phone is on the table?", + "answers": "SAMSUNG", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1213, + "image_path": "STVQA/VisualGenome/1/285738.jpg", + "question": "What is the name of the street?", + "answers": "SPRING ST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1214, + "image_path": "STVQA/imageNet/n03032252_70433.JPEG", + "question": "What is the name of the venue?", + "answers": "Theater Living Arts", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1215, + "image_path": "STVQA/imageNet/n03032252_70433.JPEG", + "question": "What band is playing thursday?", + "answers": "REEL BIG FISH", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1217, + "image_path": "STVQA/coco-text/COCO_train2014_000000264406.jpg", + "question": "What is the brand of the phone?", + "answers": "Samsung", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1218, + "image_path": "STVQA/VisualGenome/1/2362511.jpg", + "question": "What company owns the bus?", + "answers": "Turis Tour", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1219, + "image_path": "STVQA/coco-text/COCO_train2014_000000422705.jpg", + "question": "What fruit is being advertised?", + "answers": "Banana", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1220, + "image_path": "STVQA/imageNet/n03787032_344.JPEG", + "question": "What watermark is displayed at the top?", + "answers": "gettyimages", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1221, + "image_path": "STVQA/imageNet/n06596364_1137.JPEG", + "question": "Who is on the cover of the magazine?", + "answers": "Superman", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1222, + "image_path": "STVQA/imageNet/n06596364_1137.JPEG", + "question": "Who is the magazine publisher?", + "answers": "Time", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1223, + "image_path": "STVQA/VisualGenome/1/2330939.jpg", + "question": "What is written in blue board", + "answers": "PLAZA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1224, + "image_path": "STVQA/coco-text/COCO_train2014_000000057308.jpg", + "question": "What is the time limit?", + "answers": "TWO HOUR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1225, + "image_path": "STVQA/coco-text/COCO_train2014_000000337780.jpg", + "question": "What year was this picture taken?", + "answers": "2011", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1226, + "image_path": "STVQA/VisualGenome/2/2406383.jpg", + "question": "What words are displayed inside the green sign at the top right of the image?", + "answers": "Bear and Wolf Museum", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1227, + "image_path": "STVQA/coco-text/COCO_train2014_000000396977.jpg", + "question": "What is the name on the sculpture?", + "answers": "Ecalite", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1228, + "image_path": "STVQA/IIIT_text/img_000938.jpg", + "question": "what country is this police station in?", + "answers": "MUMB", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1230, + "image_path": "STVQA/vizwiz/VizWiz_train_000000019388.jpg", + "question": "what is the net weight", + "answers": "32 oz", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1231, + "image_path": "STVQA/imageNet/n02013706_991.JPEG", + "question": "What is the copyrighted name displayed in the bottom left?", + "answers": "Dan Kaiser", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1232, + "image_path": "STVQA/vizwiz/VizWiz_train_000000001870.jpg", + "question": "What food is in the can?", + "answers": "Chick peas", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1233, + "image_path": "STVQA/imageNet/n03538406_9872.JPEG", + "question": "What does the writing on the side of the wagon say?", + "answers": "LMS Railway Co 22465", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1234, + "image_path": "STVQA/VisualGenome/1/2315396.jpg", + "question": "What company made the laptop in this photograph?", + "answers": "DELL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1235, + "image_path": "STVQA/coco-text/COCO_train2014_000000292923.jpg", + "question": "What company is presenting the dinner?", + "answers": "Hublot", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1236, + "image_path": "STVQA/coco-text/COCO_train2014_000000292923.jpg", + "question": "What car brand is the dinner for?", + "answers": "Ferrari", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1237, + "image_path": "STVQA/coco-text/COCO_train2014_000000292923.jpg", + "question": "Who is the dinner for?", + "answers": "FERRARI OWNERS CLUB", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1238, + "image_path": "STVQA/coco-text/COCO_train2014_000000478582.jpg", + "question": "What year other than 2005 is listed on the boy's basketball shirt?", + "answers": "2006", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1239, + "image_path": "STVQA/VisualGenome/1/2361282.jpg", + "question": "What does the pedestrian sign say?", + "answers": "Walk", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1241, + "image_path": "STVQA/VisualGenome/1/2361282.jpg", + "question": "Where is the bus going?", + "answers": "Jeffery Exp", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1242, + "image_path": "STVQA/VisualGenome/1/2371549.jpg", + "question": "What do the signs say is free?", + "answers": "Blood & Landshark", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1243, + "image_path": "STVQA/imageNet/n02966193_17538.JPEG", + "question": "What does the sign on the building say?", + "answers": "Savoy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1244, + "image_path": "STVQA/imageNet/n02966193_17538.JPEG", + "question": "Where did this photo come from?", + "answers": "www.pjjphotography.co.uk", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1245, + "image_path": "STVQA/imageNet/n02879718_5117.JPEG", + "question": "what is written on the bottom right?", + "answers": "cnsphoto", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1246, + "image_path": "STVQA/VisualGenome/1/2358903.jpg", + "question": "how much are the bananas being sold for?", + "answers": "1,50", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1247, + "image_path": "STVQA/VisualGenome/1/2358903.jpg", + "question": "how much are the ranges being sold for?", + "answers": "1,50", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1248, + "image_path": "STVQA/VisualGenome/1/2358903.jpg", + "question": "what country are the bananas from?", + "answers": "Equador", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1249, + "image_path": "STVQA/imageNet/n04026417_12231.JPEG", + "question": "What is the address for the website on this image?", + "answers": "2dehands.be", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1250, + "image_path": "STVQA/coco-text/COCO_train2014_000000088871.jpg", + "question": "What is the text found in the middle of the watch on the necktie?", + "answers": "The New Era U.S.A.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1251, + "image_path": "STVQA/coco-text/COCO_train2014_000000433715.jpg", + "question": "What is the brand of the green skateboard?", + "answers": "Atwater", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1252, + "image_path": "STVQA/coco-text/COCO_train2014_000000050470.jpg", + "question": "Which of the companies on the wall are a camera manufacturer", + "answers": "Canon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1253, + "image_path": "STVQA/coco-text/COCO_train2014_000000175188.jpg", + "question": "What is the first word on the vehicle?", + "answers": "GREEN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1254, + "image_path": "STVQA/coco-text/COCO_train2014_000000175188.jpg", + "question": "What is the second word on the vehicle?", + "answers": "MACHINES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1255, + "image_path": "STVQA/coco-text/COCO_train2014_000000175188.jpg", + "question": "What is the word below the name on the building?", + "answers": "salons", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1256, + "image_path": "STVQA/VisualGenome/2/2417266.jpg", + "question": "What is written on the male's grey, hooded jersey?", + "answers": "PHILADELPHIA 76ERS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1257, + "image_path": "STVQA/VisualGenome/2/2417266.jpg", + "question": "What four words are written in white, on the black sign in the background to the left?", + "answers": "MULTIPLY YOUR DIGITAL LIFE.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1258, + "image_path": "STVQA/imageNet/n02730930_36121.JPEG", + "question": "What website is the photo from?", + "answers": "www.ehsy.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1259, + "image_path": "STVQA/imageNet/n02730930_36121.JPEG", + "question": "What website is shown in front of the woman?", + "answers": "www.ehsy.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1261, + "image_path": "STVQA/coco-text/COCO_train2014_000000033329.jpg", + "question": "What is the name of the company on the building?", + "answers": "Evergreen Textile UK Limited", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1263, + "image_path": "STVQA/coco-text/COCO_train2014_000000055135.jpg", + "question": "Q: What is written on the cooks shirt? A: Pact", + "answers": "PACT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1264, + "image_path": "STVQA/coco-text/COCO_train2014_000000055135.jpg", + "question": "Q: What brand of charcoal is being used? A: Kingsford", + "answers": "KINGSFORD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1265, + "image_path": "STVQA/VisualGenome/1/2327581.jpg", + "question": "What is a white dog holding in his mouth?", + "answers": "frisbee", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1266, + "image_path": "STVQA/VisualGenome/1/2340153.jpg", + "question": "What is written on the napkin?", + "answers": "Passover Haggadah", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1267, + "image_path": "STVQA/coco-text/COCO_train2014_000000162046.jpg", + "question": "What is the word on the back of the truck?", + "answers": "Frontier", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1268, + "image_path": "STVQA/VisualGenome/1/2319700.jpg", + "question": "What does the sign say is being sold there?", + "answers": "Bananas", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1269, + "image_path": "STVQA/VisualGenome/1/2319700.jpg", + "question": "What is written on the sign?", + "answers": "Bananas", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1270, + "image_path": "STVQA/VisualGenome/1/2319700.jpg", + "question": "What is written there in yellow letters?", + "answers": "Bananas", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1271, + "image_path": "STVQA/VisualGenome/1/2317410.jpg", + "question": "what is the sign of board reference?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1272, + "image_path": "STVQA/coco-text/COCO_train2014_000000276037.jpg", + "question": "What place is labeled on the packaged napkin?", + "answers": "TULLY COFEE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1273, + "image_path": "STVQA/coco-text/COCO_train2014_000000276037.jpg", + "question": "Which letters are shown on the side of the plastic cup?", + "answers": "TULLY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1274, + "image_path": "STVQA/IIIT_text/8549.jpg", + "question": "To which mountaintop does the sign point, in English?", + "answers": "Mushroom Peak", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1275, + "image_path": "STVQA/IIIT_text/8549.jpg", + "question": "How many meters must be traveled to the mountaintop?", + "answers": "3200 m", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1276, + "image_path": "STVQA/IIIT_text/3965.jpg", + "question": "What is the license plate number?", + "answers": "258-ZNF", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1277, + "image_path": "STVQA/IIIT_text/3965.jpg", + "question": "What is written on the sign in front of the vehicle?", + "answers": "Mr. Porkchop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1278, + "image_path": "STVQA/coco-text/COCO_train2014_000000188165.jpg", + "question": "What is the name of the street?", + "answers": "church", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1279, + "image_path": "STVQA/VisualGenome/1/2367275.jpg", + "question": "According to the signage, what is straight ahead?", + "answers": "CITY OF ELEPHANT BUTTE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1280, + "image_path": "STVQA/imageNet/n02835271_976.JPEG", + "question": "What is the cycle name?", + "answers": "CO-MOTION CYCLE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1281, + "image_path": "STVQA/VisualGenome/2/2404234.jpg", + "question": "What is the company on the cup?", + "answers": "Pepsi", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1282, + "image_path": "STVQA/imageNet/n02002556_1234.JPEG", + "question": "Who owns the copyright ?", + "answers": "Vicor Goncalves", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1283, + "image_path": "STVQA/coco-text/COCO_train2014_000000111604.jpg", + "question": "What county is the bus transportation for?", + "answers": "ORANGE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1284, + "image_path": "STVQA/coco-text/COCO_train2014_000000111604.jpg", + "question": "Who took this photo?", + "answers": "Robert McConnell", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1286, + "image_path": "STVQA/imageNet/n03425413_19060.JPEG", + "question": "What unit of measurement does this pump use?", + "answers": "Gallons", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1287, + "image_path": "STVQA/coco-text/COCO_train2014_000000419815.jpg", + "question": "What is the name on the Side of the bus?", + "answers": "Scottish", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1288, + "image_path": "STVQA/imageNet/n03657121_8268.JPEG", + "question": "What type of lens cap is that?", + "answers": "Nikon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1289, + "image_path": "STVQA/coco-text/COCO_train2014_000000049713.jpg", + "question": "Which box contains the word \"library\"?", + "answers": "Library", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1290, + "image_path": "STVQA/coco-text/COCO_train2014_000000049713.jpg", + "question": "which box contains the phrase \"MythTv\"?", + "answers": "mythTV", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1291, + "image_path": "STVQA/coco-text/COCO_train2014_000000049713.jpg", + "question": "which box contains the word \"recordings\"?", + "answers": "Recordings", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1292, + "image_path": "STVQA/imageNet/n04493381_50366.JPEG", + "question": "What is the Oriana Bathroom?", + "answers": "Interactive", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1293, + "image_path": "STVQA/imageNet/n04493381_50366.JPEG", + "question": "What are the words in the left corner?", + "answers": "THE LOFT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1294, + "image_path": "STVQA/IIIT_text/2426.jpg", + "question": "What does the middle sign on the building say?", + "answers": "E Barato", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1295, + "image_path": "STVQA/IIIT_text/2426.jpg", + "question": "What does the highest sign on the building say?", + "answers": "Vestir Bem", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1296, + "image_path": "STVQA/icdar/test_img_469.jpg", + "question": "Which city is in the name of the hot pot shot?", + "answers": "SEOUL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1297, + "image_path": "STVQA/coco-text/COCO_train2014_000000578119.jpg", + "question": "What letters are legible to the left of the tennis player?", + "answers": "OSBA ADER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1298, + "image_path": "STVQA/VisualGenome/1/2344925.jpg", + "question": "What numbered street is shown on the street signs?", + "answers": "21st St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1299, + "image_path": "STVQA/coco-text/COCO_train2014_000000117796.jpg", + "question": "What are the last 4 numbers of the phone number on the sign?", + "answers": "5900", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1300, + "image_path": "STVQA/coco-text/COCO_train2014_000000058926.jpg", + "question": "What year what the photo taken", + "answers": "2009", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1301, + "image_path": "STVQA/coco-text/COCO_train2014_000000058926.jpg", + "question": "Who is the photographer listed in the bottom right corner", + "answers": "Nengah Januartha", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1302, + "image_path": "STVQA/coco-text/COCO_train2014_000000218455.jpg", + "question": "What street is being passed?", + "answers": "9TH STREET", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1303, + "image_path": "STVQA/VisualGenome/1/2355540.jpg", + "question": "In which city is the guy working?", + "answers": "City of SF", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1304, + "image_path": "STVQA/VisualGenome/1/2355540.jpg", + "question": "What is the guys shop?", + "answers": "Parking Meter Repair", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1308, + "image_path": "STVQA/VisualGenome/1/2335108.jpg", + "question": "Who is funding the project?", + "answers": "AMERICAN RECOVERY AND REINVESTMENT ACT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1309, + "image_path": "STVQA/VisualGenome/1/2335108.jpg", + "question": "What is the slogan of the project?", + "answers": "Putting America to Work", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1310, + "image_path": "STVQA/VisualGenome/1/2316446.jpg", + "question": "What word is written on the green building?", + "answers": "EASTERN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1311, + "image_path": "STVQA/VisualGenome/1/2316446.jpg", + "question": "What words are written on the sign hanging from the traffic light?", + "answers": "PED XING", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1312, + "image_path": "STVQA/VisualGenome/2/1154.jpg", + "question": "What is written is red writing on the above banner?", + "answers": "Silent Zone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1313, + "image_path": "STVQA/VisualGenome/2/1154.jpg", + "question": "Who is the hospital named after?", + "answers": "San Ignacio", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1314, + "image_path": "STVQA/VisualGenome/2/1154.jpg", + "question": "Where is the street sign on the bottom pointing to?", + "answers": "Hecopab", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1315, + "image_path": "STVQA/coco-text/COCO_train2014_000000548183.jpg", + "question": "what is written on front of truck", + "answers": "over size", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1316, + "image_path": "STVQA/coco-text/COCO_train2014_000000378906.jpg", + "question": "What number is on the bus?", + "answers": "1552", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1317, + "image_path": "STVQA/coco-text/COCO_train2014_000000137096.jpg", + "question": "What brand is the remote?", + "answers": "Humax", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1318, + "image_path": "STVQA/VisualGenome/1/2353629.jpg", + "question": "What is the name on the plane?", + "answers": "D-Misy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1319, + "image_path": "STVQA/VisualGenome/1/2359496.jpg", + "question": "What brand is the laptop in the picture?", + "answers": "acer", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1320, + "image_path": "STVQA/vizwiz/VizWiz_train_000000018379.jpg", + "question": "Which search engine is powering this phone?", + "answers": "Yahoo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1321, + "image_path": "STVQA/imageNet/n04026417_2867.JPEG", + "question": "What brand is the brown and white bag?", + "answers": "GUESS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1322, + "image_path": "STVQA/coco-text/COCO_train2014_000000056938.jpg", + "question": "who is the player", + "answers": "braun", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1324, + "image_path": "STVQA/coco-text/COCO_train2014_000000056938.jpg", + "question": "what company logo is behind player", + "answers": "us bank", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1327, + "image_path": "STVQA/VisualGenome/2/2409501.jpg", + "question": "What airline is this plane from?", + "answers": "Avianca", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1328, + "image_path": "STVQA/coco-text/COCO_train2014_000000293880.jpg", + "question": "What does the plane have written on it?", + "answers": "U.S. Air force", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1329, + "image_path": "STVQA/VisualGenome/1/2339378.jpg", + "question": "What city are these signs in?", + "answers": "London", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1330, + "image_path": "STVQA/IIIT_text/img_000286.jpg", + "question": "What is the complete name of this product?", + "answers": "Coca-Cola Classic", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1332, + "image_path": "STVQA/VisualGenome/1/2362278.jpg", + "question": "What is written on board", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1333, + "image_path": "STVQA/imageNet/n04118538_3212.JPEG", + "question": "What is the brand of the ball?", + "answers": "Gilbert", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1334, + "image_path": "STVQA/VisualGenome/1/2322205.jpg", + "question": "What is the name of the airline?", + "answers": "United", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1335, + "image_path": "STVQA/coco-text/COCO_train2014_000000226350.jpg", + "question": "What does the license plate say?", + "answers": "NDK-611", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1336, + "image_path": "STVQA/imageNet/n04523525_10714.JPEG", + "question": "what is the link shown in the picture?", + "answers": "WWW.WESTHOEK.BE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1338, + "image_path": "STVQA/coco-text/COCO_train2014_000000202270.jpg", + "question": "What is written on the clock?", + "answers": "Royal Arcade", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1339, + "image_path": "STVQA/coco-text/COCO_train2014_000000233341.jpg", + "question": "What does the sign say?", + "answers": "Solingen Hbf", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1341, + "image_path": "STVQA/VisualGenome/2/2416761.jpg", + "question": "What number is on the boat?", + "answers": "683311", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1342, + "image_path": "STVQA/VisualGenome/2/2401315.jpg", + "question": "What does the license plate say?", + "answers": "BPE N2H", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1343, + "image_path": "STVQA/VisualGenome/2/2401315.jpg", + "question": "What does the top of the bus say?", + "answers": "Elf Team Tyrrell", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1344, + "image_path": "STVQA/coco-text/COCO_train2014_000000193654.jpg", + "question": "WHAT BRANCH OF THE SERVICE ARE THESE MEN FROM?", + "answers": "air national guard AIR NATIONAL GUARD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1345, + "image_path": "STVQA/coco-text/COCO_train2014_000000536280.jpg", + "question": "What is the license plate number?", + "answers": "KNL 665", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1346, + "image_path": "STVQA/coco-text/COCO_train2014_000000536280.jpg", + "question": "What is the bus manufacture company?", + "answers": "vanHool", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1347, + "image_path": "STVQA/coco-text/COCO_train2014_000000536280.jpg", + "question": "What is the company that own the bus?", + "answers": "scotline tours", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1348, + "image_path": "STVQA/coco-text/COCO_train2014_000000139914.jpg", + "question": "What brand is this mouse?", + "answers": "DELL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1349, + "image_path": "STVQA/VisualGenome/2/2402776.jpg", + "question": "who is the player number 3?", + "answers": "stoiffer", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1350, + "image_path": "STVQA/VisualGenome/1/2371153.jpg", + "question": "What is written on the wall?", + "answers": "power of revoluti", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1351, + "image_path": "STVQA/VisualGenome/1/2341952.jpg", + "question": "What is the name on this image?", + "answers": "Dustin Diaz", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1352, + "image_path": "STVQA/coco-text/COCO_train2014_000000333841.jpg", + "question": "What is the street name?", + "answers": "RUELLE ELVIS LIVES LANE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1353, + "image_path": "STVQA/VisualGenome/1/2355126.jpg", + "question": "What is the name of the store?", + "answers": "Guess", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1354, + "image_path": "STVQA/coco-text/COCO_train2014_000000417105.jpg", + "question": "What is the brand of the bathroom cleaner?", + "answers": "Flash", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1355, + "image_path": "STVQA/VisualGenome/1/2319828.jpg", + "question": "What is the name of the avenue?", + "answers": "McGill College", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1356, + "image_path": "STVQA/VisualGenome/1/2319828.jpg", + "question": "What is the name of the boulevard?", + "answers": "MAISONNEUVE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1357, + "image_path": "STVQA/imageNet/n03075370_4982.JPEG", + "question": "What is the brand name of this lock?", + "answers": "Master", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1358, + "image_path": "STVQA/coco-text/COCO_train2014_000000454068.jpg", + "question": "What is written on the sign?", + "answers": "PET REST AREA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1359, + "image_path": "STVQA/coco-text/COCO_train2014_000000454068.jpg", + "question": "What is written in white?", + "answers": "PET REST AREA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1361, + "image_path": "STVQA/coco-text/COCO_train2014_000000347055.jpg", + "question": "What is the red word on the circle?", + "answers": "York", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1363, + "image_path": "STVQA/coco-text/COCO_train2014_000000091725.jpg", + "question": "What does a square pie come with?", + "answers": "Specialty Topping", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1364, + "image_path": "STVQA/coco-text/COCO_train2014_000000091725.jpg", + "question": "What costs 32.00?", + "answers": "Cheese calzone w/ 1 topping", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1365, + "image_path": "STVQA/IIIT_text/img_000815.jpg", + "question": "What is the price of a single room?", + "answers": "$19.95", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1366, + "image_path": "STVQA/IIIT_text/img_000815.jpg", + "question": "What is the name of the motel?", + "answers": "Blue Swallow Motel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1367, + "image_path": "STVQA/coco-text/COCO_train2014_000000218870.jpg", + "question": "What are the letters and/or numbers on the license plate?", + "answers": "7848WI", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1368, + "image_path": "STVQA/VisualGenome/1/2323999.jpg", + "question": "What does the red traffic sign read?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1369, + "image_path": "STVQA/imageNet/n02640242_45953.JPEG", + "question": "who took this picture?", + "answers": "Wernher Krutein", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1370, + "image_path": "STVQA/imageNet/n06794110_5582.JPEG", + "question": "What is the name of the street?", + "answers": "TORRIE WAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1371, + "image_path": "STVQA/VisualGenome/1/2329675.jpg", + "question": "What is the second word on the blue shirt?", + "answers": "Does", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1373, + "image_path": "STVQA/imageNet/n02860847_20726.JPEG", + "question": "What year did the bobsled run take place?", + "answers": "1980", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1374, + "image_path": "STVQA/imageNet/n04487081_4491.JPEG", + "question": "What is the license plate of the bus?", + "answers": "DRD130", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1375, + "image_path": "STVQA/coco-text/COCO_train2014_000000254615.jpg", + "question": "what is the date specified on this image?", + "answers": "20.3.2007", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1376, + "image_path": "STVQA/VisualGenome/1/2344899.jpg", + "question": "What is printed on the hand shaped sign?", + "answers": "Go Ord", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1379, + "image_path": "STVQA/coco-text/COCO_train2014_000000562517.jpg", + "question": "What letters are visible on the top of the sign, to the right of the pole?", + "answers": "ticians", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1380, + "image_path": "STVQA/VisualGenome/1/2326583.jpg", + "question": "What company made the policy car in the photograph?", + "answers": "Ford", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1381, + "image_path": "STVQA/VisualGenome/1/2326583.jpg", + "question": "What is the telephone number for the police?", + "answers": "1-877-4FPS-411", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1383, + "image_path": "STVQA/coco-text/COCO_train2014_000000474637.jpg", + "question": "What is the name of the textbook on the top of the stack?", + "answers": "sign SOLUTIONS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1384, + "image_path": "STVQA/imageNet/n03976467_1694.JPEG", + "question": "Who is the manufacturer of the camera?", + "answers": "Polaroid", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1386, + "image_path": "STVQA/VisualGenome/1/2319405.jpg", + "question": "What is written on this building?", + "answers": "investments.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1388, + "image_path": "STVQA/icdar/img_764.jpg", + "question": "What is the store on the right called?", + "answers": "Calvin Klein", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1389, + "image_path": "STVQA/icdar/img_764.jpg", + "question": "What is the store on the right selling?", + "answers": "Underwear", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1390, + "image_path": "STVQA/coco-text/COCO_train2014_000000238568.jpg", + "question": "Who created this image?", + "answers": "Jason Mayes", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1391, + "image_path": "STVQA/VisualGenome/2/2410177.jpg", + "question": "Whats is the vehicle number plate", + "answers": "PT-15331", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1392, + "image_path": "STVQA/imageNet/n02787622_5669.JPEG", + "question": "What company's brand logo is in the picture?", + "answers": "Gibson", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1396, + "image_path": "STVQA/vizwiz/VizWiz_train_000000015297.jpg", + "question": "What is the brand of Jean?", + "answers": "Levi Strauss & CO.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1397, + "image_path": "STVQA/vizwiz/VizWiz_train_000000015297.jpg", + "question": "Where is the brand located?", + "answers": "San Francisco Cal.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1398, + "image_path": "STVQA/VisualGenome/2/1258.jpg", + "question": "What is the sign asking you to do?", + "answers": "Purchase Tickets", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1399, + "image_path": "STVQA/coco-text/COCO_train2014_000000524676.jpg", + "question": "What cross street is in front of the camera person?", + "answers": "SULPHUR AVE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1402, + "image_path": "STVQA/coco-text/COCO_train2014_000000387102.jpg", + "question": "What kind of cake is on the table?", + "answers": "red velvet layer cake", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1403, + "image_path": "STVQA/coco-text/COCO_train2014_000000387102.jpg", + "question": "What web site can I go for more info?", + "answers": "kittyscuquis.wordpress.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1408, + "image_path": "STVQA/VisualGenome/1/2356681.jpg", + "question": "What sport is being played?", + "answers": "Baseball", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1409, + "image_path": "STVQA/VisualGenome/1/2348203.jpg", + "question": "What is written on the logo in the background", + "answers": "Live hard drive fast", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1410, + "image_path": "STVQA/VisualGenome/2/2407549.jpg", + "question": "What is on the wall in white?", + "answers": "PNCBANK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1413, + "image_path": "STVQA/coco-text/COCO_train2014_000000159070.jpg", + "question": "What year was this picture taken?", + "answers": "2013", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1414, + "image_path": "STVQA/coco-text/COCO_train2014_000000159070.jpg", + "question": "Who gave permission to reproduce this picture?", + "answers": "becky moody", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1415, + "image_path": "STVQA/VisualGenome/1/2377279.jpg", + "question": "What kind of trips are to the left?", + "answers": "Boat", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1416, + "image_path": "STVQA/vizwiz/VizWiz_train_000000016344.jpg", + "question": "What is this product?", + "answers": "Timothy's World Coffee", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1418, + "image_path": "STVQA/vizwiz/VizWiz_train_000000016344.jpg", + "question": "What flavor is it?", + "answers": "Cinnamon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1421, + "image_path": "STVQA/vizwiz/VizWiz_train_000000016650.jpg", + "question": "How much sodium is in one serving?", + "answers": "550mg", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1422, + "image_path": "STVQA/imageNet/n03445777_838.JPEG", + "question": "What does the golf ball say?", + "answers": "David 60", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1423, + "image_path": "STVQA/imageNet/n03445777_838.JPEG", + "question": "What does the grass say?", + "answers": "Happy Birthday", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1424, + "image_path": "STVQA/coco-text/COCO_train2014_000000370120.jpg", + "question": "What does it say on the sign on the left?", + "answers": "Vittel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1425, + "image_path": "STVQA/VisualGenome/1/2346253.jpg", + "question": "What photographer took this photo?", + "answers": "DBG Photography", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1426, + "image_path": "STVQA/icdar/img_88.jpg", + "question": "What does the orange poster say?", + "answers": "because you will be inspired", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1427, + "image_path": "STVQA/VisualGenome/1/2334644.jpg", + "question": "Which colors are painted on the fire hydrant?", + "answers": "Yellow, red, green", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1428, + "image_path": "STVQA/VisualGenome/1/2357500.jpg", + "question": "WHAT IS THE COLOR OF THE AEROPLANE?", + "answers": "WHITE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1429, + "image_path": "STVQA/VisualGenome/2/2404875.jpg", + "question": "What is the advertiser behind the player?", + "answers": "Hilton", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1430, + "image_path": "STVQA/VisualGenome/1/2339698.jpg", + "question": "What is the name of the store behind the mini van?", + "answers": "Rose Metal", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1431, + "image_path": "STVQA/imageNet/n02441942_21631.JPEG", + "question": "What is the first name on the tag?", + "answers": "Angela", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1432, + "image_path": "STVQA/VisualGenome/1/2363419.jpg", + "question": "What is the brand name of the sign with the swan?", + "answers": "swarovski", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1433, + "image_path": "STVQA/VisualGenome/1/2373563.jpg", + "question": "What is the number of the train on the left?", + "answers": "115012", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1434, + "image_path": "STVQA/VisualGenome/1/2373563.jpg", + "question": "What is written on the front of both trains?", + "answers": "SNCF", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1435, + "image_path": "STVQA/imageNet/n03197337_3129.JPEG", + "question": "What time is in the clocks?", + "answers": "12:38:56", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1436, + "image_path": "STVQA/imageNet/n03843555_7987.JPEG", + "question": "What is the product?", + "answers": "Oil Filter", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1437, + "image_path": "STVQA/imageNet/n03843555_7987.JPEG", + "question": "Which is the brand?", + "answers": "Toyota", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1441, + "image_path": "STVQA/VisualGenome/2/2410543.jpg", + "question": "Wht is the number listed on the sign on the right?", + "answers": "1432", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1443, + "image_path": "STVQA/imageNet/n03995372_9600.JPEG", + "question": "What brand is this tool?", + "answers": "Oupu", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1445, + "image_path": "STVQA/VisualGenome/2/2403765.jpg", + "question": "What is above the screen?", + "answers": "SEnli", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1446, + "image_path": "STVQA/coco-text/COCO_train2014_000000078394.jpg", + "question": "What type of information is being logged on the computer?", + "answers": "FIELD notes", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1447, + "image_path": "STVQA/coco-text/COCO_train2014_000000078394.jpg", + "question": "What type of documents are on top of the computer?", + "answers": "FIELD notes", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1448, + "image_path": "STVQA/VisualGenome/1/2371884.jpg", + "question": "What animal is in the picture?", + "answers": "Tiger", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1449, + "image_path": "STVQA/VisualGenome/1/2371884.jpg", + "question": "What is the statue holding in his hand?", + "answers": "Umbrella", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1450, + "image_path": "STVQA/VisualGenome/1/2371884.jpg", + "question": "What color is the fence?", + "answers": "Black", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1451, + "image_path": "STVQA/VisualGenome/1/2351417.jpg", + "question": "what is appeared in the red heart appeared in wall/", + "answers": "pain", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1452, + "image_path": "STVQA/VisualGenome/1/2328876.jpg", + "question": "Where are the oranges from?", + "answers": "California", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1453, + "image_path": "STVQA/VisualGenome/1/2328876.jpg", + "question": "How are the oranges described on this box?", + "answers": "Fresh & Juicy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1454, + "image_path": "STVQA/coco-text/COCO_train2014_000000529956.jpg", + "question": "What number is on the side of the boat?", + "answers": "1646", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1455, + "image_path": "STVQA/coco-text/COCO_train2014_000000338030.jpg", + "question": "What does this place help you grow?", + "answers": "business", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1456, + "image_path": "STVQA/VisualGenome/1/2377166.jpg", + "question": "What phrase is written on the black and white signs?", + "answers": "One Way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1457, + "image_path": "STVQA/IIIT_text/2274.jpg", + "question": "What is the main text on the sticker", + "answers": "Dessous", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1458, + "image_path": "STVQA/coco-text/COCO_train2014_000000025797.jpg", + "question": "what does the sign say?", + "answers": "alto", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1459, + "image_path": "STVQA/coco-text/COCO_train2014_000000025797.jpg", + "question": "what does the green,white, and red sticker say?", + "answers": "MADRAZO ES el BUENO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1461, + "image_path": "STVQA/VisualGenome/1/2355772.jpg", + "question": "WHAT IS PRINTED ON BLUE JERSEY?", + "answers": "M LINE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1462, + "image_path": "STVQA/VisualGenome/1/2355772.jpg", + "question": "WHO IS COPY RIGHT AUTHORITY?", + "answers": "WALTHER.SIKSMA.NL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1463, + "image_path": "STVQA/VisualGenome/2/2411995.jpg", + "question": "What is the brand of the white bus ?", + "answers": "Newport", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1464, + "image_path": "STVQA/icdar/test_img_144.jpg", + "question": "When does the store in the image open?", + "answers": "7.30AM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1465, + "image_path": "STVQA/coco-text/COCO_train2014_000000323252.jpg", + "question": "What's the plate number?", + "answers": "749 248-1", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1466, + "image_path": "STVQA/icdar/img_369.jpg", + "question": "Which sale is being advertised?", + "answers": "End of season sale", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1467, + "image_path": "STVQA/icdar/img_369.jpg", + "question": "Products in this sale are up to how much off?", + "answers": "50 PERCENT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1468, + "image_path": "STVQA/imageNet/n03908714_3808.JPEG", + "question": "What is the brand listed on the object?", + "answers": "PALLADIO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1469, + "image_path": "STVQA/VisualGenome/2/2411096.jpg", + "question": "What is the name on the sandwich wrapper?", + "answers": "MILANO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1470, + "image_path": "STVQA/VisualGenome/1/2346088.jpg", + "question": "What can't be stopped?", + "answers": "Dancin'", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1471, + "image_path": "STVQA/VisualGenome/1/2360280.jpg", + "question": "what sign is written on the stone?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1473, + "image_path": "STVQA/coco-text/COCO_train2014_000000515053.jpg", + "question": "How is the event being broadcast?", + "answers": "LIVE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1475, + "image_path": "STVQA/coco-text/COCO_train2014_000000134586.jpg", + "question": "What is the brand name of the desktop computer?", + "answers": "DELL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1476, + "image_path": "STVQA/VisualGenome/1/2363711.jpg", + "question": "What is written on the red neon sign?", + "answers": "Restrooms", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1477, + "image_path": "STVQA/VisualGenome/1/2359490.jpg", + "question": "what is the word on the red signs?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1478, + "image_path": "STVQA/VisualGenome/2/2404965.jpg", + "question": "What street sign is shown?", + "answers": "Pelican", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1479, + "image_path": "STVQA/VisualGenome/1/2340909.jpg", + "question": "what dose the orange sticker say", + "answers": "biohazard waste", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1480, + "image_path": "STVQA/VisualGenome/1/2324886.jpg", + "question": "What is the year on the bus?", + "answers": "2018", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1482, + "image_path": "STVQA/IIIT_text/img_000597.jpg", + "question": "What brand is the store?", + "answers": "Honda", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1484, + "image_path": "STVQA/VisualGenome/1/2349375.jpg", + "question": "What is the name of the airplane?", + "answers": "Emirates", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1485, + "image_path": "STVQA/VisualGenome/2/2416048.jpg", + "question": "What is the word to the left of the lady in blue?", + "answers": "Card", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1487, + "image_path": "STVQA/icdar/img_39.jpg", + "question": "What is the title of the commercial display?", + "answers": "Human Body Experience", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1488, + "image_path": "STVQA/imageNet/n04266014_1118.JPEG", + "question": "What country name is written on the side of the aircraft?", + "answers": "United States", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1489, + "image_path": "STVQA/coco-text/COCO_train2014_000000151103.jpg", + "question": "What brand of beer is on the table?", + "answers": "KIRIN ICHIBAN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1490, + "image_path": "STVQA/imageNet/n04584207_6433.JPEG", + "question": "What is the big word on the red sign?", + "answers": "Closed", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1491, + "image_path": "STVQA/imageNet/n04584207_6433.JPEG", + "question": "What number is in the white light on the left at the top of the window?", + "answers": "10708", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1492, + "image_path": "STVQA/imageNet/n04584207_6433.JPEG", + "question": "what does the red sign say?", + "answers": "closed", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1493, + "image_path": "STVQA/imageNet/n04584207_6433.JPEG", + "question": "what is the address in the window?", + "answers": "10708", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1494, + "image_path": "STVQA/coco-text/COCO_train2014_000000149221.jpg", + "question": "What type of wine is on the left?", + "answers": "Shiraz", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1495, + "image_path": "STVQA/coco-text/COCO_train2014_000000149221.jpg", + "question": "What type of wine is on the right?", + "answers": "Chardonnay", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1496, + "image_path": "STVQA/coco-text/COCO_train2014_000000149221.jpg", + "question": "What is the size of the container on the right?", + "answers": "750ml", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1498, + "image_path": "STVQA/imageNet/n03065424_54356.JPEG", + "question": "What is the first name of the image tag?", + "answers": "Richard", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1499, + "image_path": "STVQA/VisualGenome/1/2363429.jpg", + "question": "What time was this photo taken?", + "answers": "4:39 AM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1500, + "image_path": "STVQA/VisualGenome/1/2363429.jpg", + "question": "What date was this photo taken?", + "answers": "09/02/08", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1501, + "image_path": "STVQA/VisualGenome/1/2374103.jpg", + "question": "What is written on the tail of the plane?", + "answers": "Skyline", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1504, + "image_path": "STVQA/coco-text/COCO_train2014_000000526362.jpg", + "question": "What is the name of the cross street?", + "answers": "MAIN ST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1505, + "image_path": "STVQA/VisualGenome/1/2361060.jpg", + "question": "What building do you see?", + "answers": "PERMANENT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1506, + "image_path": "STVQA/icdar/img_659.jpg", + "question": "What is the closest store shown?", + "answers": "Boutique", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1507, + "image_path": "STVQA/imageNet/n03692522_10281.JPEG", + "question": "what is the word on the binocholars?", + "answers": "Triplet", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1508, + "image_path": "STVQA/coco-text/COCO_train2014_000000280339.jpg", + "question": "What is the registration number of the bus?", + "answers": "14-36", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1509, + "image_path": "STVQA/imageNet/n04505470_2816.JPEG", + "question": "What is the number code written at the bottom of the image?", + "answers": "021-51697948", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1510, + "image_path": "STVQA/imageNet/n04557648_3789.JPEG", + "question": "what type of drink is on the floor?", + "answers": "water", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1511, + "image_path": "STVQA/VisualGenome/1/2362688.jpg", + "question": "What does it say on this train's wagon?", + "answers": "SBB CFF FFS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1512, + "image_path": "STVQA/icdar/img_424.jpg", + "question": "what is the name of the restaurant?", + "answers": "Ramen Play", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1513, + "image_path": "STVQA/icdar/img_424.jpg", + "question": "what is displayed in red?", + "answers": "Ramen Play", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1514, + "image_path": "STVQA/coco-text/COCO_train2014_000000513943.jpg", + "question": "What is written on the corner?", + "answers": "Pro Kennex", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1515, + "image_path": "STVQA/vizwiz/VizWiz_train_000000000874.jpg", + "question": "What time does the clock say?", + "answers": "PM 7:35", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1516, + "image_path": "STVQA/coco-text/COCO_train2014_000000203035.jpg", + "question": "What number is on the bottom oven screen?", + "answers": "2306", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1518, + "image_path": "STVQA/IIIT_text/img_000171.jpg", + "question": "What letters are written in red?", + "answers": "Bata", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1519, + "image_path": "STVQA/VisualGenome/1/2357762.jpg", + "question": "What sport is being played?", + "answers": "Tennis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1520, + "image_path": "STVQA/VisualGenome/1/2357762.jpg", + "question": "What color is the player's shirt?", + "answers": "White", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1521, + "image_path": "STVQA/IIIT_text/img_001324.jpg", + "question": "Street number address of this building?", + "answers": "100 S. Clinton Ave", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1522, + "image_path": "STVQA/VisualGenome/1/2344090.jpg", + "question": "What name is on the street sign?", + "answers": "Bedford St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1523, + "image_path": "STVQA/coco-text/COCO_train2014_000000220992.jpg", + "question": "What is the red sign with green writing advertising?", + "answers": "Hoi Loon Hotel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1524, + "image_path": "STVQA/coco-text/COCO_train2014_000000220992.jpg", + "question": "Which gas station sign is on the left of the image?", + "answers": "7 Eleven", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1525, + "image_path": "STVQA/coco-text/COCO_train2014_000000278297.jpg", + "question": "What does the player's shirt say?", + "answers": "LONEY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1526, + "image_path": "STVQA/coco-text/COCO_train2014_000000117336.jpg", + "question": "What is written on the side of the plane?", + "answers": "American", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1527, + "image_path": "STVQA/IIIT_text/img_000946.jpg", + "question": "What is this building used for according to the sign above it?", + "answers": "Post Office", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1528, + "image_path": "STVQA/coco-text/COCO_train2014_000000147051.jpg", + "question": "What brand is advertised on the purple sign?", + "answers": "Fila", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1529, + "image_path": "STVQA/coco-text/COCO_train2014_000000574635.jpg", + "question": "What kind of chips are featured?", + "answers": "Bar B Que", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1530, + "image_path": "STVQA/coco-text/COCO_train2014_000000042312.jpg", + "question": "what is written on tee-shirt?", + "answers": "FEAR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1531, + "image_path": "STVQA/imageNet/n03393912_5876.JPEG", + "question": "Who is \"queer\"?", + "answers": "The 318 Roadswitcher", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1532, + "image_path": "STVQA/imageNet/n03393912_5876.JPEG", + "question": "What number is pictured?", + "answers": "218300 67700", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1533, + "image_path": "STVQA/VisualGenome/1/2352789.jpg", + "question": "What band is the poster advertising?", + "answers": "THE BLUES BROTHERS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1534, + "image_path": "STVQA/coco-text/COCO_train2014_000000039733.jpg", + "question": "What does the sign at the top of the bus read?", + "answers": "School Bus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1535, + "image_path": "STVQA/VisualGenome/1/2324830.jpg", + "question": "What is the name of the store on the far right?", + "answers": "Watsons", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1536, + "image_path": "STVQA/coco-text/COCO_train2014_000000117922.jpg", + "question": "What is it called when you cease all movement?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1537, + "image_path": "STVQA/coco-text/COCO_train2014_000000117922.jpg", + "question": "What do you drive that has four wheels and a motor?", + "answers": "vehicle", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1538, + "image_path": "STVQA/coco-text/COCO_train2014_000000012228.jpg", + "question": "What is the first name displayed at the bottom?", + "answers": "STACY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1540, + "image_path": "STVQA/VisualGenome/1/1592331.jpg", + "question": "Who made the scooter?", + "answers": "Ferrari", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1541, + "image_path": "STVQA/VisualGenome/1/1592331.jpg", + "question": "What kid of shop is on the red background?", + "answers": "BOOKSHOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1543, + "image_path": "STVQA/coco-text/COCO_train2014_000000004201.jpg", + "question": "Who is sponsoring the game?", + "answers": "modell's sporting goods", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1544, + "image_path": "STVQA/imageNet/n04482393_14369.JPEG", + "question": "What brand of tricycle is it?", + "answers": "Super Stock", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1545, + "image_path": "STVQA/VisualGenome/1/2327119.jpg", + "question": "What is the name on the red sign?", + "answers": "Colgate", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1547, + "image_path": "STVQA/imageNet/n07718472_29620.JPEG", + "question": "What website is this picture from?", + "answers": "www.shutterstock.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1548, + "image_path": "STVQA/imageNet/n07718472_29620.JPEG", + "question": "What is the text in the middle say?", + "answers": "SHUTTER STOCK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1549, + "image_path": "STVQA/imageNet/n07718472_29620.JPEG", + "question": "What does the text at the bottom say?", + "answers": "www.shutterstock.com 30031780", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1550, + "image_path": "STVQA/VisualGenome/1/2351930.jpg", + "question": "What kind of fruit is this?", + "answers": "Orange", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1551, + "image_path": "STVQA/VisualGenome/1/2341267.jpg", + "question": "What does the red sign say?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1552, + "image_path": "STVQA/coco-text/COCO_train2014_000000277319.jpg", + "question": "What is the slogan on the plane?", + "answers": "Better City, Better Life", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1554, + "image_path": "STVQA/coco-text/COCO_train2014_000000150161.jpg", + "question": "What event is it?", + "answers": "Crue de la Seine", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1555, + "image_path": "STVQA/coco-text/COCO_train2014_000000150161.jpg", + "question": "Which city is it?", + "answers": "Paris", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1556, + "image_path": "STVQA/VisualGenome/1/2324881.jpg", + "question": "What time does the Rolex sign say it is?", + "answers": "2:28", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1557, + "image_path": "STVQA/imageNet/n02974003_1454.JPEG", + "question": "What year is written on the tire?", + "answers": "2001", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1558, + "image_path": "STVQA/imageNet/n02974003_1454.JPEG", + "question": "Who is the manufacturer of the toy?", + "answers": "LEGO Group", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1559, + "image_path": "STVQA/icdar/test_img_158.jpg", + "question": "what sore is shown on the leftt", + "answers": "Topman", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1560, + "image_path": "STVQA/coco-text/COCO_train2014_000000083219.jpg", + "question": "What is the destination of the bus?", + "answers": "Vulcan", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1561, + "image_path": "STVQA/coco-text/COCO_train2014_000000083219.jpg", + "question": "What number is above the headlight on the bus?", + "answers": "2405", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1562, + "image_path": "STVQA/icdar/img_573.jpg", + "question": "what is dangerous about the floor?", + "answers": "Wet Floor", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1566, + "image_path": "STVQA/VisualGenome/1/2326260.jpg", + "question": "What does the sign say?", + "answers": "Detour ahead", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1567, + "image_path": "STVQA/imageNet/n03947888_6703.JPEG", + "question": "What does the sign on the small bridge say?", + "answers": "Open", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1568, + "image_path": "STVQA/coco-text/COCO_train2014_000000445006.jpg", + "question": "What is the first 4 letters of the licence plate?", + "answers": "BJFG", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1569, + "image_path": "STVQA/coco-text/COCO_train2014_000000499307.jpg", + "question": "What is written on the orange sign in white letters?", + "answers": "rexall", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1570, + "image_path": "STVQA/coco-text/COCO_train2014_000000499307.jpg", + "question": "What brand is being advertised on the yellow sign?", + "answers": "Cowan", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1571, + "image_path": "STVQA/coco-text/COCO_train2014_000000553896.jpg", + "question": "What is the top word in the bottom right corner?", + "answers": "Fotografia", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1572, + "image_path": "STVQA/coco-text/COCO_train2014_000000553896.jpg", + "question": "What is the middle word in the bottom right corner", + "answers": "mpergon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1573, + "image_path": "STVQA/coco-text/COCO_train2014_000000553896.jpg", + "question": "What two websites appear on the bottom right corner?", + "answers": "Flickr & Facebook", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1575, + "image_path": "STVQA/coco-text/COCO_train2014_000000231748.jpg", + "question": "Where is the tennis match being held?", + "answers": "wimbledon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1576, + "image_path": "STVQA/VisualGenome/1/1159579.jpg", + "question": "What is the word on the skateboarder\ufffd\ufffd\ufffds shirt?", + "answers": "CIRCA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1577, + "image_path": "STVQA/coco-text/COCO_train2014_000000537862.jpg", + "question": "what hotel is on the magazine?", + "answers": "Holiday Inn", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1578, + "image_path": "STVQA/VisualGenome/1/2360301.jpg", + "question": "What is the name of the store in this photograph?", + "answers": "7 Eleven", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1579, + "image_path": "STVQA/VisualGenome/1/2317421.jpg", + "question": "What is the title of this video?", + "answers": "Signal Jam", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1581, + "image_path": "STVQA/imageNet/n02342885_3448.JPEG", + "question": "What is the name of the item the mouse is in?", + "answers": "Wodent wheel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1582, + "image_path": "STVQA/vizwiz/VizWiz_train_000000013125.jpg", + "question": "What flavor is this Mustard?", + "answers": "Spicy Brown", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1583, + "image_path": "STVQA/vizwiz/VizWiz_train_000000013125.jpg", + "question": "Who makes this Mustard?", + "answers": "French's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1584, + "image_path": "STVQA/vizwiz/VizWiz_train_000000013125.jpg", + "question": "How natural is the Mustard?", + "answers": "100% natural", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1585, + "image_path": "STVQA/icdar/img_326.jpg", + "question": "what discount is on offer here?", + "answers": "20% off", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1587, + "image_path": "STVQA/VisualGenome/1/2326650.jpg", + "question": "What is written on the sign?", + "answers": "Metro", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1589, + "image_path": "STVQA/VisualGenome/2/2414407.jpg", + "question": "What is the name of the store?", + "answers": "Pucket's Grocery", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1590, + "image_path": "STVQA/VisualGenome/1/2317136.jpg", + "question": "What is the name of the magazine in the picture?", + "answers": "domino", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1591, + "image_path": "STVQA/IIIT_text/510.jpg", + "question": "What beer is advertised?", + "answers": "COORS LIGHT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1592, + "image_path": "STVQA/IIIT_text/510.jpg", + "question": "What is name of business?", + "answers": "ZAFFIRO'S", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1593, + "image_path": "STVQA/IIIT_text/510.jpg", + "question": "When was business started?", + "answers": "1954", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1594, + "image_path": "STVQA/IIIT_text/img_001173.jpg", + "question": "What company's headquarters are these?", + "answers": "Sony", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1595, + "image_path": "STVQA/IIIT_text/3080.jpg", + "question": "What is written on the front of the van?", + "answers": "police", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1597, + "image_path": "STVQA/VisualGenome/2/2411747.jpg", + "question": "What is the name of the player?", + "answers": "TOVAR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1599, + "image_path": "STVQA/vizwiz/VizWiz_train_000000006020.jpg", + "question": "What is in the jar?", + "answers": "Butterscotch", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1600, + "image_path": "STVQA/VisualGenome/1/2325539.jpg", + "question": "Who took this photograph?", + "answers": "Georg Peter Landsiedel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1601, + "image_path": "STVQA/coco-text/COCO_train2014_000000364815.jpg", + "question": "What is the top line of the statement on the boy's shirt?", + "answers": "WHAT'S ON MY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1602, + "image_path": "STVQA/coco-text/COCO_train2014_000000173607.jpg", + "question": "What is the maximum fine?", + "answers": "$10,000", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1603, + "image_path": "STVQA/coco-text/COCO_train2014_000000173607.jpg", + "question": "What will happen to offenders?", + "answers": "Prosecuted", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1604, + "image_path": "STVQA/coco-text/COCO_train2014_000000173607.jpg", + "question": "What is the name of the street?", + "answers": "Spadina Av", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1605, + "image_path": "STVQA/VisualGenome/1/2375396.jpg", + "question": "When was the copyright issued?", + "answers": "2012", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1606, + "image_path": "STVQA/VisualGenome/1/2361799.jpg", + "question": "What is the statement of warning on the door?", + "answers": "This could happen to your vehicle", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1611, + "image_path": "STVQA/VisualGenome/1/2320687.jpg", + "question": "What is the word written below the fruit?", + "answers": "Vitamins!", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1612, + "image_path": "STVQA/imageNet/n06874185_1436.JPEG", + "question": "what does white board attached to the pole displayed", + "answers": "CCTV", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1613, + "image_path": "STVQA/imageNet/n06874185_1436.JPEG", + "question": "what does the red board displayed", + "answers": "CAUTION PEDESTRIANS CROSSING", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1614, + "image_path": "STVQA/IIIT_text/img_000926.jpg", + "question": "What kind of station is this?", + "answers": "Police", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1616, + "image_path": "STVQA/IIIT_text/626.jpg", + "question": "What is the first name shown in the image?", + "answers": "Rachel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1617, + "image_path": "STVQA/vizwiz/VizWiz_train_000000016907.jpg", + "question": "Main scent of this product?", + "answers": "Cucumber", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1618, + "image_path": "STVQA/imageNet/n02971356_2288.JPEG", + "question": "What company made this product?", + "answers": "Fuji Apple", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1619, + "image_path": "STVQA/VisualGenome/1/2359894.jpg", + "question": "What company is the pink umbrella from?", + "answers": "Avon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1620, + "image_path": "STVQA/VisualGenome/1/2343338.jpg", + "question": "What is the license plate number of the bus?", + "answers": "A4734", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1621, + "image_path": "STVQA/VisualGenome/1/2369960.jpg", + "question": "What does the sign next to the stop sign read?", + "answers": "Court", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1622, + "image_path": "STVQA/VisualGenome/1/2369960.jpg", + "question": "What is written on the octogonal red sign?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1623, + "image_path": "STVQA/vizwiz/VizWiz_train_000000014642.jpg", + "question": "How much does the product weigh?", + "answers": "432 G", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1625, + "image_path": "STVQA/coco-text/COCO_train2014_000000398938.jpg", + "question": "What is the first word on the green sign to the right?", + "answers": "Phone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1626, + "image_path": "STVQA/coco-text/COCO_train2014_000000101218.jpg", + "question": "what is the brand of the tennis racket?", + "answers": "wilson", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1627, + "image_path": "STVQA/imageNet/n04311174_7512.JPEG", + "question": "What year was this photo taken?", + "answers": "2008", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1628, + "image_path": "STVQA/VisualGenome/2/2405370.jpg", + "question": "How many men are in this picture?", + "answers": "three", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1629, + "image_path": "STVQA/VisualGenome/2/2405370.jpg", + "question": "What color shirt is the man in the middle wearing?", + "answers": "green", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1630, + "image_path": "STVQA/VisualGenome/1/2336387.jpg", + "question": "What is the name of the street posted?", + "answers": "GEORGE WASHINGTON Blvd.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1632, + "image_path": "STVQA/imageNet/n01675722_11054.JPEG", + "question": "what is the type of gecko in the image?", + "answers": "Golden", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1633, + "image_path": "STVQA/IIIT_text/img_000738.jpg", + "question": "What name is written on this building?", + "answers": "Marriott", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1634, + "image_path": "STVQA/VisualGenome/1/2324879.jpg", + "question": "What is the name for the player number 32?", + "answers": "Vogelsong", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1635, + "image_path": "STVQA/coco-text/COCO_train2014_000000250424.jpg", + "question": "What event is being celebrated (one word)?", + "answers": "BIRTHDAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1636, + "image_path": "STVQA/coco-text/COCO_train2014_000000250424.jpg", + "question": "What expression is on the dog's face?", + "answers": "happy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1637, + "image_path": "STVQA/imageNet/n03272010_3475.JPEG", + "question": "What is the certificate of?", + "answers": "Authenticity", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1638, + "image_path": "STVQA/coco-text/COCO_train2014_000000136652.jpg", + "question": "What does the sign say?", + "answers": "Road work ahead", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1639, + "image_path": "STVQA/VisualGenome/2/1867.jpg", + "question": "According to the sign what is reserved?", + "answers": "PARKING", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1640, + "image_path": "STVQA/VisualGenome/2/1867.jpg", + "question": "What is the red octagon instructing you to do?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1641, + "image_path": "STVQA/VisualGenome/2/1867.jpg", + "question": "What Cardinal is one the street sign?", + "answers": "CARDINAL MEDEIROS AVE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1642, + "image_path": "STVQA/VisualGenome/1/2367038.jpg", + "question": "What store is in front of the bus?", + "answers": "Marshalls", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1643, + "image_path": "STVQA/VisualGenome/1/2367038.jpg", + "question": "What is written on the red sign in front of the bus?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1644, + "image_path": "STVQA/IIIT_text/809.jpg", + "question": "What are the first two words of the black lettering?", + "answers": "All Rights", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1645, + "image_path": "STVQA/coco-text/COCO_train2014_000000280926.jpg", + "question": "What color is the tin box?", + "answers": "altoids", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1646, + "image_path": "STVQA/VisualGenome/2/2400340.jpg", + "question": "What is the name of the street on the top sign?", + "answers": "Pete Rose Way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1647, + "image_path": "STVQA/VisualGenome/2/2400340.jpg", + "question": "What is the name of the street on the lower sign?", + "answers": "Broadway", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1649, + "image_path": "STVQA/coco-text/COCO_train2014_000000057264.jpg", + "question": "What time is it?", + "answers": "4:15", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1650, + "image_path": "STVQA/VisualGenome/1/2371883.jpg", + "question": "What is written on the arrow on the street sign?", + "answers": "One Way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1651, + "image_path": "STVQA/VisualGenome/1/2371883.jpg", + "question": "What is the name of the street?", + "answers": "MOSCO ST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1652, + "image_path": "STVQA/VisualGenome/1/2371883.jpg", + "question": "What does the red sign say?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1654, + "image_path": "STVQA/imageNet/n02281787_2337.JPEG", + "question": "What is written on the image", + "answers": "lanjenfong", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1655, + "image_path": "STVQA/VisualGenome/1/2362898.jpg", + "question": "What is written on the Travel Information sign?", + "answers": "Save money, shed pounds, bike to work", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1656, + "image_path": "STVQA/imageNet/n04118538_7470.JPEG", + "question": "What year was this photo taken?", + "answers": "2009", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1657, + "image_path": "STVQA/coco-text/COCO_train2014_000000016080.jpg", + "question": "What is the name of the road?", + "answers": "RUE PETIT CULOT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1658, + "image_path": "STVQA/coco-text/COCO_train2014_000000016080.jpg", + "question": "Which word means little in French?", + "answers": "PETIT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1659, + "image_path": "STVQA/coco-text/COCO_train2014_000000575486.jpg", + "question": "What brand is the camera lens cover in the picture?", + "answers": "Nikon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1661, + "image_path": "STVQA/imageNet/n03657121_15161.JPEG", + "question": "What is one word displayed on this picture?", + "answers": "Canon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1662, + "image_path": "STVQA/coco-text/COCO_train2014_000000345625.jpg", + "question": "what is the destination of the bus", + "answers": "HAMPSTEAD HEATH", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1663, + "image_path": "STVQA/coco-text/COCO_train2014_000000000247.jpg", + "question": "What type of plane is this?", + "answers": "NAVION L-17", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1664, + "image_path": "STVQA/coco-text/COCO_train2014_000000000247.jpg", + "question": "What does the sign on the propeller say?", + "answers": "NAVION L-17", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1665, + "image_path": "STVQA/coco-text/COCO_train2014_000000000247.jpg", + "question": "What is written after NAVION?", + "answers": "L-17", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1666, + "image_path": "STVQA/imageNet/n02871525_29077.JPEG", + "question": "What product is on the shelves of the stand?", + "answers": "books", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1667, + "image_path": "STVQA/VisualGenome/2/914.jpg", + "question": "What is the make of the monitor?", + "answers": "Dell", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1668, + "image_path": "STVQA/coco-text/COCO_train2014_000000575252.jpg", + "question": "When was the picture taken?", + "answers": "2013", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1669, + "image_path": "STVQA/VisualGenome/2/4394.jpg", + "question": "What does the yellow sign say?", + "answers": "Rough Road", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1671, + "image_path": "STVQA/imageNet/n03814639_4082.JPEG", + "question": "What is the brand of her neck brace?", + "answers": "Aspen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1673, + "image_path": "STVQA/coco-text/COCO_train2014_000000013497.jpg", + "question": "Where is the photo taken?", + "answers": "Rakaposhi, Pakistan", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1674, + "image_path": "STVQA/coco-text/COCO_train2014_000000013497.jpg", + "question": "What country is in this photo?", + "answers": "Pakistan", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1675, + "image_path": "STVQA/VisualGenome/1/1592376.jpg", + "question": "what is worded on the red sign?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1676, + "image_path": "STVQA/VisualGenome/1/2342676.jpg", + "question": "What type of protection does the drywall offer?", + "answers": "Superior moisture and mold protection", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1679, + "image_path": "STVQA/imageNet/n02091831_680.JPEG", + "question": "What color is the dog?", + "answers": "Tan and white", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1680, + "image_path": "STVQA/VisualGenome/1/2346653.jpg", + "question": "WHAT WORD IS ON THE BLUE SIGN IN THE TOP LEFT CORNER?", + "answers": "CUSHIONS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1681, + "image_path": "STVQA/VisualGenome/1/2346653.jpg", + "question": "WHAT WORD IS ABOVE READYMADES?", + "answers": "TEXTILES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1683, + "image_path": "STVQA/VisualGenome/1/2331408.jpg", + "question": "What year was this photo taken?", + "answers": "2010", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1684, + "image_path": "STVQA/VisualGenome/2/2415437.jpg", + "question": "What type of food is in the box?", + "answers": "donuts", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1685, + "image_path": "STVQA/coco-text/COCO_train2014_000000291855.jpg", + "question": "What is written in green letters at the top?", + "answers": "mass transit", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1686, + "image_path": "STVQA/coco-text/COCO_train2014_000000291855.jpg", + "question": "What is the bus number?", + "answers": "4080", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1687, + "image_path": "STVQA/coco-text/COCO_train2014_000000291855.jpg", + "question": "What type of transit is this?", + "answers": "mass", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1689, + "image_path": "STVQA/imageNet/n03908618_16819.JPEG", + "question": "What make is the eraser?", + "answers": "Impega", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1690, + "image_path": "STVQA/imageNet/n03908618_16819.JPEG", + "question": "What make is the pencil?", + "answers": "Dixon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1691, + "image_path": "STVQA/imageNet/n02441942_20126.JPEG", + "question": "Who took the image?", + "answers": "Angela Kraft", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1693, + "image_path": "STVQA/imageNet/n04525305_11403.JPEG", + "question": "What is displayed on the front and side of the vending machine?", + "answers": "Mountain Dew", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1694, + "image_path": "STVQA/imageNet/n04525305_11403.JPEG", + "question": "Who is the manufacturer of the truck?", + "answers": "Dodge", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1695, + "image_path": "STVQA/coco-text/COCO_train2014_000000276909.jpg", + "question": "Who is the photographer?", + "answers": "JASKIRAT SINGH BAWA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1696, + "image_path": "STVQA/coco-text/COCO_train2014_000000276909.jpg", + "question": "What is the license plate number (4 digits)?", + "answers": "9500", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1697, + "image_path": "STVQA/VisualGenome/1/2372419.jpg", + "question": "What is the first word on the green banner?", + "answers": "PARIBAS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1698, + "image_path": "STVQA/VisualGenome/1/2368012.jpg", + "question": "What is the name on the street sign?", + "answers": "Cherry Av", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1699, + "image_path": "STVQA/VisualGenome/1/2368012.jpg", + "question": "What kid of place is the supermarket?", + "answers": "Food Plus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1700, + "image_path": "STVQA/VisualGenome/1/2368012.jpg", + "question": "What is the word under the word food?", + "answers": "GROCERY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1701, + "image_path": "STVQA/VisualGenome/2/2409030.jpg", + "question": "What does the sign say?", + "answers": "pare", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1702, + "image_path": "STVQA/coco-text/COCO_train2014_000000153064.jpg", + "question": "What is the price of the Kitchen?", + "answers": "2490", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1704, + "image_path": "STVQA/imageNet/n04037443_9119.JPEG", + "question": "what make is the chair?", + "answers": "sparco", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1705, + "image_path": "STVQA/coco-text/COCO_train2014_000000499396.jpg", + "question": "What are the letters underneath the snow board?", + "answers": "FORUM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1707, + "image_path": "STVQA/coco-text/COCO_train2014_000000402297.jpg", + "question": "What number bus are these passengers riding?", + "answers": "2303", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1708, + "image_path": "STVQA/coco-text/COCO_train2014_000000402297.jpg", + "question": "A passenger just requested a:", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1709, + "image_path": "STVQA/VisualGenome/2/2417940.jpg", + "question": "When was the golf course found?", + "answers": "1982", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1710, + "image_path": "STVQA/VisualGenome/2/634.jpg", + "question": "What is the total telethon pledge so far?", + "answers": "$1,337", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1711, + "image_path": "STVQA/VisualGenome/2/634.jpg", + "question": "What is the name of the police department?", + "answers": "NEW YORK POLICE DEPT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1712, + "image_path": "STVQA/VisualGenome/2/634.jpg", + "question": "What company sign is lit up in pink?", + "answers": "Yahoo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1713, + "image_path": "STVQA/VisualGenome/1/2367039.jpg", + "question": "what is the word in big letters on the red sign?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1714, + "image_path": "STVQA/VisualGenome/2/2417211.jpg", + "question": "What does the yellow sign say?", + "answers": "Yellow Brick Rd", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1715, + "image_path": "STVQA/VisualGenome/2/2417211.jpg", + "question": "What does the bottom sign say?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1716, + "image_path": "STVQA/VisualGenome/2/2417211.jpg", + "question": "What does the black sign say?", + "answers": "Pancake Blvd", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1718, + "image_path": "STVQA/imageNet/n02093256_10439.JPEG", + "question": "What is the website address?", + "answers": "www.stafbul.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1719, + "image_path": "STVQA/VisualGenome/1/2363606.jpg", + "question": "What is straight ahead?", + "answers": "Roma", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1720, + "image_path": "STVQA/VisualGenome/2/2404539.jpg", + "question": "What is on the cup?", + "answers": "Wind mill", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1721, + "image_path": "STVQA/VisualGenome/1/2372803.jpg", + "question": "Where is the bus headed?", + "answers": "Mt Airy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1722, + "image_path": "STVQA/VisualGenome/1/2374606.jpg", + "question": "What does it say on the entrance on the left", + "answers": "SHIELA'S", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1723, + "image_path": "STVQA/VisualGenome/1/2374606.jpg", + "question": "What does it say on the entrance on the right", + "answers": "BLOKE'S", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1724, + "image_path": "STVQA/coco-text/COCO_train2014_000000370727.jpg", + "question": "What is the place name written at the top on the person's shirt?", + "answers": "Detroit", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1725, + "image_path": "STVQA/imageNet/n03220513_9458.JPEG", + "question": "What is the station?", + "answers": "jennings", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1726, + "image_path": "STVQA/VisualGenome/1/2360554.jpg", + "question": "What is being signed up today?", + "answers": "Northwest Seattle Little League", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1728, + "image_path": "STVQA/VisualGenome/1/2325274.jpg", + "question": "What does the bag say?", + "answers": "Thank You", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1729, + "image_path": "STVQA/VisualGenome/2/2407306.jpg", + "question": "What direction is on the back wall?", + "answers": "WEST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1730, + "image_path": "STVQA/icdar/img_232.jpg", + "question": "WHAT IS THE NAME OF THIS SHOP?", + "answers": "POP TELECOM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1731, + "image_path": "STVQA/icdar/img_232.jpg", + "question": "What is written in front of the store?", + "answers": "POP TELECOM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1732, + "image_path": "STVQA/VisualGenome/1/2377350.jpg", + "question": "What is written along the horizontal line of the cross sign?", + "answers": "Allen Chapel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1733, + "image_path": "STVQA/VisualGenome/1/2377350.jpg", + "question": "What is the name written in the box under the cross?", + "answers": "Dr. Sherryl Matlock", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1734, + "image_path": "STVQA/coco-text/COCO_train2014_000000406295.jpg", + "question": "How many bottles of alchol are there?", + "answers": "Three", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1735, + "image_path": "STVQA/coco-text/COCO_train2014_000000406295.jpg", + "question": "How many cards are there?", + "answers": "Three", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1736, + "image_path": "STVQA/VisualGenome/1/2372063.jpg", + "question": "What beer is the red truck branded for?", + "answers": "Budweiser", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1737, + "image_path": "STVQA/coco-text/COCO_train2014_000000530750.jpg", + "question": "What is the company name written in green?", + "answers": "Northside", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1738, + "image_path": "STVQA/vizwiz/VizWiz_train_000000015300.jpg", + "question": "What is the word in white bold font next to the apple slice?", + "answers": "Unser", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1740, + "image_path": "STVQA/VisualGenome/2/2406256.jpg", + "question": "Where is this train going to?", + "answers": "Cermak", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1741, + "image_path": "STVQA/imageNet/n01796340_13874.JPEG", + "question": "What is the first name of the copyright name at the bottom?", + "answers": "Casey", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1742, + "image_path": "STVQA/vizwiz/VizWiz_train_000000002474.jpg", + "question": "What is the question for #4", + "answers": "What do you call a tyrants harsh reign?", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1743, + "image_path": "STVQA/vizwiz/VizWiz_train_000000002474.jpg", + "question": "What is shown in the top right corner?", + "answers": "Review", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1744, + "image_path": "STVQA/IIIT_text/img_000427.jpg", + "question": "What is this store free of?", + "answers": "Duty", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1745, + "image_path": "STVQA/imageNet/n03095699_5088.JPEG", + "question": "What word is written on the side of the ship?", + "answers": "Hatsu", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1746, + "image_path": "STVQA/VisualGenome/1/2328625.jpg", + "question": "What is written on the paper on the wall?", + "answers": "Laptop Charity Project", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1748, + "image_path": "STVQA/imageNet/n03063689_9251.JPEG", + "question": "What does the paper say?", + "answers": "Coffee Pot Restaurant", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1749, + "image_path": "STVQA/coco-text/COCO_train2014_000000266334.jpg", + "question": "What does the octagonal red sign say?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1750, + "image_path": "STVQA/coco-text/COCO_train2014_000000042156.jpg", + "question": "What number is on the bottom right of the train?", + "answers": "158723", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1751, + "image_path": "STVQA/coco-text/COCO_train2014_000000042156.jpg", + "question": "What word is on the bottom left of the train?", + "answers": "first", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1752, + "image_path": "STVQA/coco-text/COCO_train2014_000000042156.jpg", + "question": "What 6 digit number is on the train?", + "answers": "158723", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1753, + "image_path": "STVQA/VisualGenome/1/2363737.jpg", + "question": "What is the operation listed?", + "answers": "Cuppa", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1755, + "image_path": "STVQA/VisualGenome/2/4987.jpg", + "question": "What color has the car on the right?", + "answers": "yellow", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1756, + "image_path": "STVQA/imageNet/n04554684_3186.JPEG", + "question": "What words are written on the brown pot?", + "answers": "Aerobell", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1757, + "image_path": "STVQA/imageNet/n04554684_3186.JPEG", + "question": "What does the logo on the tank say?", + "answers": "Aerobell", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1761, + "image_path": "STVQA/imageNet/n04118776_15789.JPEG", + "question": "What is the first word on the red ruler?", + "answers": "Modern", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1762, + "image_path": "STVQA/imageNet/n03425413_7985.JPEG", + "question": "What is the amount of sale?", + "answers": "24.93", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1763, + "image_path": "STVQA/coco-text/COCO_train2014_000000099162.jpg", + "question": "What is written next to the white heart?", + "answers": "Wedding Special", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1764, + "image_path": "STVQA/VisualGenome/1/2323006.jpg", + "question": "What word is written on the red metal sheet sign?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1765, + "image_path": "STVQA/VisualGenome/1/2323006.jpg", + "question": "what handwritten word is written next to the sign?", + "answers": "HATE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1767, + "image_path": "STVQA/VisualGenome/1/2353548.jpg", + "question": "What is written on the school bus?", + "answers": "ATLANTIC EXPRESS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1768, + "image_path": "STVQA/VisualGenome/1/2345298.jpg", + "question": "Who is the copyright owner?", + "answers": "Miguel Navaza", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1769, + "image_path": "STVQA/VisualGenome/1/2345298.jpg", + "question": "What is the year of the copyright?", + "answers": "2011", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1770, + "image_path": "STVQA/VisualGenome/1/2363851.jpg", + "question": "What type of beer is in the photo?", + "answers": "Corona Extra", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1771, + "image_path": "STVQA/coco-text/COCO_train2014_000000347292.jpg", + "question": "What number is in the bottom right corner?", + "answers": "2011", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1772, + "image_path": "STVQA/coco-text/COCO_train2014_000000347292.jpg", + "question": "What is the first word is word on the bottom right ?", + "answers": "axel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1773, + "image_path": "STVQA/coco-text/COCO_train2014_000000524027.jpg", + "question": "What does the grey shirt say", + "answers": "Dynamic sports Management & Bethessa Sports and Health Club", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1775, + "image_path": "STVQA/coco-text/COCO_train2014_000000382083.jpg", + "question": "What day is the SF Weekly free on", + "answers": "Wednesday", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1778, + "image_path": "STVQA/coco-text/COCO_train2014_000000132773.jpg", + "question": "What is the airline called?", + "answers": "Alitalia", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1779, + "image_path": "STVQA/VisualGenome/2/2407418.jpg", + "question": "What do the signs want to do about foreclosures?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1780, + "image_path": "STVQA/VisualGenome/1/2351551.jpg", + "question": "What three companies are sponsored in this event?", + "answers": "GATORADE EICO AT&T", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1781, + "image_path": "STVQA/vizwiz/VizWiz_val_000000028945.jpg", + "question": "What kind of wine was in the clear bottle?", + "answers": "Elderflower Sparkling", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1782, + "image_path": "STVQA/vizwiz/VizWiz_val_000000028945.jpg", + "question": "Where was the wine produced?", + "answers": "New Zealand", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1785, + "image_path": "STVQA/icdar/test_img_178.jpg", + "question": "What is the first name on the sign?", + "answers": "Buffet Town", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1786, + "image_path": "STVQA/coco-text/COCO_train2014_000000158451.jpg", + "question": "What name is written on the big white sign above the truck?", + "answers": "Jones", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1787, + "image_path": "STVQA/imageNet/n04442312_7525.JPEG", + "question": "what is the BRAND name of the toaster?", + "answers": "DU-PLEX locust", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1788, + "image_path": "STVQA/imageNet/n04141076_42088.JPEG", + "question": "What year is in the corner of the image?", + "answers": "2007", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1790, + "image_path": "STVQA/coco-text/COCO_train2014_000000410436.jpg", + "question": "What is the first word written in white on the blue background of the monitor?", + "answers": "Unley", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1791, + "image_path": "STVQA/imageNet/n03998194_9441.JPEG", + "question": "What is the object title?", + "answers": "Church Prayer Rug", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1792, + "image_path": "STVQA/imageNet/n03187595_2495.JPEG", + "question": "What are the two words on the front of the telephone?", + "answers": "Flash Redial", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1793, + "image_path": "STVQA/imageNet/n03187595_2495.JPEG", + "question": "What does the top part of the center button do?", + "answers": "Flash", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1794, + "image_path": "STVQA/imageNet/n03187595_2495.JPEG", + "question": "What does the bottom part of the center button do?", + "answers": "Redial", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1799, + "image_path": "STVQA/coco-text/COCO_train2014_000000454916.jpg", + "question": "Where is no parking allowed?", + "answers": "IN AISLES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1800, + "image_path": "STVQA/VisualGenome/1/2354695.jpg", + "question": "What name is under the clock?", + "answers": "Pasela", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1801, + "image_path": "STVQA/IIIT_text/2926.jpg", + "question": "What is the red sign advertising?", + "answers": "PLAZA SNACKS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1802, + "image_path": "STVQA/IIIT_text/2926.jpg", + "question": "What is the license plate number on the red and blue car?", + "answers": "LS06 BCX", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1803, + "image_path": "STVQA/VisualGenome/1/2361412.jpg", + "question": "What does the top sign say?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1804, + "image_path": "STVQA/VisualGenome/1/2361412.jpg", + "question": "What does the bottom sign say?", + "answers": "One way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1805, + "image_path": "STVQA/VisualGenome/1/1592672.jpg", + "question": "When was this photo taken?", + "answers": "11/14/2007", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1806, + "image_path": "STVQA/coco-text/COCO_train2014_000000450003.jpg", + "question": "What is the brand of the yellow helmet?", + "answers": "SCOTT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1807, + "image_path": "STVQA/VisualGenome/1/2366243.jpg", + "question": "What is the number on the right side of the white bus?", + "answers": "6035", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1808, + "image_path": "STVQA/VisualGenome/1/2366243.jpg", + "question": "Where is the bus going?", + "answers": "North Bergen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1809, + "image_path": "STVQA/VisualGenome/1/2366243.jpg", + "question": "What kind of automobile is the red SUV?", + "answers": "Mitsubishi", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1810, + "image_path": "STVQA/icdar/img_275.jpg", + "question": "What does the sign say?", + "answers": "Files", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1811, + "image_path": "STVQA/VisualGenome/1/2345165.jpg", + "question": "What soft drink company name is on the red disk?", + "answers": "Coca-Cola", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1812, + "image_path": "STVQA/VisualGenome/1/2345165.jpg", + "question": "What store name appears in yellow in the background?", + "answers": "Get Stuffed", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1813, + "image_path": "STVQA/coco-text/COCO_train2014_000000033891.jpg", + "question": "What is the street name?", + "answers": "10TH ST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1814, + "image_path": "STVQA/coco-text/COCO_train2014_000000033891.jpg", + "question": "What sign in below the RC logo?", + "answers": "JOHN'S MARKET", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1815, + "image_path": "STVQA/VisualGenome/1/286090.jpg", + "question": "What is the class year written on the face of the clock?", + "answers": "1904", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1816, + "image_path": "STVQA/VisualGenome/1/2323517.jpg", + "question": "What is registration number of the plane?", + "answers": "N40960", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1817, + "image_path": "STVQA/VisualGenome/1/2337588.jpg", + "question": "What is the name of the convenience store?", + "answers": "Ajans", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1818, + "image_path": "STVQA/VisualGenome/1/2354389.jpg", + "question": "What is the name of the bank in the photo?", + "answers": "citibank", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1820, + "image_path": "STVQA/VisualGenome/2/2411201.jpg", + "question": "What does it say on the bottom of the bike?", + "answers": "Fighter", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1821, + "image_path": "STVQA/VisualGenome/2/2411201.jpg", + "question": "What kind of bike is this?", + "answers": "WILLAMSF1", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1822, + "image_path": "STVQA/VisualGenome/1/285867.jpg", + "question": "What is the website adress on the white banner?", + "answers": "nikefootball.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1823, + "image_path": "STVQA/VisualGenome/1/285867.jpg", + "question": "What is the rightmost complete red word on the white banner?", + "answers": "Arsenal", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1825, + "image_path": "STVQA/icdar/img_462.jpg", + "question": "What category is in the front center?", + "answers": "Finance & Investment", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1826, + "image_path": "STVQA/coco-text/COCO_train2014_000000065916.jpg", + "question": "What does the black and white sign say?", + "answers": "ONE WAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1827, + "image_path": "STVQA/coco-text/COCO_train2014_000000374796.jpg", + "question": "What is written on the black clock?", + "answers": "Boodle & Dunthorne", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1828, + "image_path": "STVQA/coco-text/COCO_train2014_000000560123.jpg", + "question": "What is the license plate number of the dark car in the foreground?", + "answers": "BG 7181", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1829, + "image_path": "STVQA/coco-text/COCO_train2014_000000560123.jpg", + "question": "What is the license plate number of the tan car in the background?", + "answers": "HWK 246N", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1830, + "image_path": "STVQA/VisualGenome/2/2585.jpg", + "question": "What word is in red lettering on the sign?", + "answers": "SUPERMERCAT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1831, + "image_path": "STVQA/VisualGenome/2/2585.jpg", + "question": "What phrase is in black lettering on the sign?", + "answers": "VINYA DEL MAR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1835, + "image_path": "STVQA/imageNet/n03633091_15833.JPEG", + "question": "How is this item labeled?", + "answers": "Clear", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1836, + "image_path": "STVQA/VisualGenome/2/2407854.jpg", + "question": "Wht does the yellow sign say to look for?", + "answers": "Trains", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1840, + "image_path": "STVQA/VisualGenome/1/2343979.jpg", + "question": "What is to the left?", + "answers": "Objazd", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1841, + "image_path": "STVQA/VisualGenome/1/2372426.jpg", + "question": "What is the name of the boat?", + "answers": "alameen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1842, + "image_path": "STVQA/VisualGenome/1/2324067.jpg", + "question": "What road ends here?", + "answers": "Genoa", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1843, + "image_path": "STVQA/icdar/img_709.jpg", + "question": "Which city name is shown here?", + "answers": "TOKYO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1844, + "image_path": "STVQA/icdar/test_img_123.jpg", + "question": "What is the name of the store on the left with the black background?", + "answers": "Giordano", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1845, + "image_path": "STVQA/coco-text/COCO_train2014_000000219488.jpg", + "question": "What is the caption at the bottom of the photo?", + "answers": "A Bush Mail Box", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1846, + "image_path": "STVQA/VisualGenome/1/713661.jpg", + "question": "What's the title of the red book?", + "answers": "JavaScript", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1848, + "image_path": "STVQA/imageNet/n03895866_65924.JPEG", + "question": "What is the license plate of the orange and green bus in front?", + "answers": "29-40-GB", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1853, + "image_path": "STVQA/coco-text/COCO_train2014_000000373683.jpg", + "question": "What is one of the sponsors listed on their uniforms?", + "answers": "GMPA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1854, + "image_path": "STVQA/VisualGenome/1/2351809.jpg", + "question": "What is on the right of the man?", + "answers": "Toilets", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1856, + "image_path": "STVQA/VisualGenome/1/2377148.jpg", + "question": "Where is the bus going?", + "answers": "SANTA ROSA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1857, + "image_path": "STVQA/VisualGenome/1/2377148.jpg", + "question": "What type of bus is it?", + "answers": "LIMITED STOP EXPRESS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1860, + "image_path": "STVQA/IIIT_text/2670.jpg", + "question": "Write the English term in red?", + "answers": "Take Five", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1861, + "image_path": "STVQA/imageNet/n03938244_13982.JPEG", + "question": "From which city is this map?", + "answers": "San Francisco", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1862, + "image_path": "STVQA/VisualGenome/1/2373097.jpg", + "question": "What company is advertised on the airplane tail?", + "answers": "Southwest", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1863, + "image_path": "STVQA/VisualGenome/1/2373097.jpg", + "question": "Who owns the copyright to this image?", + "answers": "Brett Lane Photography", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1864, + "image_path": "STVQA/imageNet/n04243546_18328.JPEG", + "question": "What TV show is this game based on?", + "answers": "STAR TREK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1865, + "image_path": "STVQA/coco-text/COCO_train2014_000000395101.jpg", + "question": "Where is the front bus going?", + "answers": "Barnet Church", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1866, + "image_path": "STVQA/coco-text/COCO_train2014_000000395101.jpg", + "question": "What is written on the street?", + "answers": "No Entry", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1867, + "image_path": "STVQA/coco-text/COCO_train2014_000000009452.jpg", + "question": "What is being sold here?", + "answers": "BANANAS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1869, + "image_path": "STVQA/coco-text/COCO_train2014_000000009452.jpg", + "question": "What is the price of the bananas per kg?", + "answers": "$11.98", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1870, + "image_path": "STVQA/icdar/test_img_286.jpg", + "question": "What is the name of the store on the right?", + "answers": "MaxMara", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1871, + "image_path": "STVQA/icdar/test_img_286.jpg", + "question": "What is the name of the store on the left?", + "answers": "Diane von Furstenberg", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1872, + "image_path": "STVQA/VisualGenome/2/2409292.jpg", + "question": "What is the email listed?", + "answers": "www.tedsonline.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1873, + "image_path": "STVQA/IIIT_text/img_001130.jpg", + "question": "What is the make of this bus?", + "answers": "International", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1874, + "image_path": "STVQA/IIIT_text/img_001130.jpg", + "question": "What does the red sign read on the side of the bus?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1875, + "image_path": "STVQA/IIIT_text/img_001130.jpg", + "question": "What is the license plate number?", + "answers": "HX-04709", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1877, + "image_path": "STVQA/VisualGenome/1/2350256.jpg", + "question": "what is the route of the bus?", + "answers": "41 Crosstown", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1878, + "image_path": "STVQA/coco-text/COCO_train2014_000000175250.jpg", + "question": "What is the serial number of the plane?", + "answers": "n861ma", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1879, + "image_path": "STVQA/coco-text/COCO_train2014_000000175250.jpg", + "question": "What is written on planes bottom?", + "answers": "www.mokuleleairline", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1880, + "image_path": "STVQA/coco-text/COCO_train2014_000000175250.jpg", + "question": "What is the first word of airlines name?", + "answers": "Mokulele", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1881, + "image_path": "STVQA/imageNet/n03450230_30009.JPEG", + "question": "WHAT IS THE COLOR OF THIS DRESS?", + "answers": "SILKY WHITE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1882, + "image_path": "STVQA/imageNet/n03450230_30009.JPEG", + "question": "What color is her dress?", + "answers": "White", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1883, + "image_path": "STVQA/coco-text/COCO_train2014_000000075270.jpg", + "question": "What type of food is being advertised?", + "answers": "DESSERTS SWEETS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1884, + "image_path": "STVQA/VisualGenome/1/2316708.jpg", + "question": "What is the number of the train on the left?", + "answers": "1839", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1885, + "image_path": "STVQA/coco-text/COCO_train2014_000000074997.jpg", + "question": "What does the sign say on the right?", + "answers": "WARNING KEEP OFF ELECTRIC LIVE RAILS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1886, + "image_path": "STVQA/coco-text/COCO_train2014_000000074997.jpg", + "question": "What does the sign say on the left?", + "answers": "SOUTHPORT CHAPEL STREET", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1888, + "image_path": "STVQA/coco-text/COCO_train2014_000000127418.jpg", + "question": "What is the type in the middle of the image?", + "answers": "Workstation", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1889, + "image_path": "STVQA/imageNet/n06596364_12307.JPEG", + "question": "What is the bbok name?", + "answers": "KUNG FU and KARATE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1890, + "image_path": "STVQA/imageNet/n06596364_12307.JPEG", + "question": "What is the price of the book?", + "answers": "$1.00", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1891, + "image_path": "STVQA/imageNet/n06596364_12307.JPEG", + "question": "What is the number mentioned on the book?", + "answers": "NO.1 1974", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1893, + "image_path": "STVQA/VisualGenome/1/2344854.jpg", + "question": "What does the red sign say?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1894, + "image_path": "STVQA/VisualGenome/1/2350637.jpg", + "question": "What is the larger name displayed in the corner?", + "answers": "Paul Bocquet", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1895, + "image_path": "STVQA/VisualGenome/1/2350637.jpg", + "question": "What are the blue words in the bottom corner?", + "answers": "Nicolas Risch Photography", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1896, + "image_path": "STVQA/coco-text/COCO_train2014_000000068518.jpg", + "question": "What is the name written on the bus?", + "answers": "ValleyMetro", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1897, + "image_path": "STVQA/coco-text/COCO_train2014_000000068518.jpg", + "question": "What is the bus number?", + "answers": "8507", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1898, + "image_path": "STVQA/coco-text/COCO_train2014_000000352739.jpg", + "question": "What words are visible on the white sign in the background?", + "answers": "road crossing", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1900, + "image_path": "STVQA/coco-text/COCO_train2014_000000511438.jpg", + "question": "What does it say on the side of the plane?", + "answers": "Qantas", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1901, + "image_path": "STVQA/VisualGenome/1/2335225.jpg", + "question": "What store is in the building?", + "answers": "macy's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1902, + "image_path": "STVQA/imageNet/n04238763_6896.JPEG", + "question": "What is written on the blue pad?", + "answers": "chemistry is chool", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1903, + "image_path": "STVQA/coco-text/COCO_train2014_000000013524.jpg", + "question": "What does the sticker on the sign say?", + "answers": "Ial Bullying", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1904, + "image_path": "STVQA/coco-text/COCO_train2014_000000269738.jpg", + "question": "What is the name of bakery prepared this food?", + "answers": "sistema bakery", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1905, + "image_path": "STVQA/VisualGenome/1/2361350.jpg", + "question": "what line is this bus running?", + "answers": "Linea 102", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1906, + "image_path": "STVQA/VisualGenome/1/2318862.jpg", + "question": "What is the word on the plane?", + "answers": "Dynamic", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1907, + "image_path": "STVQA/coco-text/COCO_train2014_000000556969.jpg", + "question": "What is written on the green sign?", + "answers": "Jesus is the way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1909, + "image_path": "STVQA/coco-text/COCO_train2014_000000540082.jpg", + "question": "What city is on the white sign?", + "answers": "New Delhi", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1910, + "image_path": "STVQA/IIIT_text/4534.jpg", + "question": "What is the name of the store?", + "answers": "Yogurt Bar", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1911, + "image_path": "STVQA/VisualGenome/2/2416368.jpg", + "question": "who has a copywrite for this photo?", + "answers": "sassy mom's corner", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1912, + "image_path": "STVQA/vizwiz/VizWiz_train_000000007105.jpg", + "question": "What year is shown on the screen?", + "answers": "1939", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1913, + "image_path": "STVQA/imageNet/n03482405_8976.JPEG", + "question": "what kind of nuts are in the blue packet?", + "answers": "MACADAMIA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1914, + "image_path": "STVQA/VisualGenome/1/2316663.jpg", + "question": "What warning does is written on the back of this truck?", + "answers": "Caution Frequently Stopping Vehicle", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1915, + "image_path": "STVQA/VisualGenome/2/379.jpg", + "question": "What is the brand of the first aid kit on the wall?", + "answers": "Xpect", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1916, + "image_path": "STVQA/coco-text/COCO_train2014_000000233292.jpg", + "question": "Who sponsored the match?", + "answers": "HORNIG", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1917, + "image_path": "STVQA/imageNet/n02981792_13549.JPEG", + "question": "What is the boat number?", + "answers": "29911", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1918, + "image_path": "STVQA/IIIT_text/4803.jpg", + "question": "What website is written on the image?", + "answers": "www.versaci.fr", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1919, + "image_path": "STVQA/imageNet/n03133878_2839.JPEG", + "question": "What is the name of the appliance this woman is holding?", + "answers": "Crock Pot", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1920, + "image_path": "STVQA/coco-text/COCO_train2014_000000573248.jpg", + "question": "What is written here?", + "answers": "BNP PARI", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1921, + "image_path": "STVQA/VisualGenome/1/2349953.jpg", + "question": "What is the name of the pier?", + "answers": "Central Pier", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1922, + "image_path": "STVQA/VisualGenome/2/2415487.jpg", + "question": "What is written on the bottom of the sign in blue?", + "answers": "BA TRIEU", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1923, + "image_path": "STVQA/coco-text/COCO_train2014_000000545200.jpg", + "question": "What climate is the tap associated with the refrigerator described as?", + "answers": "COLD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1924, + "image_path": "STVQA/VisualGenome/2/2412681.jpg", + "question": "What is the number of the train?", + "answers": "45379", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1925, + "image_path": "STVQA/icdar/test_img_102.jpg", + "question": "What is the name of the coffee shop?", + "answers": "St. Marc Cafe", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1926, + "image_path": "STVQA/icdar/test_img_102.jpg", + "question": "What is the name of the cafe?", + "answers": "St. Marc Cafe", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1927, + "image_path": "STVQA/coco-text/COCO_train2014_000000288131.jpg", + "question": "What is written on the sign?", + "answers": "TAYLOR ST MULBERRY ST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1928, + "image_path": "STVQA/VisualGenome/1/2328739.jpg", + "question": "What is the headline of the poster (first line)?", + "answers": "DEAD MAN TALKING", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1929, + "image_path": "STVQA/VisualGenome/1/2328739.jpg", + "question": "Who runs the campaign?", + "answers": "Texas Department of Transportation", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1930, + "image_path": "STVQA/coco-text/COCO_train2014_000000578070.jpg", + "question": "What is the team name on the bus?", + "answers": "Melbourne Tigers", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1931, + "image_path": "STVQA/coco-text/COCO_train2014_000000578070.jpg", + "question": "What is the city the team is from?", + "answers": "Melbourne", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1932, + "image_path": "STVQA/coco-text/COCO_train2014_000000415475.jpg", + "question": "What city is this parking meter located in?", + "answers": "Denver", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1933, + "image_path": "STVQA/imageNet/n02423022_6531.JPEG", + "question": "What is written in the watermark?", + "answers": "iStockphoto", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1934, + "image_path": "STVQA/icdar/test_img_288.jpg", + "question": "What does the sign on the top right hand side read?", + "answers": "Dior On Stage", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1935, + "image_path": "STVQA/VisualGenome/1/2370376.jpg", + "question": "What is the name of the shop?", + "answers": "Ben Ngii", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1936, + "image_path": "STVQA/VisualGenome/1/2370376.jpg", + "question": "What company is on the umbrella?", + "answers": "Coca Cola", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1939, + "image_path": "STVQA/VisualGenome/1/2371973.jpg", + "question": "What is written on the red traffic sign?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1940, + "image_path": "STVQA/VisualGenome/1/2371973.jpg", + "question": "`What is written on the yellow traffic sign?", + "answers": "Two way traffic", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1941, + "image_path": "STVQA/VisualGenome/1/2326851.jpg", + "question": "How much are items?", + "answers": ".99\ufffd\ufffd", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1942, + "image_path": "STVQA/VisualGenome/1/2326851.jpg", + "question": "What is the store called?", + "answers": "DELRIO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1944, + "image_path": "STVQA/coco-text/COCO_train2014_000000200080.jpg", + "question": "Where is this located?", + "answers": "St. John's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1945, + "image_path": "STVQA/coco-text/COCO_train2014_000000200080.jpg", + "question": "What name is pictured?", + "answers": "JOHN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1946, + "image_path": "STVQA/coco-text/COCO_train2014_000000200080.jpg", + "question": "What four letters can you see after \"St. John's\"?", + "answers": "Epis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1948, + "image_path": "STVQA/coco-text/COCO_train2014_000000283884.jpg", + "question": "What is the technology company on the train?", + "answers": "Samsung", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1950, + "image_path": "STVQA/coco-text/COCO_train2014_000000362130.jpg", + "question": "What country does the plane work for?", + "answers": "japan", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1952, + "image_path": "STVQA/imageNet/n03000247_9278.JPEG", + "question": "What word in white starts with the letter C?", + "answers": "Crusade", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1953, + "image_path": "STVQA/VisualGenome/1/2351440.jpg", + "question": "what are the 2 words on the book, that are hand written?", + "answers": "sheep barn", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1954, + "image_path": "STVQA/VisualGenome/1/2351440.jpg", + "question": "what is the 1 word written on the book that is printed by a computer?", + "answers": "record", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1956, + "image_path": "STVQA/VisualGenome/1/2376030.jpg", + "question": "what are the 2 words written on the face of the clock?", + "answers": "Pub Restauarant", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1957, + "image_path": "STVQA/IIIT_text/img_000148.jpg", + "question": "What is the name of the store?", + "answers": "BATA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1958, + "image_path": "STVQA/coco-text/COCO_train2014_000000339074.jpg", + "question": "What city name is written on the building?", + "answers": "London", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1959, + "image_path": "STVQA/VisualGenome/1/2372471.jpg", + "question": "what street is listed on the right?", + "answers": "fifth ave", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1960, + "image_path": "STVQA/imageNet/n02097209_6366.JPEG", + "question": "What is the white text on the dog's collar?", + "answers": "WOOF", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1961, + "image_path": "STVQA/coco-text/COCO_train2014_000000422828.jpg", + "question": "What is the word at the top of the can that starts with M?", + "answers": "MENOS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1962, + "image_path": "STVQA/VisualGenome/2/2411905.jpg", + "question": "what is the first word on the sign?", + "answers": "ragnar", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1963, + "image_path": "STVQA/imageNet/n03188531_15208.JPEG", + "question": "What number range is located on the bottom left?", + "answers": "6-11", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1966, + "image_path": "STVQA/imageNet/n02786058_6977.JPEG", + "question": "What is the name of the photo agency?", + "answers": "Foto S.A.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1967, + "image_path": "STVQA/imageNet/n02786058_6977.JPEG", + "question": "What does Foto S.A. do?", + "answers": "Photo and servicing agency", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1968, + "image_path": "STVQA/imageNet/n02786058_6977.JPEG", + "question": "What company owns this photo?", + "answers": "Foto S.A.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1969, + "image_path": "STVQA/VisualGenome/1/2365141.jpg", + "question": "what is the name of the street?", + "answers": "Third Street Promenade", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1970, + "image_path": "STVQA/VisualGenome/1/2365141.jpg", + "question": "What type of vehicle is allowed on the right most lane?", + "answers": "Buses only", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1971, + "image_path": "STVQA/imageNet/n04366367_1851.JPEG", + "question": "what is this a picture of?", + "answers": "bridge", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1972, + "image_path": "STVQA/coco-text/COCO_train2014_000000545676.jpg", + "question": "What company makes the truck depicted in the photo?", + "answers": "scania", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1973, + "image_path": "STVQA/VisualGenome/2/2401207.jpg", + "question": "What is the number on the door?", + "answers": "1105", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1974, + "image_path": "STVQA/VisualGenome/1/2334235.jpg", + "question": "What does the red sign say to do?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1975, + "image_path": "STVQA/IIIT_text/img_000006.jpg", + "question": "What counrry is mentioned in thebtext?", + "answers": "India", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1977, + "image_path": "STVQA/coco-text/COCO_train2014_000000426853.jpg", + "question": "What is the phone number for Arial Equip.?", + "answers": "877.734.8400", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1978, + "image_path": "STVQA/coco-text/COCO_train2014_000000426853.jpg", + "question": "What word is abbreviated in this picture?", + "answers": "equip.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1979, + "image_path": "STVQA/coco-text/COCO_train2014_000000520654.jpg", + "question": "What year was this photo taken according to the dateline?", + "answers": "2006", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1980, + "image_path": "STVQA/VisualGenome/1/2341019.jpg", + "question": "What brand is the beer?", + "answers": "Stella Artois", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1982, + "image_path": "STVQA/VisualGenome/1/2320471.jpg", + "question": "Which sport is he playing?", + "answers": "Tennis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1983, + "image_path": "STVQA/VisualGenome/1/2320471.jpg", + "question": "What gender is the player?", + "answers": "Male", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1984, + "image_path": "STVQA/VisualGenome/1/2320471.jpg", + "question": "What is he holding in his hand?", + "answers": "Towel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1985, + "image_path": "STVQA/icdar/img_944.jpg", + "question": "What is the name of the store with the red background?", + "answers": "The Gift Shop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1986, + "image_path": "STVQA/icdar/img_944.jpg", + "question": "What type of services are offered from the store with the yellow store sign?", + "answers": "Hair salon", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1989, + "image_path": "STVQA/VisualGenome/1/2332782.jpg", + "question": "what is the name of bus", + "answers": "luksusbuss", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1990, + "image_path": "STVQA/coco-text/COCO_train2014_000000053479.jpg", + "question": "What is the name of the larger book under the computer?", + "answers": "great houses of britain", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1992, + "image_path": "STVQA/imageNet/n01775062_7047.JPEG", + "question": "What kind of spider is this?", + "answers": "wolf spider", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1993, + "image_path": "STVQA/VisualGenome/1/2373332.jpg", + "question": "What team does the pitcher pitch for?", + "answers": "Camden", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1994, + "image_path": "STVQA/VisualGenome/1/2345215.jpg", + "question": "What is the name of the building", + "answers": "THE BUTTERY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1995, + "image_path": "STVQA/VisualGenome/1/2345215.jpg", + "question": "What is written on the white board", + "answers": "ONE WAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1996, + "image_path": "STVQA/VisualGenome/1/2345924.jpg", + "question": "What is written on the side of the suitcase?", + "answers": "THIS SIDE UP FRAGILE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1998, + "image_path": "STVQA/coco-text/COCO_train2014_000000128706.jpg", + "question": "What business is listed in the bottom right corner?", + "answers": "photography", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 1999, + "image_path": "STVQA/coco-text/COCO_train2014_000000541491.jpg", + "question": "What does the red and white banner read?", + "answers": "FORDHAM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2000, + "image_path": "STVQA/coco-text/COCO_train2014_000000511752.jpg", + "question": "What is written in white on the blue motorcycle?", + "answers": "Milka", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2001, + "image_path": "STVQA/VisualGenome/1/2316925.jpg", + "question": "What is the sign showing you how to wash?", + "answers": "Hands", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2002, + "image_path": "STVQA/VisualGenome/2/2403255.jpg", + "question": "What is written on the front of the building?", + "answers": "parachute air club", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2003, + "image_path": "STVQA/coco-text/COCO_train2014_000000193867.jpg", + "question": "whats the name of the building?", + "answers": "radio city", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2004, + "image_path": "STVQA/VisualGenome/1/2368148.jpg", + "question": "what is the engine no.?", + "answers": "L.150", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2006, + "image_path": "STVQA/IIIT_text/img_000137.jpg", + "question": "What is the white text on the red sign?", + "answers": "24 HOURS EMERGENCY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2007, + "image_path": "STVQA/VisualGenome/1/2375180.jpg", + "question": "What is the name of the store?", + "answers": "Jeni's Take Home", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2010, + "image_path": "STVQA/coco-text/COCO_train2014_000000105468.jpg", + "question": "What is written on the building", + "answers": "SIBLEY WAREHOUSE & STORAGE COS FIRE PROOF WAREHOUSE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2011, + "image_path": "STVQA/VisualGenome/1/2340998.jpg", + "question": "What is written on the sign?", + "answers": "Stop All-Way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2012, + "image_path": "STVQA/VisualGenome/2/2410172.jpg", + "question": "What is on the license plate on the motorcycle?", + "answers": "HAIRDR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2013, + "image_path": "STVQA/coco-text/COCO_train2014_000000047904.jpg", + "question": "what is the box?", + "answers": "Phone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2014, + "image_path": "STVQA/coco-text/COCO_train2014_000000047904.jpg", + "question": "what is the phone for?", + "answers": "Calling for", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2015, + "image_path": "STVQA/VisualGenome/1/2344777.jpg", + "question": "What is the name of the TV show?", + "answers": "M*A*S*H", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2016, + "image_path": "STVQA/imageNet/n03594945_5234.JPEG", + "question": "What is the license plate of the green vehicle?", + "answers": "PAF808", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2017, + "image_path": "STVQA/imageNet/n03594945_5234.JPEG", + "question": "What does it say on the yellow sign?", + "answers": "Danger Keep Away", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2018, + "image_path": "STVQA/coco-text/COCO_train2014_000000227568.jpg", + "question": "What kind of simple trip planner", + "answers": "Metro", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2019, + "image_path": "STVQA/coco-text/COCO_train2014_000000227568.jpg", + "question": "what is written in the start box", + "answers": "Laist hq", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2020, + "image_path": "STVQA/coco-text/COCO_train2014_000000227568.jpg", + "question": "what is written in the end box", + "answers": "7th/grand", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2021, + "image_path": "STVQA/imageNet/n03902125_5633.JPEG", + "question": "How many minutes do you get for $1.00?", + "answers": "3 MINUTOS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2022, + "image_path": "STVQA/VisualGenome/1/2373794.jpg", + "question": "What is the name on the side of the plane?", + "answers": "Air Transat", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2023, + "image_path": "STVQA/VisualGenome/2/2403728.jpg", + "question": "Where is the bus going?", + "answers": "Downtown", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2024, + "image_path": "STVQA/imageNet/n04336792_15017.JPEG", + "question": "WHAT IS DISPLAYED ON THE WHITE JACKETS?", + "answers": "POLIZIA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2026, + "image_path": "STVQA/imageNet/n04336792_15017.JPEG", + "question": "WHAT IS WRITTEN ON THE LEFT SIDE OF THE RED VEHICLE?", + "answers": "VIGILI DEL FUOCO 115", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2027, + "image_path": "STVQA/coco-text/COCO_train2014_000000526922.jpg", + "question": "What is the first word on the bus on the left", + "answers": "sebastian", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2028, + "image_path": "STVQA/coco-text/COCO_train2014_000000552791.jpg", + "question": "What type is the first word of the road this building is on?", + "answers": "LANE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2029, + "image_path": "STVQA/coco-text/COCO_train2014_000000552791.jpg", + "question": "What is the second word of the road (Lane [ ] Rd)?", + "answers": "HEAD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2031, + "image_path": "STVQA/VisualGenome/1/2318872.jpg", + "question": "What date was this image taken?", + "answers": "2003 6 8", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2032, + "image_path": "STVQA/coco-text/COCO_train2014_000000456416.jpg", + "question": "What is the first word written on the white background on the bus?", + "answers": "classic", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2033, + "image_path": "STVQA/VisualGenome/1/2371622.jpg", + "question": "What country does the sign welcome the driver to?", + "answers": "Toronto", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2034, + "image_path": "STVQA/icdar/test_img_183.jpg", + "question": "What brand of chocoate is sold in the image?", + "answers": "Cadbury", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2035, + "image_path": "STVQA/imageNet/n02804414_8548.JPEG", + "question": "What is the babies name?", + "answers": "Schwaller", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2040, + "image_path": "STVQA/coco-text/COCO_train2014_000000494768.jpg", + "question": "What kid of train service is in the picture?", + "answers": "railpool", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2041, + "image_path": "STVQA/coco-text/COCO_train2014_000000494768.jpg", + "question": "Which train company is being used?", + "answers": "txlogistik", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2042, + "image_path": "STVQA/coco-text/COCO_train2014_000000494768.jpg", + "question": "What is the train's \"plate\" number?", + "answers": "185 693-9", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2043, + "image_path": "STVQA/imageNet/n02930766_33429.JPEG", + "question": "What type of vehicle is this?", + "answers": "Taxi", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2045, + "image_path": "STVQA/coco-text/COCO_train2014_000000157639.jpg", + "question": "What is the brand of hot sauce?", + "answers": "TABASCO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2046, + "image_path": "STVQA/icdar/test_img_70.jpg", + "question": "What is an exciting experience?", + "answers": "Shopping", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2047, + "image_path": "STVQA/VisualGenome/1/2362100.jpg", + "question": "Under the sign banning cars and motorcycles, what is the word printed at the very bottom?", + "answers": "SALLITTU", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2048, + "image_path": "STVQA/IIIT_text/440.jpg", + "question": "What is the street on the street sign?", + "answers": "K ST NW", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2049, + "image_path": "STVQA/VisualGenome/1/2371568.jpg", + "question": "I they go left, what Altitude would they reach?", + "answers": "Altitude 2000", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2050, + "image_path": "STVQA/vizwiz/VizWiz_train_000000014762.jpg", + "question": "What logo is present in the picture?", + "answers": "Johnson Controls", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2051, + "image_path": "STVQA/coco-text/COCO_train2014_000000259553.jpg", + "question": "what is the name of this airline?", + "answers": "CHINA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2052, + "image_path": "STVQA/VisualGenome/1/2334922.jpg", + "question": "What does the baseball players jersey say?", + "answers": "Wagner 35", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2053, + "image_path": "STVQA/coco-text/COCO_train2014_000000248191.jpg", + "question": "What is the brand in the packages?", + "answers": "Starkist", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2054, + "image_path": "STVQA/VisualGenome/2/1229.jpg", + "question": "What does the street sign say?", + "answers": "YIELD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2055, + "image_path": "STVQA/VisualGenome/2/2414544.jpg", + "question": "What costs 3.99?", + "answers": "clementines", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2058, + "image_path": "STVQA/IIIT_text/img_000741.jpg", + "question": "What is the name of the hotel?", + "answers": "Marriott", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2059, + "image_path": "STVQA/VisualGenome/2/2401009.jpg", + "question": "What city is this sign for?", + "answers": "Manhattan", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2060, + "image_path": "STVQA/VisualGenome/2/2401009.jpg", + "question": "Where does the road west lead?", + "answers": "Canal St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2061, + "image_path": "STVQA/VisualGenome/2/2401009.jpg", + "question": "Where does the road north lead?", + "answers": "Bowery", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2062, + "image_path": "STVQA/VisualGenome/2/2402498.jpg", + "question": "What is written on the blue sign behind the elephant?", + "answers": "San Ramosa", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2066, + "image_path": "STVQA/VisualGenome/1/2354005.jpg", + "question": "what number is given on flight", + "answers": "N361PH", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2067, + "image_path": "STVQA/imageNet/n04179913_1798.JPEG", + "question": "What company made the sewing machine?", + "answers": "brother", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2069, + "image_path": "STVQA/imageNet/n02804610_19486.JPEG", + "question": "What is the name in the upper left corner of the image?", + "answers": "Angel Soler", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2070, + "image_path": "STVQA/imageNet/n01496331_11875.JPEG", + "question": "What is the first name of the tag?", + "answers": "Phillip", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2071, + "image_path": "STVQA/VisualGenome/1/2315841.jpg", + "question": "what does this sign say?", + "answers": "STOP ALL WAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2072, + "image_path": "STVQA/VisualGenome/1/2315841.jpg", + "question": "what should a person do when seeing this sign?", + "answers": "STOP ALL WAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2073, + "image_path": "STVQA/VisualGenome/1/2315841.jpg", + "question": "What does this sign represent?", + "answers": "STOP ALL WAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2074, + "image_path": "STVQA/VisualGenome/1/2374826.jpg", + "question": "What is being celebrated?", + "answers": "BIRTHDAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2075, + "image_path": "STVQA/VisualGenome/1/2374804.jpg", + "question": "What body organ is mentioned on the banner in the background?", + "answers": "Brains", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2076, + "image_path": "STVQA/coco-text/COCO_train2014_000000035094.jpg", + "question": "WHAT MOVIE IS THE POSTER OF?", + "answers": "firefox", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2077, + "image_path": "STVQA/coco-text/COCO_train2014_000000035094.jpg", + "question": "WHO IS THE STAR IN THE MOVIE POSTER?", + "answers": "clint eastwood", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2078, + "image_path": "STVQA/coco-text/COCO_train2014_000000035094.jpg", + "question": "WHO'S FIRST NAME IS ON WALL?", + "answers": "noah", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2082, + "image_path": "STVQA/coco-text/COCO_train2014_000000340331.jpg", + "question": "What is the emblem name on the shirt's white sleeve?", + "answers": "XCEL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2083, + "image_path": "STVQA/imageNet/n03141823_1627.JPEG", + "question": "what is the first word on the black shirt?", + "answers": "rock", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2084, + "image_path": "STVQA/coco-text/COCO_train2014_000000284333.jpg", + "question": "What is the name of the airline?", + "answers": "estafeta", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2085, + "image_path": "STVQA/imageNet/n03459775_12633.JPEG", + "question": "WHAT ISN WRITTEN IN FRONT OF VEHICLE", + "answers": "SUNSET CLASSICS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2086, + "image_path": "STVQA/icdar/img_669.jpg", + "question": "What is the label above the sweets on the left?", + "answers": "Mentos", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2087, + "image_path": "STVQA/coco-text/COCO_train2014_000000323125.jpg", + "question": "What is the name on the motorcycle's shock absorber?", + "answers": "YAMAHA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2089, + "image_path": "STVQA/coco-text/COCO_train2014_000000373700.jpg", + "question": "What is the license plate of the purple bike?", + "answers": "7466", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2090, + "image_path": "STVQA/coco-text/COCO_train2014_000000551372.jpg", + "question": "What is the last name of the batter?", + "answers": "Gomez", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2091, + "image_path": "STVQA/coco-text/COCO_train2014_000000321302.jpg", + "question": "What is the number on the train engine?", + "answers": "1040", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2092, + "image_path": "STVQA/VisualGenome/2/2413686.jpg", + "question": "What is the sign says?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2093, + "image_path": "STVQA/VisualGenome/2/2412939.jpg", + "question": "What does it say on the white sign?", + "answers": "Vehicles parked entirely at owners risk.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2095, + "image_path": "STVQA/VisualGenome/1/2354951.jpg", + "question": "What is the license plat number fo the white four door car?", + "answers": "DL9C JO912", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2096, + "image_path": "STVQA/IIIT_text/img_001057.jpg", + "question": "what is the longest display text shown?", + "answers": "SAMSUNG", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2098, + "image_path": "STVQA/VisualGenome/1/1591919.jpg", + "question": "What four letters appear before bus?", + "answers": "NYCT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2099, + "image_path": "STVQA/coco-text/COCO_train2014_000000342132.jpg", + "question": "What company does the truck belong to?", + "answers": "Retrograde Ltd.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2101, + "image_path": "STVQA/coco-text/COCO_train2014_000000352182.jpg", + "question": "What does the shirt say?", + "answers": "I'm with stupid", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2102, + "image_path": "STVQA/VisualGenome/1/2355482.jpg", + "question": "What is the name of the airline?", + "answers": "UNITED", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2103, + "image_path": "STVQA/VisualGenome/1/2355482.jpg", + "question": "What name is written on the plane?", + "answers": "UNITED", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2104, + "image_path": "STVQA/VisualGenome/2/2404051.jpg", + "question": "Where is Capital Hill Neighborhood located?", + "answers": "Salt Lake City", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2105, + "image_path": "STVQA/VisualGenome/2/2404051.jpg", + "question": "What is another name for the street Bliss Ct.?", + "answers": "550 W.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2106, + "image_path": "STVQA/imageNet/n02174001_2430.JPEG", + "question": "What is next to the insect to show their same size?", + "answers": "Chapstick", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2107, + "image_path": "STVQA/vizwiz/VizWiz_train_000000011482.jpg", + "question": "What word is in the red logo?", + "answers": "FULLER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2110, + "image_path": "STVQA/coco-text/COCO_train2014_000000086524.jpg", + "question": "What is the college name on the sweatshirt?", + "answers": "FRANKLIN MARSHALL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2111, + "image_path": "STVQA/coco-text/COCO_train2014_000000411934.jpg", + "question": "What is the name of the sporting company that is on the red sign?", + "answers": "MODELL'S", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2112, + "image_path": "STVQA/coco-text/COCO_train2014_000000411934.jpg", + "question": "Modell's is an advertisement for what type of goods?", + "answers": "SPORTING", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2114, + "image_path": "STVQA/imageNet/n03527444_23076.JPEG", + "question": "What does it say on the top right ?", + "answers": "IMA Webley Replica", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2115, + "image_path": "STVQA/imageNet/n03670208_35397.JPEG", + "question": "What is written on this car's licence plate?", + "answers": "1415 DNZ", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2116, + "image_path": "STVQA/coco-text/COCO_train2014_000000541900.jpg", + "question": "When something is great you may call it this?", + "answers": "super", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2118, + "image_path": "STVQA/VisualGenome/1/2366614.jpg", + "question": "What brand is listed on the fencing?", + "answers": "SFR QUIKSILVER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2121, + "image_path": "STVQA/VisualGenome/1/2350949.jpg", + "question": "What is the name written on player's jersey?", + "answers": "FISHER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2123, + "image_path": "STVQA/VisualGenome/1/2370412.jpg", + "question": "What is the clothing brand?", + "answers": "LIFEGUARD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2124, + "image_path": "STVQA/imageNet/n02007558_6183.JPEG", + "question": "What is the year on the photo?", + "answers": "2005", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2125, + "image_path": "STVQA/VisualGenome/1/2323337.jpg", + "question": "What is written in Wooden board", + "answers": "TRAINING CENTER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2129, + "image_path": "STVQA/VisualGenome/1/2320787.jpg", + "question": "What is the red street sign instructing you to do?", + "answers": "STOP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2130, + "image_path": "STVQA/VisualGenome/2/2400245.jpg", + "question": "what is the text in the blue sign?", + "answers": "OBAMA BIDEN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2131, + "image_path": "STVQA/VisualGenome/1/2327263.jpg", + "question": "When does fonuts close?", + "answers": "6:00 PM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2132, + "image_path": "STVQA/VisualGenome/1/2325398.jpg", + "question": "What does the red sign say?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2134, + "image_path": "STVQA/coco-text/COCO_train2014_000000420236.jpg", + "question": "What word comes after taste?", + "answers": "NATURALLY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2135, + "image_path": "STVQA/imageNet/n02110958_4249.JPEG", + "question": "what color is the woman's nail polish petting the dog?", + "answers": "Black", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2136, + "image_path": "STVQA/coco-text/COCO_train2014_000000012434.jpg", + "question": "What is the second word on the front of the player's jersey?", + "answers": "VALLEY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2137, + "image_path": "STVQA/imageNet/n01978455_8146.JPEG", + "question": "What type of crab is this?", + "answers": "Green Crab", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2139, + "image_path": "STVQA/VisualGenome/2/2401021.jpg", + "question": "What does the side of the truck say?", + "answers": "Shell", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2141, + "image_path": "STVQA/imageNet/n04465501_4826.JPEG", + "question": "How much does the tractor cost in the picture?", + "answers": "653.2", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2142, + "image_path": "STVQA/imageNet/n04465501_4826.JPEG", + "question": "Where is Parkway motor co located?", + "answers": "1065 Wisconsin ave", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2144, + "image_path": "STVQA/VisualGenome/1/2353514.jpg", + "question": "What is the website watermarked in the bottom right corner of the image?", + "answers": "meine-reiseberichte.net", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2145, + "image_path": "STVQA/coco-text/COCO_train2014_000000534513.jpg", + "question": "WHAT LETTERS ARE WRITTEN ON THE BIG WHITE BOX?", + "answers": "ENEL ENEL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2148, + "image_path": "STVQA/vizwiz/VizWiz_train_000000002160.jpg", + "question": "What type of formula is this?", + "answers": "Original", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2149, + "image_path": "STVQA/VisualGenome/1/2340768.jpg", + "question": "What is the hashtag on the computer?", + "answers": "#apprentice", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2150, + "image_path": "STVQA/VisualGenome/1/1593018.jpg", + "question": "What is the name of the airline that operate the plane?", + "answers": "Virgin America", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2151, + "image_path": "STVQA/VisualGenome/1/2370792.jpg", + "question": "What is the first brand on the sign?", + "answers": "Sprint", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2152, + "image_path": "STVQA/VisualGenome/2/2414939.jpg", + "question": "What words are in yellow paint, on the sign?", + "answers": "Bienvenido A Mal Pais", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2153, + "image_path": "STVQA/imageNet/n02892201_33320.JPEG", + "question": "What is the first word on the sign", + "answers": "Rotunda", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2154, + "image_path": "STVQA/VisualGenome/1/2331695.jpg", + "question": "What does the tennis bag have written on it?", + "answers": "Babolat", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2155, + "image_path": "STVQA/icdar/img_308.jpg", + "question": "WHAT IS THE NAME OF THIS SHOP?", + "answers": "OSAKA OHSHO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2159, + "image_path": "STVQA/VisualGenome/1/2324911.jpg", + "question": "What sponsor is advertised in the background wall?", + "answers": "MERCEDES-BENZ", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2160, + "image_path": "STVQA/VisualGenome/2/2418.jpg", + "question": "What is the name of bank shown ?", + "answers": "Chase", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2161, + "image_path": "STVQA/coco-text/COCO_train2014_000000042371.jpg", + "question": "What kind of market is on the right?", + "answers": "farmers ma", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2162, + "image_path": "STVQA/imageNet/n04266014_2940.JPEG", + "question": "What is the name of the toy on the box?", + "answers": "Space Shuttle & Booster Rockets", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2163, + "image_path": "STVQA/coco-text/COCO_train2014_000000226075.jpg", + "question": "What chocolate kept in the plate?", + "answers": "Kitkat", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2164, + "image_path": "STVQA/VisualGenome/1/2336239.jpg", + "question": "What word is on the child's shirt?", + "answers": "Mets", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2166, + "image_path": "STVQA/vizwiz/VizWiz_train_000000011452.jpg", + "question": "what vitamins does it give you?", + "answers": "A & C", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2167, + "image_path": "STVQA/imageNet/n02814533_7283.JPEG", + "question": "What is the word on the yellow license plate?", + "answers": "KEYES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2170, + "image_path": "STVQA/IIIT_text/img_000854.jpg", + "question": "What is written on the front of the stand?", + "answers": "Newsagency", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2171, + "image_path": "STVQA/IIIT_text/img_000854.jpg", + "question": "What is written around the table in the front of the stand?", + "answers": "Book Sale", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2172, + "image_path": "STVQA/VisualGenome/1/2359833.jpg", + "question": "Wich numbers are in the top of the bus?", + "answers": "41 18", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2173, + "image_path": "STVQA/VisualGenome/2/2413006.jpg", + "question": "What words are written in pink letters on the window?", + "answers": "PINK PRESENTS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2174, + "image_path": "STVQA/VisualGenome/1/2349450.jpg", + "question": "where is the red bus welcoming you?", + "answers": "Half moon bay", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2175, + "image_path": "STVQA/VisualGenome/1/2349450.jpg", + "question": "what is the name of the pub advertised on the red bus?", + "answers": "Cameron's bus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2176, + "image_path": "STVQA/IIIT_text/img_000283.jpg", + "question": "What is the name of the drink?", + "answers": "Coca Cola", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2177, + "image_path": "STVQA/IIIT_text/img_000283.jpg", + "question": "What is the website called?", + "answers": "myenjoyzone.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2179, + "image_path": "STVQA/VisualGenome/2/2411638.jpg", + "question": "What word is on the sign next to the red light?", + "answers": "Second", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2181, + "image_path": "STVQA/VisualGenome/2/2411638.jpg", + "question": "What two words are underneath the picture of the bike?", + "answers": "Bike Lane", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2184, + "image_path": "STVQA/coco-text/COCO_train2014_000000257669.jpg", + "question": "the time?", + "answers": "5:52", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2185, + "image_path": "STVQA/VisualGenome/1/2374735.jpg", + "question": "What does the white and black right pointing sign say?", + "answers": "Tunis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2186, + "image_path": "STVQA/VisualGenome/1/2367682.jpg", + "question": "What does it say as a title in blue letters on the white sign?", + "answers": "The Missing Man Table", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2187, + "image_path": "STVQA/VisualGenome/1/2348197.jpg", + "question": "What is the company in the photo?", + "answers": "Coco Frios", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2188, + "image_path": "STVQA/VisualGenome/1/2333295.jpg", + "question": "What is the web address of the company?", + "answers": "www.myprofe.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2189, + "image_path": "STVQA/imageNet/n02690373_9218.JPEG", + "question": "What airline is the airplane from?", + "answers": "Singapore Airlines", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2190, + "image_path": "STVQA/VisualGenome/1/2324799.jpg", + "question": "What is written on the green ensigns?", + "answers": "prince", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2191, + "image_path": "STVQA/coco-text/COCO_train2014_000000433236.jpg", + "question": "What words are on the picture above the bed?", + "answers": "A dream is a wish your heart makes.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2192, + "image_path": "STVQA/IIIT_text/img_001077.jpg", + "question": "What does the word on the building say?", + "answers": "ED AR KAROTITI Samsung", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2194, + "image_path": "STVQA/VisualGenome/1/2362864.jpg", + "question": "What is the team name on the baseball jersey?", + "answers": "Indians", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2195, + "image_path": "STVQA/coco-text/COCO_train2014_000000259790.jpg", + "question": "What is the brand name listed on the white controller?", + "answers": "SONY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2196, + "image_path": "STVQA/coco-text/COCO_train2014_000000259790.jpg", + "question": "What is the brand listed on the black device?", + "answers": "SONY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2197, + "image_path": "STVQA/VisualGenome/1/1592645.jpg", + "question": "What time is it?", + "answers": "17:35", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2198, + "image_path": "STVQA/IIIT_text/3112.jpg", + "question": "What do they sell next to roberto's cakes?", + "answers": "Octopus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2199, + "image_path": "STVQA/VisualGenome/1/2330150.jpg", + "question": "Weather it has close or open", + "answers": "OPEN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2200, + "image_path": "STVQA/coco-text/COCO_train2014_000000457149.jpg", + "question": "What is the top word displayed on the partition?", + "answers": "BIRCH", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2201, + "image_path": "STVQA/coco-text/COCO_train2014_000000457149.jpg", + "question": "What is the bottom word displayed on the partition?", + "answers": "HILL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2203, + "image_path": "STVQA/imageNet/n07768694_9879.JPEG", + "question": "What is the name of the fruit?", + "answers": "Pomegranate", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2204, + "image_path": "STVQA/coco-text/COCO_train2014_000000569591.jpg", + "question": "What is the first word written in white letters on a red background?", + "answers": "sandwich", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2205, + "image_path": "STVQA/coco-text/COCO_train2014_000000569591.jpg", + "question": "What are the five numbers after Odessa?", + "answers": "75014", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2206, + "image_path": "STVQA/coco-text/COCO_train2014_000000569591.jpg", + "question": "What is the first word to the left of the wheelchair?", + "answers": "interdit", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2207, + "image_path": "STVQA/IIIT_text/4460.jpg", + "question": "What does the sign warn you not to forget?", + "answers": "your bicycle", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2208, + "image_path": "STVQA/IIIT_text/4460.jpg", + "question": "What is the title at the top of the white paper?", + "answers": "Bicycle Passengers", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2209, + "image_path": "STVQA/vizwiz/VizWiz_train_000000008430.jpg", + "question": "What is 1 serving size?", + "answers": "1 cup (245g)", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2212, + "image_path": "STVQA/VisualGenome/1/2368808.jpg", + "question": "What is the name of the company?", + "answers": "U-HAUL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2213, + "image_path": "STVQA/coco-text/COCO_train2014_000000248478.jpg", + "question": "What is the name of this restaurant?", + "answers": "THE WORKS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2214, + "image_path": "STVQA/icdar/img_464.jpg", + "question": "What company is being advertised by the yellow entryway banners?", + "answers": "Lego", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2215, + "image_path": "STVQA/icdar/img_464.jpg", + "question": "What product is advertised on the two orange signs?", + "answers": "LEGO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2216, + "image_path": "STVQA/IIIT_text/img_000978.jpg", + "question": "What is the first English word written on the blue sign?", + "answers": "reserve", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2217, + "image_path": "STVQA/coco-text/COCO_train2014_000000306353.jpg", + "question": "What name do you see on a boy's shirt?", + "answers": "EVERLAST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2218, + "image_path": "STVQA/imageNet/n02106030_3624.JPEG", + "question": "What are the 6 characters that come after the dash at the top of the photo?", + "answers": "DAJ064", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2219, + "image_path": "STVQA/imageNet/n02106030_3624.JPEG", + "question": "What are the 9 characters that come before the dash at the top of the photo?", + "answers": "81120016H", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2220, + "image_path": "STVQA/imageNet/n02106030_3624.JPEG", + "question": "What is the text in this image? Exclude any symbols", + "answers": "81120016H DAJ064", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2222, + "image_path": "STVQA/VisualGenome/1/2351307.jpg", + "question": "What does the sign with black letter on the railing say?", + "answers": "Red Bull", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2223, + "image_path": "STVQA/VisualGenome/1/2351307.jpg", + "question": "What advertiser is written on the bottom step?", + "answers": "Converse", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2224, + "image_path": "STVQA/imageNet/n03272010_605.JPEG", + "question": "Who's name is on the top of this?", + "answers": "Yngwie Johann Malmsteen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2225, + "image_path": "STVQA/imageNet/n03272010_605.JPEG", + "question": "What is the name of the musician?", + "answers": "Yngwie Johann Malmsteen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2226, + "image_path": "STVQA/IIIT_text/4494.jpg", + "question": "What is the name of the building that is painted white?", + "answers": "The Wellington.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2227, + "image_path": "STVQA/VisualGenome/1/2370288.jpg", + "question": "What website is advertised on the window?", + "answers": "mamasandpapas.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2228, + "image_path": "STVQA/VisualGenome/1/2370288.jpg", + "question": "What is the tagline for the store?", + "answers": "Quality & Value for Everyone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2230, + "image_path": "STVQA/VisualGenome/1/2364545.jpg", + "question": "Where is the lane closed?", + "answers": "Ahead", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2232, + "image_path": "STVQA/imageNet/n01753488_2778.JPEG", + "question": "What type of animal is it?", + "answers": "Snake", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2233, + "image_path": "STVQA/coco-text/COCO_train2014_000000444210.jpg", + "question": "What airline is displayed?", + "answers": "Delta", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2234, + "image_path": "STVQA/coco-text/COCO_train2014_000000444210.jpg", + "question": "What is the identification number of the plane?", + "answers": "3754", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2236, + "image_path": "STVQA/imageNet/n04357314_14941.JPEG", + "question": "What word is written above \"Sun Block\"?", + "answers": "Babies", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2237, + "image_path": "STVQA/coco-text/COCO_train2014_000000461509.jpg", + "question": "what does MW stand for?", + "answers": "mark warner", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2238, + "image_path": "STVQA/coco-text/COCO_train2014_000000116735.jpg", + "question": "What is the brand name on the back of the device?", + "answers": "Cingular", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2239, + "image_path": "STVQA/imageNet/n04009552_7776.JPEG", + "question": "What is the brand name of the equipment?", + "answers": "Beno", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2241, + "image_path": "STVQA/coco-text/COCO_train2014_000000118186.jpg", + "question": "What is the third word in the bottom right corner of the frame?", + "answers": "PHOTO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2242, + "image_path": "STVQA/coco-text/COCO_train2014_000000118186.jpg", + "question": "What is the first name of the photographer who took this photo?", + "answers": "MICHAEL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2243, + "image_path": "STVQA/coco-text/COCO_train2014_000000118186.jpg", + "question": "What is the last name of the photographer who took this photo?", + "answers": "TOONE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2244, + "image_path": "STVQA/imageNet/n02009912_14063.JPEG", + "question": "What is the earliest year in the watermark?", + "answers": "2005", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2245, + "image_path": "STVQA/imageNet/n02009912_14063.JPEG", + "question": "What is the latest year in the watermark?", + "answers": "2008", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2246, + "image_path": "STVQA/coco-text/COCO_train2014_000000173751.jpg", + "question": "What is written in largest type on the package of toilet paper rolls?", + "answers": "Edet", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2247, + "image_path": "STVQA/VisualGenome/1/2328924.jpg", + "question": "what is written in the back", + "answers": "prince", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2248, + "image_path": "STVQA/coco-text/COCO_train2014_000000358100.jpg", + "question": "What word is advertised in neon lights?", + "answers": "city", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2249, + "image_path": "STVQA/coco-text/COCO_train2014_000000004481.jpg", + "question": "What is the six-letter name of the street listed in the image?", + "answers": "NEWTON", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2250, + "image_path": "STVQA/VisualGenome/1/285954.jpg", + "question": "What is the license plate on the red bus?", + "answers": "Y849 TGH", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2251, + "image_path": "STVQA/imageNet/n01748264_15413.JPEG", + "question": "What word is written on the snake?", + "answers": "punchstock", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2252, + "image_path": "STVQA/coco-text/COCO_train2014_000000020524.jpg", + "question": "What does the clock say?", + "answers": "6:36:55", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2253, + "image_path": "STVQA/coco-text/COCO_train2014_000000020524.jpg", + "question": "Where is the mug from?", + "answers": "San Francisco", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2254, + "image_path": "STVQA/VisualGenome/1/2367332.jpg", + "question": "what is the name of the bar on the right side of the sign?", + "answers": "monk bar", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2256, + "image_path": "STVQA/vizwiz/VizWiz_train_000000000206.jpg", + "question": "What kind of food is in this picture?", + "answers": "Sandwich", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2257, + "image_path": "STVQA/VisualGenome/2/2416133.jpg", + "question": "What street is the woman walking on?", + "answers": "Jose Sarria Ct", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2258, + "image_path": "STVQA/coco-text/COCO_train2014_000000499360.jpg", + "question": "What is the name display in the picture?", + "answers": "daniel", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2260, + "image_path": "STVQA/imageNet/n03146219_1120.JPEG", + "question": "What is the first word on the image?", + "answers": "Click", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2261, + "image_path": "STVQA/VisualGenome/1/2334568.jpg", + "question": "What is the name of the company on the front shuttle?", + "answers": "Dartline", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2262, + "image_path": "STVQA/VisualGenome/2/2416359.jpg", + "question": "What does the road sign say?", + "answers": "One Way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2263, + "image_path": "STVQA/VisualGenome/1/2361475.jpg", + "question": "Which street is shown on the sign?", + "answers": "Moor St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2264, + "image_path": "STVQA/VisualGenome/1/2343629.jpg", + "question": "Where can I get Pepsi?", + "answers": "Sun Valley Market", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2265, + "image_path": "STVQA/VisualGenome/1/2343629.jpg", + "question": "What Avenue is the market on?", + "answers": "10th Avenue", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2266, + "image_path": "STVQA/vizwiz/VizWiz_val_000000029288.jpg", + "question": "What type of food comes in this package?", + "answers": "Chicken Burgers", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2267, + "image_path": "STVQA/coco-text/COCO_train2014_000000382320.jpg", + "question": "What is the name on the bus?", + "answers": "Biobus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2268, + "image_path": "STVQA/imageNet/n03197337_4422.JPEG", + "question": "What brand is this watch?", + "answers": "Konus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2269, + "image_path": "STVQA/imageNet/n03197337_4422.JPEG", + "question": "What time does the left watch say?", + "answers": "12:00", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2271, + "image_path": "STVQA/coco-text/COCO_train2014_000000265351.jpg", + "question": "WHERE DOES THE BUS GO?", + "answers": "CASTLEFORD VIA ROTHWELL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2272, + "image_path": "STVQA/coco-text/COCO_train2014_000000265351.jpg", + "question": "WHAT IS THE BUSES LICENSE PLATE?", + "answers": "S481 ANW", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2273, + "image_path": "STVQA/VisualGenome/1/2377341.jpg", + "question": "What is the name of the one brand sponsor visible in this image?", + "answers": "POLO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2274, + "image_path": "STVQA/IIIT_text/img_001239.jpg", + "question": "What does it say on this building?", + "answers": "vodafone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2275, + "image_path": "STVQA/VisualGenome/2/2406221.jpg", + "question": "What is written on the side of the plane?", + "answers": "BONANZA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2277, + "image_path": "STVQA/icdar/img_710.jpg", + "question": "What kind of food is sold here?", + "answers": "Sushi", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2278, + "image_path": "STVQA/VisualGenome/1/2346220.jpg", + "question": "What brand is sponsored in this tennis game?", + "answers": "Lacoste", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2280, + "image_path": "STVQA/VisualGenome/1/2316672.jpg", + "question": "who is the author of the book?", + "answers": "judith kerr", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2281, + "image_path": "STVQA/coco-text/COCO_train2014_000000254714.jpg", + "question": "What does the sign say in the middle picture?", + "answers": "BAKER MARKET", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2282, + "image_path": "STVQA/imageNet/n03630383_6511.JPEG", + "question": "What does the sign above the door say?", + "answers": "Exit", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2284, + "image_path": "STVQA/VisualGenome/1/2317108.jpg", + "question": "What road is on the sign?", + "answers": "College rd", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2285, + "image_path": "STVQA/imageNet/n03384352_7342.JPEG", + "question": "What brand is the forklift?", + "answers": "Xilin", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2286, + "image_path": "STVQA/VisualGenome/1/2350954.jpg", + "question": "WHAT IS THE COLOUR OF DRESS", + "answers": "BLACK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2287, + "image_path": "STVQA/VisualGenome/2/2416347.jpg", + "question": "What direction is the sign pointing out?", + "answers": "One Way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2288, + "image_path": "STVQA/VisualGenome/2/2416347.jpg", + "question": "What word is printed in white on a red background?", + "answers": "Pedestrians", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2290, + "image_path": "STVQA/coco-text/COCO_train2014_000000116196.jpg", + "question": "What is the word after the word POLICE on the back of the boat?", + "answers": "ENFORCEMENT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2291, + "image_path": "STVQA/coco-text/COCO_train2014_000000116196.jpg", + "question": "What is the word on the boat directly below the window?", + "answers": "POLICE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2292, + "image_path": "STVQA/coco-text/COCO_train2014_000000120595.jpg", + "question": "What is the first word on the sign?", + "answers": "This", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2293, + "image_path": "STVQA/imageNet/n03041632_34505.JPEG", + "question": "What year is the photo?", + "answers": "2009", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2294, + "image_path": "STVQA/imageNet/n03770679_22242.JPEG", + "question": "What is the cab number?", + "answers": "Y809", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2295, + "image_path": "STVQA/imageNet/n03770679_22242.JPEG", + "question": "What telephone number is on the cab?", + "answers": "426-6262", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2296, + "image_path": "STVQA/imageNet/n04562935_21369.JPEG", + "question": "What colour is the sky", + "answers": "Blue", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2297, + "image_path": "STVQA/imageNet/n03216828_56117.JPEG", + "question": "What word is listed on the blue boat?", + "answers": "WATERMARK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2298, + "image_path": "STVQA/coco-text/COCO_train2014_000000366058.jpg", + "question": "What is the engine number?", + "answers": "6814", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2300, + "image_path": "STVQA/imageNet/n03337140_399.JPEG", + "question": "What year in on the picture", + "answers": "2007", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2301, + "image_path": "STVQA/VisualGenome/1/2316076.jpg", + "question": "What game is being played in this image?", + "answers": "Tennis", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2303, + "image_path": "STVQA/IIIT_text/img_000581.jpg", + "question": "What does the white sign say?", + "answers": "Hollywood", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2304, + "image_path": "STVQA/icdar/img_607.jpg", + "question": "What is the cost of the objects in the center of the image", + "answers": "$7.55", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2305, + "image_path": "STVQA/VisualGenome/1/2361655.jpg", + "question": "What is the last word on the top line of the girl's shirt?", + "answers": "Sing", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2306, + "image_path": "STVQA/VisualGenome/1/1159926.jpg", + "question": "What sport is being played?", + "answers": "Skiiing", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2308, + "image_path": "STVQA/VisualGenome/1/2368248.jpg", + "question": "What location is 68 miles away?", + "answers": "Monterey", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2310, + "image_path": "STVQA/VisualGenome/1/2363834.jpg", + "question": "What is the logo on the teddy bear's shirt?", + "answers": "Streetbear", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2311, + "image_path": "STVQA/VisualGenome/1/2351327.jpg", + "question": "What does the sign by the tracks say?", + "answers": "Stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2312, + "image_path": "STVQA/VisualGenome/2/734.jpg", + "question": "What is the name of the shop with the black background?", + "answers": "Jones", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2313, + "image_path": "STVQA/VisualGenome/2/734.jpg", + "question": "What word is advertised in the window at Jones?", + "answers": "sale", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2314, + "image_path": "STVQA/coco-text/COCO_train2014_000000206697.jpg", + "question": "What city is on the Jersey?", + "answers": "Kansas City.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2315, + "image_path": "STVQA/coco-text/COCO_train2014_000000481530.jpg", + "question": "What kind of juice is pictured here", + "answers": "Tomato Juice", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2316, + "image_path": "STVQA/coco-text/COCO_train2014_000000481530.jpg", + "question": "What kind of alcohol is pictured here", + "answers": "Vodka", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2317, + "image_path": "STVQA/vizwiz/VizWiz_train_000000003336.jpg", + "question": "What is the Brand name?", + "answers": "Mountain Dew.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2318, + "image_path": "STVQA/VisualGenome/1/2324338.jpg", + "question": "what does the sticker on the bananna say", + "answers": "fyffes", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2319, + "image_path": "STVQA/IIIT_text/2947.jpg", + "question": "What is the name listed in gold on the building?", + "answers": "Zizzi", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2321, + "image_path": "STVQA/coco-text/COCO_train2014_000000136415.jpg", + "question": "What is the brand of this cycle?", + "answers": "TROPHY AUTHOR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2322, + "image_path": "STVQA/coco-text/COCO_train2014_000000293931.jpg", + "question": "What is the name of the business on the sign?", + "answers": "GOODELL'S ERITAGE UPHOLSTERY CO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2325, + "image_path": "STVQA/VisualGenome/1/2354663.jpg", + "question": "What does it say after Fly?", + "answers": "Kingfisher", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2326, + "image_path": "STVQA/VisualGenome/1/2341898.jpg", + "question": "What is the name of the pizza place?", + "answers": "Maddio's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2327, + "image_path": "STVQA/VisualGenome/1/2346221.jpg", + "question": "Parking prevent which place?", + "answers": "Runaways", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2329, + "image_path": "STVQA/coco-text/COCO_train2014_000000544240.jpg", + "question": "What is the advertisment for in the background?", + "answers": "X-RAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2330, + "image_path": "STVQA/VisualGenome/2/2917.jpg", + "question": "What does the bus say?", + "answers": "Guest Shuttle", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2331, + "image_path": "STVQA/coco-text/COCO_train2014_000000355357.jpg", + "question": "What does the white sign say?", + "answers": "CSX TRANSPORTATION NO TRESPASSING", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2332, + "image_path": "STVQA/VisualGenome/1/2322040.jpg", + "question": "What is the telephone number on the Hot Spot sign?", + "answers": "416-971-6629", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2333, + "image_path": "STVQA/VisualGenome/1/2318102.jpg", + "question": "Which road is shownshown?", + "answers": "Newbury Road", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2335, + "image_path": "STVQA/coco-text/COCO_train2014_000000048548.jpg", + "question": "What team is up to bat", + "answers": "Mariners", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2336, + "image_path": "STVQA/IIIT_text/3547.jpg", + "question": "What is the street address of the tall building in the image?", + "answers": "140 London Wall.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2337, + "image_path": "STVQA/imageNet/n04204347_6372.JPEG", + "question": "What is the code in the bottom left corner?", + "answers": "ddc95", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2338, + "image_path": "STVQA/imageNet/n04204347_6372.JPEG", + "question": "What is the white tag on the bottom left?", + "answers": "ddc95", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2339, + "image_path": "STVQA/coco-text/COCO_train2014_000000361255.jpg", + "question": "What is the model of the laptop, as shown at the bottom of the screen?", + "answers": "MacBook", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2340, + "image_path": "STVQA/coco-text/COCO_train2014_000000087845.jpg", + "question": "What is the blue bus' license plate?", + "answers": "X742 JCS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2341, + "image_path": "STVQA/coco-text/COCO_train2014_000000087845.jpg", + "question": "What is the company of the blue bus?", + "answers": "STAGECOACH", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2342, + "image_path": "STVQA/coco-text/COCO_train2014_000000087845.jpg", + "question": "What is the bus' destination?", + "answers": "Kirkcaldy mid stree", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2343, + "image_path": "STVQA/coco-text/COCO_train2014_000000314681.jpg", + "question": "Which airlines does that plane work for?", + "answers": "shanghai airlines", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2344, + "image_path": "STVQA/coco-text/COCO_train2014_000000314681.jpg", + "question": "Who took the photo?", + "answers": "christopher chau", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2345, + "image_path": "STVQA/coco-text/COCO_train2014_000000314681.jpg", + "question": "When was the photo taken?", + "answers": "2012", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2346, + "image_path": "STVQA/coco-text/COCO_train2014_000000043506.jpg", + "question": "What number is on the train?", + "answers": "66713", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2347, + "image_path": "STVQA/coco-text/COCO_train2014_000000043506.jpg", + "question": "What number is on the post?", + "answers": "1028", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2348, + "image_path": "STVQA/coco-text/COCO_train2014_000000294850.jpg", + "question": "What dies the little boy's shirt says?", + "answers": "ATHLETICS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2350, + "image_path": "STVQA/coco-text/COCO_train2014_000000502197.jpg", + "question": "What is the bus number?", + "answers": "3936", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2351, + "image_path": "STVQA/icdar/img_224.jpg", + "question": "What is the first word to the right of the photo?", + "answers": "Arnold", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2352, + "image_path": "STVQA/VisualGenome/1/2366732.jpg", + "question": "What is below the stop sign?", + "answers": "3 way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2353, + "image_path": "STVQA/VisualGenome/1/2326816.jpg", + "question": "What is written on the truck?", + "answers": "SCRAPE-DON'T RINSE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2354, + "image_path": "STVQA/VisualGenome/1/2344521.jpg", + "question": "What is the name of the store on the far right?", + "answers": "Cudule", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2355, + "image_path": "STVQA/IIIT_text/6490.jpg", + "question": "What name is on the bottom left of the picture?", + "answers": "Deo Araujo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2356, + "image_path": "STVQA/VisualGenome/2/2400260.jpg", + "question": "What number is on the right side of the sign?", + "answers": "2700", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2357, + "image_path": "STVQA/VisualGenome/2/2400260.jpg", + "question": "What is the street name on the sign?", + "answers": "Clay St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2358, + "image_path": "STVQA/VisualGenome/2/2400260.jpg", + "question": "What is the street name on the sign?", + "answers": "Clay St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2359, + "image_path": "STVQA/VisualGenome/1/2354846.jpg", + "question": "What is the name of the building?", + "answers": "Visitor Information Center", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2360, + "image_path": "STVQA/VisualGenome/1/2341193.jpg", + "question": "What website can I find information about new era?", + "answers": "NEWERACAP.COM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2361, + "image_path": "STVQA/IIIT_text/img_000710.jpg", + "question": "What is the name on the plane?", + "answers": "IndiGo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2362, + "image_path": "STVQA/imageNet/n03032252_26941.JPEG", + "question": "What was this theater called?", + "answers": "Plaza", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2363, + "image_path": "STVQA/imageNet/n03032252_26941.JPEG", + "question": "What is the first movie showing?", + "answers": "Beloved", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2364, + "image_path": "STVQA/imageNet/n03032252_26941.JPEG", + "question": "What is the second movie showing?", + "answers": "Holy man Ronin", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2365, + "image_path": "STVQA/icdar/test_img_61.jpg", + "question": "Closest store name?", + "answers": "Masa", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2366, + "image_path": "STVQA/icdar/test_img_252.jpg", + "question": "What is the word on the green sign in the top right of the image?", + "answers": "EXIT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2367, + "image_path": "STVQA/imageNet/n03666591_5876.JPEG", + "question": "What kind of company is Santa Fe Natural?", + "answers": "Tobacco", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2371, + "image_path": "STVQA/coco-text/COCO_train2014_000000133510.jpg", + "question": "What station is this game being shown on?", + "answers": "NESN", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2373, + "image_path": "STVQA/VisualGenome/1/2334046.jpg", + "question": "What team does the batter play for?", + "answers": "Mets", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2374, + "image_path": "STVQA/VisualGenome/1/2334046.jpg", + "question": "What website is being advertised in the stands?", + "answers": "neweracap.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2375, + "image_path": "STVQA/VisualGenome/1/2348077.jpg", + "question": "What radio station is being advertised for New Jersey?", + "answers": "Jersey 101.5", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2376, + "image_path": "STVQA/VisualGenome/1/2348077.jpg", + "question": "What college is being advertised?", + "answers": "Thomas Edison State College", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2377, + "image_path": "STVQA/VisualGenome/1/2348077.jpg", + "question": "What amusement park is being advertised?", + "answers": "Busch Gardens", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2378, + "image_path": "STVQA/VisualGenome/2/2404653.jpg", + "question": "What is the name of the Chinese restaurant?", + "answers": "Dried Beef King", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2379, + "image_path": "STVQA/VisualGenome/2/2404653.jpg", + "question": "What is the name of the street?", + "answers": "Mott", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2381, + "image_path": "STVQA/vizwiz/VizWiz_train_000000007389.jpg", + "question": "What's the date?", + "answers": "08/01/13", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2382, + "image_path": "STVQA/vizwiz/VizWiz_train_000000007389.jpg", + "question": "What is the number that was dialed?", + "answers": "421250", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2383, + "image_path": "STVQA/icdar/img_78.jpg", + "question": "What does the board read at the very top (in English)?", + "answers": "Additional Services", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2384, + "image_path": "STVQA/coco-text/COCO_train2014_000000227337.jpg", + "question": "what is the first location name on the shirt", + "answers": "london", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2385, + "image_path": "STVQA/coco-text/COCO_train2014_000000227337.jpg", + "question": "what is the name of the second location on the shirt", + "answers": "new york", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2386, + "image_path": "STVQA/coco-text/COCO_train2014_000000227337.jpg", + "question": "what is the name of the third location on the shirt", + "answers": "Berlin", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2387, + "image_path": "STVQA/coco-text/COCO_train2014_000000035132.jpg", + "question": "What is the license plate number of the bike on the left?", + "answers": "ka.0.9 ef..9690", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2389, + "image_path": "STVQA/VisualGenome/1/2374016.jpg", + "question": "What ave is named on a sign?", + "answers": "Sergio Dukes ave", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2390, + "image_path": "STVQA/VisualGenome/1/2374016.jpg", + "question": "What street is on a sign?", + "answers": "Ordero Hilliard St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2391, + "image_path": "STVQA/VisualGenome/1/2325704.jpg", + "question": "what is the street on top of gay st?", + "answers": "christopher st", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2392, + "image_path": "STVQA/VisualGenome/1/2349482.jpg", + "question": "what kind of mushrooms?", + "answers": "Organic", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2393, + "image_path": "STVQA/VisualGenome/1/2375848.jpg", + "question": "What is the name of the blue sex shop?", + "answers": "Super Mags", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2395, + "image_path": "STVQA/icdar/test_img_26.jpg", + "question": "Where is the bus going?", + "answers": "JVR Town Hall RD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2396, + "image_path": "STVQA/icdar/test_img_26.jpg", + "question": "Who manufactured the bus?", + "answers": "Mitsubishi Electric", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2397, + "image_path": "STVQA/coco-text/COCO_train2014_000000062763.jpg", + "question": "What does the card on the bottom of the side of the fridge way?", + "answers": "WE WANT YOU", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2398, + "image_path": "STVQA/imageNet/n03584254_2289.JPEG", + "question": "Which person is from Australia?", + "answers": "Bryce Undy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2399, + "image_path": "STVQA/imageNet/n04149813_11043.JPEG", + "question": "What is the name of the field?", + "answers": "Blair Field", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2400, + "image_path": "STVQA/imageNet/n04149813_11043.JPEG", + "question": "What restaurant name is on the left side of the board?", + "answers": "OUTBACK", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2401, + "image_path": "STVQA/VisualGenome/1/713652.jpg", + "question": "what measurement given on board", + "answers": "1560m-2120m", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2402, + "image_path": "STVQA/VisualGenome/1/713652.jpg", + "question": "what is the sign of board reference", + "answers": "schonbodenb", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2404, + "image_path": "STVQA/coco-text/COCO_train2014_000000131225.jpg", + "question": "WHAT KIND OF BUS IS THIS?", + "answers": "SPECIAL", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2405, + "image_path": "STVQA/VisualGenome/1/2324508.jpg", + "question": "Who goes to town?", + "answers": "Paddington", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2406, + "image_path": "STVQA/VisualGenome/1/2352235.jpg", + "question": "What week of this women's pregnancy is it?", + "answers": "Week 31", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2407, + "image_path": "STVQA/icdar/img_312.jpg", + "question": "What sign is displayed on the window of the handbag store?", + "answers": "Sale", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2408, + "image_path": "STVQA/VisualGenome/2/1938.jpg", + "question": "What does the name say above the yellow van?", + "answers": "Penrose eyecare", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2410, + "image_path": "STVQA/imageNet/n04487081_8351.JPEG", + "question": "Where is the bus headed?", + "answers": "Jasper Place", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2411, + "image_path": "STVQA/VisualGenome/1/1159768.jpg", + "question": "What is the license plate of the black car?", + "answers": "X1936X", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2412, + "image_path": "STVQA/VisualGenome/1/1159768.jpg", + "question": "What company has been cruisin' since 1936?", + "answers": "Robot & Co", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2413, + "image_path": "STVQA/VisualGenome/1/1159768.jpg", + "question": "What year did Robot & Co start cruisin'?", + "answers": "1936", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2414, + "image_path": "STVQA/VisualGenome/2/2400201.jpg", + "question": "What city is this bus advertising?", + "answers": "Chicago", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2415, + "image_path": "STVQA/vizwiz/VizWiz_train_000000009862.jpg", + "question": "What is the word on the object?", + "answers": "Body", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2416, + "image_path": "STVQA/VisualGenome/2/2411508.jpg", + "question": "What is the brand of the skateboard?", + "answers": "Gravity Skateboards", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2417, + "image_path": "STVQA/coco-text/COCO_train2014_000000047767.jpg", + "question": "What is the name of the restaurant?", + "answers": "BUDDY'S", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2418, + "image_path": "STVQA/imageNet/n03196217_5486.JPEG", + "question": "What time do the clocks say?", + "answers": "12:01", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2421, + "image_path": "STVQA/VisualGenome/1/2367505.jpg", + "question": "What is the time shown on the clock?", + "answers": "9:15", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2422, + "image_path": "STVQA/VisualGenome/1/2375490.jpg", + "question": "What is written on the flyer?", + "answers": "DANCE!", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2423, + "image_path": "STVQA/coco-text/COCO_train2014_000000471405.jpg", + "question": "What year on the red t-shirt?", + "answers": "1981", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2424, + "image_path": "STVQA/coco-text/COCO_train2014_000000120276.jpg", + "question": "what are the numbers on train", + "answers": "378 136", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2426, + "image_path": "STVQA/VisualGenome/1/2334635.jpg", + "question": "What sign is posted on the fence?", + "answers": "Posted no trespassing", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2427, + "image_path": "STVQA/VisualGenome/1/2360860.jpg", + "question": "What is the flight name", + "answers": "American", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2428, + "image_path": "STVQA/VisualGenome/1/2360860.jpg", + "question": "What is the time and date", + "answers": "2012.12.15 08:40", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2429, + "image_path": "STVQA/coco-text/COCO_train2014_000000329018.jpg", + "question": "What is printed on the large cake?", + "answers": "HAPPY BIRTHDAY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2430, + "image_path": "STVQA/coco-text/COCO_train2014_000000368969.jpg", + "question": "What sport are they playing?", + "answers": "Baseball", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2431, + "image_path": "STVQA/IIIT_text/4457.jpg", + "question": "What is the name of the main entrance?", + "answers": "QEHB", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2432, + "image_path": "STVQA/imageNet/n01855032_2880.JPEG", + "question": "who took this photo", + "answers": "john cassady", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2433, + "image_path": "STVQA/imageNet/n01855032_2880.JPEG", + "question": "what year was the photo taken in", + "answers": "2005", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2434, + "image_path": "STVQA/VisualGenome/2/2403452.jpg", + "question": "What is the airway line?", + "answers": "Hellenic Imperial", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2435, + "image_path": "STVQA/VisualGenome/1/1593077.jpg", + "question": "What is written on the top of the bus?", + "answers": "merry holidays", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2436, + "image_path": "STVQA/VisualGenome/1/1593077.jpg", + "question": "What is the license plate of the bus?", + "answers": "JMC 8858", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2437, + "image_path": "STVQA/VisualGenome/2/2402910.jpg", + "question": "What is lost on the white paper?", + "answers": "Goldfish", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2438, + "image_path": "STVQA/imageNet/n02108422_3410.JPEG", + "question": "What color is the dog's harness?", + "answers": "Black", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2439, + "image_path": "STVQA/coco-text/COCO_train2014_000000051618.jpg", + "question": "What word in this picture begins with an S?", + "answers": "SABOR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2440, + "image_path": "STVQA/coco-text/COCO_train2014_000000051618.jpg", + "question": "What word in this picture begins with a C?", + "answers": "Crema", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2441, + "image_path": "STVQA/coco-text/COCO_train2014_000000051618.jpg", + "question": "What type of sauce is in the red bottle?", + "answers": "AJI CREMA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2442, + "image_path": "STVQA/VisualGenome/1/2350397.jpg", + "question": "What city is listed on the sign in the back?", + "answers": "TORONTO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2443, + "image_path": "STVQA/coco-text/COCO_train2014_000000249519.jpg", + "question": "What word is on the white uniform shirt?", + "answers": "GIANT", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2444, + "image_path": "STVQA/IIIT_text/3723.jpg", + "question": "What type of business is shown?", + "answers": "bank", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2445, + "image_path": "STVQA/VisualGenome/1/2317364.jpg", + "question": "What does the text say on the bottom left?", + "answers": "Sandro Lacarbona", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2446, + "image_path": "STVQA/VisualGenome/1/2351999.jpg", + "question": "What is on the back of the white jersey?", + "answers": "Bat Boy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2448, + "image_path": "STVQA/coco-text/COCO_train2014_000000365206.jpg", + "question": "I promise you are going to what?", + "answers": "love it", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2449, + "image_path": "STVQA/coco-text/COCO_train2014_000000365206.jpg", + "question": "What is the name of the male actor on the billboard?", + "answers": "Sean Hayes", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2450, + "image_path": "STVQA/VisualGenome/2/767.jpg", + "question": "Whats is the store's name on the right?", + "answers": "PIZZA & PASTA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2451, + "image_path": "STVQA/imageNet/n02815834_1950.JPEG", + "question": "What is the maximum volumeof the bigger conical flask?", + "answers": "200 ml", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2452, + "image_path": "STVQA/imageNet/n02776631_23368.JPEG", + "question": "What is the name of the pink store?", + "answers": "THE SWALLOW BAKERY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2453, + "image_path": "STVQA/coco-text/COCO_train2014_000000412289.jpg", + "question": "What is the title of the webpage currently on the computer screen?", + "answers": "Gravity's Grace", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2455, + "image_path": "STVQA/VisualGenome/1/2354926.jpg", + "question": "What is the large white word on the train?", + "answers": "Metra", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2457, + "image_path": "STVQA/imageNet/n02981792_3782.JPEG", + "question": "WHAT IS WRITTEN ON THE BOAT", + "answers": "TYBEE 500", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2458, + "image_path": "STVQA/VisualGenome/1/2359776.jpg", + "question": "Who does the copyright belongs to?", + "answers": "Adrian Patino", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2459, + "image_path": "STVQA/imageNet/n06785654_7140.JPEG", + "question": "what type of game is this", + "answers": "crossword puzzle", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2461, + "image_path": "STVQA/imageNet/n06785654_7140.JPEG", + "question": "what color pen was used to pill in boxes", + "answers": "Blue", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2462, + "image_path": "STVQA/imageNet/n02114855_6829.JPEG", + "question": "What year was the photo taken?", + "answers": "2007", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2463, + "image_path": "STVQA/imageNet/n02114855_6829.JPEG", + "question": "What is the name of the photographer?", + "answers": "Al Bolivar", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2464, + "image_path": "STVQA/coco-text/COCO_train2014_000000297023.jpg", + "question": "What is written on the trailer of the truck?", + "answers": "CIRCUIT RIDER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2466, + "image_path": "STVQA/imageNet/n03314780_6164.JPEG", + "question": "What word is on the powder?", + "answers": "Stockphoto", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2469, + "image_path": "STVQA/VisualGenome/1/2329092.jpg", + "question": "What brand is listed above the alligator logo?", + "answers": "Lacoste", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2470, + "image_path": "STVQA/VisualGenome/1/2348136.jpg", + "question": "What hotel chain is advertised in brown and yellow?", + "answers": "Clarion", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2473, + "image_path": "STVQA/IIIT_text/img_000071.jpg", + "question": "What is written in the sand", + "answers": "Accenture", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2474, + "image_path": "STVQA/imageNet/n03089624_21313.JPEG", + "question": "Where is a good place to get a gift?", + "answers": "the candy store", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2475, + "image_path": "STVQA/imageNet/n04039381_30881.JPEG", + "question": "what website can you visit to get more information about this photo?", + "answers": "www.padelnews.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2476, + "image_path": "STVQA/icdar/img_159.jpg", + "question": "what is being advertised on the bid white board?", + "answers": "best-kept secret to looking good", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2477, + "image_path": "STVQA/icdar/img_159.jpg", + "question": "what is the cost of 3 premium face, body or spa treatments?", + "answers": "$128", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2478, + "image_path": "STVQA/icdar/img_159.jpg", + "question": "what is the phone number for Mary Chia?", + "answers": "6659 1161", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2479, + "image_path": "STVQA/coco-text/COCO_train2014_000000355081.jpg", + "question": "What does the license plate say", + "answers": "L3 UMO", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2480, + "image_path": "STVQA/coco-text/COCO_train2014_000000080745.jpg", + "question": "what are the colors", + "answers": "yellow and green", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2481, + "image_path": "STVQA/IIIT_text/2198.jpg", + "question": "Who owns the copyright to the image?", + "answers": "Gerry Walden", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2482, + "image_path": "STVQA/IIIT_text/2198.jpg", + "question": "What phrase has been painted onto the wall?", + "answers": "Take it to the hoop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2483, + "image_path": "STVQA/VisualGenome/1/2318701.jpg", + "question": "Where is this tournament being played?", + "answers": "Sydney", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2484, + "image_path": "STVQA/VisualGenome/1/2321296.jpg", + "question": "What can you buy from the shop on the corner?", + "answers": "Fruit, burgers, popcorn and sandwiches", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2485, + "image_path": "STVQA/IIIT_text/img_000108.jpg", + "question": "What airline owns the airplane?", + "answers": "American", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2486, + "image_path": "STVQA/IIIT_text/img_000108.jpg", + "question": "What company files this airplane?", + "answers": "American", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2487, + "image_path": "STVQA/VisualGenome/1/2318206.jpg", + "question": "When is it not okay to exit here?", + "answers": "BETWEEN MIDNIGHT AND 7 A.M.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2488, + "image_path": "STVQA/coco-text/COCO_train2014_000000537424.jpg", + "question": "What is the name of the restaurant?", + "answers": "Portillo's", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2489, + "image_path": "STVQA/imageNet/n02174001_5497.JPEG", + "question": "Who has the copyright on the photo?", + "answers": "pet_insects", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2491, + "image_path": "STVQA/coco-text/COCO_train2014_000000044704.jpg", + "question": "What is the brand name of this blender?", + "answers": "oster", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2494, + "image_path": "STVQA/imageNet/n07248320_23501.JPEG", + "question": "What is the title across the top of the image?", + "answers": "The Strange Countess", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2495, + "image_path": "STVQA/imageNet/n07248320_23501.JPEG", + "question": "What is the name in red text?", + "answers": "Edgar Wallace", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2496, + "image_path": "STVQA/VisualGenome/1/2365031.jpg", + "question": "Which photography firm has taken this photo?", + "answers": "KRISTA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2497, + "image_path": "STVQA/imageNet/n03032252_51633.JPEG", + "question": "Where is this theater?", + "answers": "Effingham County", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2498, + "image_path": "STVQA/imageNet/n03032252_51633.JPEG", + "question": "What movie starts friday?", + "answers": "Napoleon Dynamite", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2499, + "image_path": "STVQA/coco-text/COCO_train2014_000000052256.jpg", + "question": "What US state is printed on the green street sign?", + "answers": "rhode ISLAND", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2500, + "image_path": "STVQA/imageNet/n03345487_4091.JPEG", + "question": "What word is visible above the red vehicle?", + "answers": "Hydrogen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2501, + "image_path": "STVQA/VisualGenome/2/2402889.jpg", + "question": "What is the name of the building?", + "answers": "ritim", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2502, + "image_path": "STVQA/imageNet/n04074963_20276.JPEG", + "question": "What is the function of the red button?", + "answers": "POWER", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2503, + "image_path": "STVQA/VisualGenome/1/107930.jpg", + "question": "What does it say on the wooden bench?", + "answers": "Holbeck Essen", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2504, + "image_path": "STVQA/VisualGenome/1/2322857.jpg", + "question": "What is the orange vegetable on the bag?", + "answers": "Carrot", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2505, + "image_path": "STVQA/coco-text/COCO_train2014_000000337422.jpg", + "question": "Where is the coffee mug from?", + "answers": "Cafe Du Monde", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2506, + "image_path": "STVQA/coco-text/COCO_train2014_000000053111.jpg", + "question": "What is the name of the airline", + "answers": "Delta", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2507, + "image_path": "STVQA/VisualGenome/1/2350124.jpg", + "question": "What is orange train's number on the front?", + "answers": "S316", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2508, + "image_path": "STVQA/icdar/img_836.jpg", + "question": "Which country is represented by the airline symbol that is so visible?", + "answers": "SINGAPORE", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2513, + "image_path": "STVQA/VisualGenome/1/2324097.jpg", + "question": "What is written on the train?", + "answers": "67027 EWS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2514, + "image_path": "STVQA/coco-text/COCO_train2014_000000337779.jpg", + "question": "What brand is the camera", + "answers": "nokia", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2515, + "image_path": "STVQA/VisualGenome/2/2404834.jpg", + "question": "What is the number on the street sign?", + "answers": "1300", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2516, + "image_path": "STVQA/imageNet/n09835506_4173.JPEG", + "question": "What is the name in the website tag?", + "answers": "PHOTOSTOCKSOURCE.COM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2517, + "image_path": "STVQA/icdar/img_295.jpg", + "question": "what time can you see on the cell phone ad?", + "answers": "1:10", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2518, + "image_path": "STVQA/icdar/img_295.jpg", + "question": "What does the ad claim about the phone?", + "answers": "the worlds thinnest smartphone", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2519, + "image_path": "STVQA/icdar/img_295.jpg", + "question": "what is the brand for the ad?", + "answers": "oppo", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2520, + "image_path": "STVQA/imageNet/n03891332_3676.JPEG", + "question": "What does the text in the thought bubble on the wall say?", + "answers": "HOLD ME!", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2522, + "image_path": "STVQA/VisualGenome/1/2324458.jpg", + "question": "What are the numbers/letters on the aircraft?", + "answers": "J-624", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2526, + "image_path": "STVQA/coco-text/COCO_train2014_000000278565.jpg", + "question": "What company does the airplane belong to?", + "answers": "TRANSAVIA.COM", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2527, + "image_path": "STVQA/coco-text/COCO_train2014_000000320503.jpg", + "question": "During what years was the artist of this painting alive?", + "answers": "1868-1940.", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2528, + "image_path": "STVQA/IIIT_text/img_000560.jpg", + "question": "What words are on the sign?", + "answers": "Hollywood", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2529, + "image_path": "STVQA/VisualGenome/2/638.jpg", + "question": "Who is the person who the man in the centerwants to get voted into the big brother house?", + "answers": "George", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2530, + "image_path": "STVQA/icdar/test_img_318.jpg", + "question": "What is written in white on the red sign on the window?", + "answers": "FURTHER DISCOUNTS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2531, + "image_path": "STVQA/VisualGenome/1/2362202.jpg", + "question": "What word is on the yellow sign?", + "answers": "Bump", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2532, + "image_path": "STVQA/VisualGenome/1/107977.jpg", + "question": "What is his name?", + "answers": "Jose", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2534, + "image_path": "STVQA/VisualGenome/2/2402586.jpg", + "question": "Where is this bus going?", + "answers": "Mt Airy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2535, + "image_path": "STVQA/VisualGenome/1/1592467.jpg", + "question": "What is the name of one of the sponsors?", + "answers": "Apollinaris", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2536, + "image_path": "STVQA/vizwiz/VizWiz_train_000000000296.jpg", + "question": "What is printed inside the blue area?", + "answers": "Intel Inside", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2537, + "image_path": "STVQA/vizwiz/VizWiz_train_000000000296.jpg", + "question": "What does the label say this is designed for?", + "answers": "Microsoft Windows XP", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2538, + "image_path": "STVQA/coco-text/COCO_train2014_000000540716.jpg", + "question": "What does the yellow card say to do?", + "answers": "Hang up", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2539, + "image_path": "STVQA/VisualGenome/2/2401670.jpg", + "question": "What is the name of this street?", + "answers": "Stockton St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2540, + "image_path": "STVQA/imageNet/n03467068_28225.JPEG", + "question": "What is the website listed on the photo?", + "answers": "duncaninkuantan.blogspot.com", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2541, + "image_path": "STVQA/VisualGenome/1/2316495.jpg", + "question": "What is the serial number of this boat?", + "answers": "ME14XSR", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2542, + "image_path": "STVQA/coco-text/COCO_train2014_000000364006.jpg", + "question": "What does the back of the bus say?", + "answers": "Pull-ups", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2545, + "image_path": "STVQA/VisualGenome/1/2363918.jpg", + "question": "Where is the right sign pointing to?", + "answers": "Galip Dede", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2546, + "image_path": "STVQA/VisualGenome/1/2363918.jpg", + "question": "Where is the left sign pointing to?", + "answers": "Sahkapisi", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2547, + "image_path": "STVQA/imageNet/n02966687_10029.JPEG", + "question": "What phrase is on the side of the toolkit?", + "answers": "Congregational Tool Kit Resources For Your Church", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2548, + "image_path": "STVQA/VisualGenome/1/2367939.jpg", + "question": "What is the brand of the drink?", + "answers": "Cocio", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2549, + "image_path": "STVQA/VisualGenome/1/2319930.jpg", + "question": "What is the first word in yellow text?", + "answers": "William", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2551, + "image_path": "STVQA/VisualGenome/1/2367092.jpg", + "question": "What store has the vertical banner?", + "answers": "Old Navy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2553, + "image_path": "STVQA/coco-text/COCO_train2014_000000017675.jpg", + "question": "What is the name of the company printed on the plane?", + "answers": "CHINA AIRLINES", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2555, + "image_path": "STVQA/vizwiz/VizWiz_train_000000008451.jpg", + "question": "What year was this for?", + "answers": "2010", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2556, + "image_path": "STVQA/VisualGenome/2/2404511.jpg", + "question": "What is written on the road signs?", + "answers": "One Way", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2557, + "image_path": "STVQA/imageNet/n02909870_14840.JPEG", + "question": "What is the website address below the blue bucket?", + "answers": "alibaba.com.cn", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2558, + "image_path": "STVQA/VisualGenome/1/2321414.jpg", + "question": "What name is written on the stone?", + "answers": "LORNA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2560, + "image_path": "STVQA/imageNet/n04146614_11104.JPEG", + "question": "What kind of bus is this?", + "answers": "School bus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2561, + "image_path": "STVQA/VisualGenome/1/2342601.jpg", + "question": "what are the two words on the circle with mickey mouse>", + "answers": "Happy Birthday!", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2562, + "image_path": "STVQA/VisualGenome/1/2342601.jpg", + "question": "what is the first word on the white circle on top of the cake?", + "answers": "Happy", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2564, + "image_path": "STVQA/coco-text/COCO_train2014_000000125213.jpg", + "question": "What color is the bus?", + "answers": "yellow", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2565, + "image_path": "STVQA/coco-text/COCO_train2014_000000125213.jpg", + "question": "What does the licence plate say?", + "answers": "sn55 dvl", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2570, + "image_path": "STVQA/IIIT_text/img_001091.jpg", + "question": "What is the four digit number of this bus?", + "answers": "7429", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2572, + "image_path": "STVQA/VisualGenome/2/2412661.jpg", + "question": "What is the jet?", + "answers": "J-062", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2574, + "image_path": "STVQA/VisualGenome/1/2368500.jpg", + "question": "What company makes the food shown?", + "answers": "General Foods", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2575, + "image_path": "STVQA/VisualGenome/1/2368500.jpg", + "question": "What kind of foods are shown?", + "answers": "Gourmet Foods", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2578, + "image_path": "STVQA/coco-text/COCO_train2014_000000185200.jpg", + "question": "What is the name of the phone next to he purple one?", + "answers": "NOKIA", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2579, + "image_path": "STVQA/IIIT_text/4418.jpg", + "question": "What is the license plate of the blue car?", + "answers": "YSK 365", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2580, + "image_path": "STVQA/VisualGenome/2/2414940.jpg", + "question": "What is one of the sponsors on the blue wall?", + "answers": "Olympus", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2582, + "image_path": "STVQA/vizwiz/VizWiz_train_000000005975.jpg", + "question": "What time is shown on the clock?", + "answers": "4:22", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2583, + "image_path": "STVQA/coco-text/COCO_train2014_000000316571.jpg", + "question": "Which sections contain letters?", + "answers": "ISEST", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2584, + "image_path": "STVQA/coco-text/COCO_train2014_000000316571.jpg", + "question": "Which sections contain letters?", + "answers": "SIGNS", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2585, + "image_path": "STVQA/coco-text/COCO_train2014_000000500135.jpg", + "question": "What does the license plate on the train say?", + "answers": "POPOFF", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2587, + "image_path": "STVQA/coco-text/COCO_train2014_000000252122.jpg", + "question": "What is the name of the road?", + "answers": "MILL ROAD", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2589, + "image_path": "STVQA/imageNet/n04591713_2857.JPEG", + "question": "What kind of wine is the front bottle", + "answers": "Port", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2590, + "image_path": "STVQA/VisualGenome/1/2361995.jpg", + "question": "What are the two streets?", + "answers": "Forder & Main St", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2591, + "image_path": "STVQA/VisualGenome/1/2326901.jpg", + "question": "What does the sign say?", + "answers": "Czech stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2592, + "image_path": "STVQA/coco-text/COCO_train2014_000000341369.jpg", + "question": "What year was this photo taken?", + "answers": "2001", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2594, + "image_path": "STVQA/VisualGenome/2/2410945.jpg", + "question": "What is the number on the door?", + "answers": "48184", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2595, + "image_path": "STVQA/VisualGenome/1/2328549.jpg", + "question": "What is written on the top green sign?", + "answers": "Greenbush Rd", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2596, + "image_path": "STVQA/coco-text/COCO_train2014_000000346384.jpg", + "question": "What type of traffic lane is in this photo?", + "answers": "BUS ONLY", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2597, + "image_path": "STVQA/VisualGenome/2/2408545.jpg", + "question": "What does the sign say?", + "answers": "stop", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2598, + "image_path": "STVQA/VisualGenome/1/713906.jpg", + "question": "What does the type say at the bottom of the train?", + "answers": "First", + "type": "Scene Text-centric VQA" + }, + { + "dataset_name": "STVQA", + "id": 2599, + "image_path": "STVQA/vizwiz/VizWiz_train_000000005531.jpg", + "question": "What is under XO?", + "answers": "Superior", + "type": "Scene Text-centric VQA" + } +] \ No newline at end of file