Update preset images (#3493)

lm-sys · Aug 26, 2024 · 282534b
1 parent 0b09cee
commit 282534b
Showing 1 changed file with 18 additions and 25 deletions.
diff --git a/fastchat/serve/vision/create_vqa_examples_dir.py b/fastchat/serve/vision/create_vqa_examples_dir.py
@@ -64,53 +64,45 @@ def download_images_and_create_json(
     args = parser.parse_args()
 
     datasets_info = {
-        "DocVQA": {
-            "path": "lmms-lab/DocVQA",
+        "realworldqa": {
+            "path": "visheratin/realworldqa",
             "image_key": "image",
             "question_key": "question",
-            "id_key": "questionId",
-            "subset": "DocVQA",
-            "split": "test",
-        },
-        "ChartQA": {
-            "path": "HuggingFaceM4/ChartQA",
-            "image_key": "image",
-            "question_key": "query",
             "id_key": "index",
             "subset": False,
             "split": "test",
         },
-        "realworldqa": {
-            "path": "visheratin/realworldqa",
+        "Memes": {
+            "path": "not-lain/meme-dataset",
             "image_key": "image",
-            "question_key": "question",
+            "question_key": "name",
             "id_key": "index",
             "subset": False,
-            "split": "test",
+            "split": "train",
         },
-        "NewYorker": {
-            "path": "jmhessel/newyorker_caption_contest",
+        "Floorplan": {
+            "path": "umesh16071973/Floorplan_Dataset_21022024",
             "image_key": "image",
-            "question_key": "questions",
+            "question_key": "caption",
             "id_key": "index",
-            "subset": "explanation",
+            "subset": False,
             "split": "train",
         },
-        "WikiArt": {
-            "path": "huggan/wikiart",
+        "Website": {
+            "path": "Zexanima/website_screenshots_image_dataset",
             "image_key": "image",
-            "question_key": "artist",
+            "question_key": "date_captured",
             "id_key": "index",
             "subset": False,
             "split": "train",
         },
-        "TextVQA": {
-            "path": "facebook/textvqa",
+        "IllusionVQA": {
+            "path": "csebuetnlp/illusionVQA-Comprehension",
             "image_key": "image",
             "question_key": "question",
-            "id_key": "question_id",
+            "id_key": "index",
             "subset": False,
-            "split": "train",
+            "split": "test",
         },
     }
 
@@ -121,6 +113,7 @@ def download_images_and_create_json(
     for dataset_name in datasets_info.keys():
         with open(f"{args.output_dir}/{dataset_name}/data.json") as f:
             data = json.load(f)
+            print(f"Dataset: {dataset_name}, Number of examples: {len(data)}")
             dataset_json.extend(np.random.choice(data, 500))
 
     with open(f"{args.output_dir}/metadata_sampled.json", "w") as f: