Skip to content

Commit

Permalink
Update preset images (#3493)
Browse files Browse the repository at this point in the history
  • Loading branch information
lisadunlap authored Aug 26, 2024
1 parent 0b09cee commit 282534b
Showing 1 changed file with 18 additions and 25 deletions.
43 changes: 18 additions & 25 deletions fastchat/serve/vision/create_vqa_examples_dir.py
Original file line number Diff line number Diff line change
@@ -64,53 +64,45 @@ def download_images_and_create_json(
 args = parser.parse_args()

 datasets_info = {
-"DocVQA": {
-"path": "lmms-lab/DocVQA",
+"realworldqa": {
+"path": "visheratin/realworldqa",
 "image_key": "image",
 "question_key": "question",
-"id_key": "questionId",
-"subset": "DocVQA",
-"split": "test",
-},
-"ChartQA": {
-"path": "HuggingFaceM4/ChartQA",
-"image_key": "image",
-"question_key": "query",
 "id_key": "index",
 "subset": False,
 "split": "test",
 },
-"realworldqa": {
-"path": "visheratin/realworldqa",
+"Memes": {
+"path": "not-lain/meme-dataset",
 "image_key": "image",
-"question_key": "question",
+"question_key": "name",
 "id_key": "index",
 "subset": False,
-"split": "test",
+"split": "train",
 },
-"NewYorker": {
-"path": "jmhessel/newyorker_caption_contest",
+"Floorplan": {
+"path": "umesh16071973/Floorplan_Dataset_21022024",
 "image_key": "image",
-"question_key": "questions",
+"question_key": "caption",
 "id_key": "index",
-"subset": "explanation",
+"subset": False,
 "split": "train",
 },
-"WikiArt": {
-"path": "huggan/wikiart",
+"Website": {
+"path": "Zexanima/website_screenshots_image_dataset",
 "image_key": "image",
-"question_key": "artist",
+"question_key": "date_captured",
 "id_key": "index",
 "subset": False,
 "split": "train",
 },
-"TextVQA": {
-"path": "facebook/textvqa",
+"IllusionVQA": {
+"path": "csebuetnlp/illusionVQA-Comprehension",
 "image_key": "image",
 "question_key": "question",
-"id_key": "question_id",
+"id_key": "index",
 "subset": False,
-"split": "train",
+"split": "test",
 },
 }

@@ -121,6 +113,7 @@ def download_images_and_create_json(
 for dataset_name in datasets_info.keys():
 with open(f"{args.output_dir}/{dataset_name}/data.json") as f:
 data = json.load(f)
+print(f"Dataset: {dataset_name}, Number of examples: {len(data)}")
 dataset_json.extend(np.random.choice(data, 500))

 with open(f"{args.output_dir}/metadata_sampled.json", "w") as f:
Expand Down

0 comments on commit 282534b

Please sign in to comment.