new task gqa-ru · EvolvingLMMs-Lab/lmms-eval@a0de897 (original) (raw)

2 files changed

lines changed

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Task configuration for the Russian GQA benchmark ("gqa-ru").
# Questions come from the `testdev_balanced_instructions` config of
# deepvk/GQA-ru; images are looked up separately by utils.gqa_doc_to_visual.
dataset_path: deepvk/GQA-ru
dataset_name: testdev_balanced_instructions
dataset_kwargs:
  token: True
task: "gqa-ru"
test_split: testdev
output_type: generate_until
doc_to_visual: !function utils.gqa_doc_to_visual
doc_to_text: !function utils.gqa_doc_to_text
doc_to_target: "answer"
# Greedy decoding with a short answer budget.
generation_kwargs:
  max_new_tokens: 16
  temperature: 0
  top_p: 1.0
  num_beams: 1
  do_sample: false
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
metadata:
  - version: 0.0

# Prompt pieces consumed by utils.gqa_doc_to_text (keys: pre_prompt, post_prompt).
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    # Russian for "Answer in one word."
    post_prompt: "\nОтветь одним словом."
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
1 +from datasets import load_dataset
2 +
# Lazily populated module-level cache. Both names stay ``None`` until the
# first call to ``gqa_doc_to_visual`` finishes building the image index.
GQA_RAW_IMAGE_DATASET = None
GQA_ID2IMAGE = None


def gqa_doc_to_visual(doc):
    """Return the image referenced by ``doc`` as a one-element list.

    On the first call the ``testdev_balanced_images`` config of
    ``deepvk/GQA-ru`` (split ``testdev``) is loaded and every row is indexed
    by its ``"id"`` field, with the row's ``"image"`` converted to RGB.
    Later calls reuse that in-memory index.

    Args:
        doc: Mapping with an ``"imageId"`` key naming the image to fetch.

    Returns:
        A list containing the single RGB image whose id equals
        ``doc["imageId"]``.

    Raises:
        KeyError: If ``doc`` has no ``"imageId"`` key or the id is not
            present in the image dataset.
    """
    global GQA_RAW_IMAGE_DATASET
    global GQA_ID2IMAGE
    if GQA_ID2IMAGE is None:
        # NOTE(review): token=True presumably makes `datasets` use locally
        # stored Hugging Face credentials -- confirm against the library docs.
        raw_images = load_dataset("deepvk/GQA-ru", "testdev_balanced_images", split="testdev", token=True)
        id_to_image = {row["id"]: row["image"].convert("RGB") for row in raw_images}
        # Publish the cache only after the whole index was built. If loading
        # or conversion fails midway, the globals remain None and the next
        # call retries instead of serving a half-filled index.
        GQA_RAW_IMAGE_DATASET = raw_images
        GQA_ID2IMAGE = id_to_image
    return [GQA_ID2IMAGE[doc["imageId"]]]
17 +
18 +
def gqa_doc_to_text(doc, model_specific_prompt_kwargs=None):
    """Build the text prompt for one GQA document.

    Args:
        doc: Mapping with a ``"question"`` key.
        model_specific_prompt_kwargs: Optional mapping that may contain
            ``"pre_prompt"`` and ``"post_prompt"`` strings. ``None`` or a
            missing key is treated as an empty string.

    Returns:
        The question with the pre-prompt prepended and the post-prompt
        appended, with no separator added between the parts.

    Raises:
        KeyError: If ``doc`` has no ``"question"`` key.
    """
    # Tolerate callers that pass no prompt kwargs, or only one of the keys.
    prompt_kwargs = model_specific_prompt_kwargs or {}
    pre_prompt = prompt_kwargs.get("pre_prompt", "")
    post_prompt = prompt_kwargs.get("post_prompt", "")
    question = doc["question"]
    return f"{pre_prompt}{question}{post_prompt}"