Merge pull request #95 from AtsuMiyai/new_task/upd · EvolvingLMMs-Lab/lmms-eval@8ee7848 (original) (raw)

17 files changed

lines changed

Original file line number Diff line number Diff line change
@@ -197,6 +197,19 @@ We also provide the raw data exported from Weights & Biases for the detailed res
197 197 - MMMU (mmmu)
198 198 - MMMU Validation (mmmu_val)
199 199 - MMMU Test (mmmu_test)
200 +- MMUPD (mmupd)
201 +- MMUPD Base (mmupd_base)
202 +- MMAAD Base (mmaad_base)
203 +- MMIASD Base (mmiasd_base)
204 +- MMIVQD Base (mmivqd_base)
205 +- MMUPD Option (mmupd_option)
206 +- MMAAD Option (mmaad_option)
207 +- MMIASD Option (mmiasd_option)
208 +- MMIVQD Option (mmivqd_option)
209 +- MMUPD Instruction (mmupd_instruction)
210 +- MMAAD Instruction (mmaad_instruction)
211 +- MMIASD Instruction (mmiasd_instruction)
212 +- MMIVQD Instruction (mmivqd_instruction)
200 213 - MMVet (mmvet)
201 214 - Multi-DocVQA (multidocvqa)
202 215 - Multi-DocVQA Validation (multidocvqa_val)
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Shared template for all MM-UPD tasks (included by each task yaml).
# Fix: the original hunk declared `doc_to_target: "answer"` twice (lines 2 and 5
# of the new file) — duplicate keys are invalid YAML 1.2 and most parsers
# silently keep the last value; the duplicate is removed here.
dataset_path: MM-UPD/MM-UPD
doc_to_target: "answer"
doc_to_visual: !function utils.mmupd_doc_to_visual
doc_to_text: !function utils.mmupd_doc_to_text
process_results: !function utils.mmupd_process_results
model_specific_generation_kwargs:
  llava:
    image_aspect_ratio: original
output_type: generate_until
generation_kwargs:
  until:
    - "ASSISTANT:"
  max_new_tokens: 1024
  temperature: 0
  top_p: 0
  num_beams: 1
  do_sample: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-AAD base setting: question is asked with no extra instruction appended.
task: "mmaad_base"
test_split: test
dataset_name: mmaad_base
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\n"
include: _default_template_mmupd_yaml
metric_list:
  - metric: gpt_eval_score
    # aggregation/higher_is_better belong to the metric item above, not the
    # top-level mapping — the flattened indentation would detach them.
    aggregation: !function utils.mmaad_base
    higher_is_better: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-AAD instruction setting: adds an explicit "None of the above" escape hatch.
task: "mmaad_instruction"
test_split: test
# NOTE(review): dataset_name points at the *_base dataset — the instruction
# variant appears to reuse the base split with a different prompt; confirm intended.
dataset_name: mmaad_base
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\nIf all the options are incorrect, answer \"F. None of the above\"."
include: _default_template_mmupd_yaml
metric_list:
  - metric: gpt_eval_score
    aggregation: !function utils.mmaad_instruction
    higher_is_better: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-AAD option setting: model must answer with the option letter only.
task: "mmaad_option"
test_split: test
dataset_name: mmaad_option
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\nAnswer with the option's letter from the given choices directly."
include: _default_template_mmupd_yaml
metric_list:
  - metric: gpt_eval_score
    aggregation: !function utils.mmaad_option
    higher_is_better: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IASD base setting: question is asked with no extra instruction appended.
task: "mmiasd_base"
test_split: test
dataset_name: mmiasd_base
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\n"
include: _default_template_mmupd_yaml
metric_list:
  - metric: gpt_eval_score
    aggregation: !function utils.mmiasd_base
    higher_is_better: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IASD instruction setting: adds an explicit "None of the above" escape hatch.
task: "mmiasd_instruction"
test_split: test
# NOTE(review): dataset_name points at the *_base dataset — the instruction
# variant appears to reuse the base split with a different prompt; confirm intended.
dataset_name: mmiasd_base
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\nIf all the options are incorrect, answer \"F. None of the above\"."
include: _default_template_mmupd_yaml
metric_list:
  - metric: gpt_eval_score
    aggregation: !function utils.mmiasd_instruction
    higher_is_better: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IASD option setting: model must answer with the option letter only.
task: "mmiasd_option"
test_split: test
dataset_name: mmiasd_option
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\nAnswer with the option's letter from the given choices directly."
include: _default_template_mmupd_yaml
metric_list:
  - metric: gpt_eval_score
    aggregation: !function utils.mmiasd_option
    higher_is_better: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IVQD base setting: question is asked with no extra instruction appended.
task: "mmivqd_base"
test_split: test
dataset_name: mmivqd_base
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\n"
include: _default_template_mmupd_yaml
metric_list:
  - metric: gpt_eval_score
    aggregation: !function utils.mmivqd_base
    higher_is_better: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IVQD instruction setting: adds an explicit "image is irrelevant" escape hatch.
task: "mmivqd_instruction"
test_split: test
# NOTE(review): dataset_name points at the *_base dataset — the instruction
# variant appears to reuse the base split with a different prompt; confirm intended.
dataset_name: mmivqd_base
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\nIf the given image is irrelevant to the question, answer \"F. The image and question are irrelevant.\"."
include: _default_template_mmupd_yaml
metric_list:
  - metric: gpt_eval_score
    aggregation: !function utils.mmivqd_instruction
    higher_is_better: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IVQD option setting: model must answer with the option letter only.
task: "mmivqd_option"
test_split: test
dataset_name: mmivqd_option
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\nAnswer with the option's letter from the given choices directly."
include: _default_template_mmupd_yaml
metric_list:
  - metric: gpt_eval_score
    aggregation: !function utils.mmivqd_option
    higher_is_better: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Umbrella group running all nine MM-UPD sub-tasks.
group: mmupd
task:
  - mmaad_base
  - mmaad_option
  - mmaad_instruction
  - mmiasd_base
  - mmiasd_option
  - mmiasd_instruction
  - mmivqd_base
  - mmivqd_option
  - mmivqd_instruction
metadata:
  # These three keys belong under `metadata` — at the flattened top level the
  # `metadata` mapping would be null and the settings would be ignored.
  version: 0.0
  sys_prompt: ""
  gpt_eval_model_name: "gpt-3.5-turbo-0613"
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Group running only the three base-setting MM-UPD sub-tasks.
group: mmupd_base
task:
  - mmaad_base
  - mmiasd_base
  - mmivqd_base
metadata:
  # These three keys belong under `metadata` — at the flattened top level the
  # `metadata` mapping would be null and the settings would be ignored.
  version: 0.0
  sys_prompt: ""
  gpt_eval_model_name: "gpt-3.5-turbo-0613"