Add separated pope tasks by category · EvolvingLMMs-Lab/lmms-eval@22520a9 (original) (raw)

Original file line number	Diff line number	Diff line change
@@ -0,0 +1,35 @@
	1	+dataset_path: lmms-lab/POPE  # dataset identifier - presumably a Hugging Face Hub repo id; verify against the loader
	2	+dataset_name: Full  # dataset configuration name given together with dataset_path
	3	+dataset_kwargs:  # no inline value: the following key is presumably nested here (indentation is not preserved in this diff view)
	4	+token: True  # NOTE(review): presumably a dataset_kwargs entry enabling authenticated dataset access - confirm
	5	+task: "pope_adv"  # name of this task
	6	+test_split: adversarial  # split this task evaluates
	7	+output_type: generate_until  # NOTE(review): presumably free-form generation mode - confirm against the harness
	8	+doc_to_visual: !function utils.pope_doc_to_visual  # references pope_doc_to_visual in a utils module not shown here
	9	+doc_to_text: !function utils.pope_doc_to_text  # references pope_doc_to_text in the same utils module
	10	+doc_to_target: "answer"  # presumably the document field holding the reference answer; verify
	11	+generation_kwargs:  # no inline value: the five decoding settings below are presumably nested here
	12	+max_new_tokens: 128  # generation length setting
	13	+temperature: 0
	14	+top_p: 0
	15	+num_beams: 1
	16	+do_sample: false  # sampling is switched off
	17	+process_results: !function utils.pope_process_results  # references pope_process_results in utils (not shown here)
	18	+metric_list:  # five entries follow; each "- metric" line is followed by its aggregation and higher_is_better keys
	19	+ - metric: pope_accuracy
	20	+aggregation: !function utils.pope_aggregate_accuracy  # aggregation function for pope_accuracy, defined in utils
	21	+higher_is_better: true
	22	+ - metric: pope_precision
	23	+aggregation: !function utils.pope_aggregate_precision  # aggregation function for pope_precision, defined in utils
	24	+higher_is_better: true
	25	+ - metric: pope_recall
	26	+aggregation: !function utils.pope_aggregate_recall  # aggregation function for pope_recall, defined in utils
	27	+higher_is_better: true
	28	+ - metric: pope_f1_score
	29	+aggregation: !function utils.pope_aggregate_f1_score  # aggregation function for pope_f1_score, defined in utils
	30	+higher_is_better: true
	31	+ - metric: pope_yes_ratio
	32	+aggregation: !function utils.pope_aggregate_yes_ratio  # aggregation function for pope_yes_ratio, defined in utils
	33	+higher_is_better: true  # NOTE(review): a yes-ratio looks like a response-bias indicator rather than a quality score - confirm this direction is intended
	34	+metadata:  # no inline value: the list item below presumably belongs here
	35	+ - version: 0.0  # version marker for this task config