# Task configuration for the POPE benchmark, adversarial split.
# Registers the task under the name "pope_adv".

# --- Dataset ---------------------------------------------------------------
dataset_path: lmms-lab/POPE
dataset_name: Full
dataset_kwargs:
  # NOTE(review): presumably forwarded to the dataset loader so the stored
  # hub credentials are used — confirm against the loader.
  token: True

# --- Task identity and split -----------------------------------------------
task: "pope_adv"
test_split: adversarial
output_type: generate_until

# --- Prompt construction ---------------------------------------------------
# The !function tags reference callables in the `utils` module that is
# expected to sit next to this file (not visible from here).
doc_to_visual: !function utils.pope_doc_to_visual
doc_to_text: !function utils.pope_doc_to_text
# Name of the document field holding the reference answer.
doc_to_target: "answer"

# --- Generation ------------------------------------------------------------
# Sampling is disabled and a single beam is used.
generation_kwargs:
  max_new_tokens: 128
  temperature: 0
  # NOTE(review): top_p is presumably ignored while do_sample is false;
  # confirm every model backend accepts a value of 0 here.
  top_p: 0
  num_beams: 1
  do_sample: false

# --- Scoring ---------------------------------------------------------------
process_results: !function utils.pope_process_results
# Each metric is paired with its own aggregation callable from `utils`.
metric_list:
  - metric: pope_accuracy
    aggregation: !function utils.pope_aggregate_accuracy
    higher_is_better: true
  - metric: pope_precision
    aggregation: !function utils.pope_aggregate_precision
    higher_is_better: true
  - metric: pope_recall
    aggregation: !function utils.pope_aggregate_recall
    higher_is_better: true
  - metric: pope_f1_score
    aggregation: !function utils.pope_aggregate_f1_score
    higher_is_better: true
  - metric: pope_yes_ratio
    aggregation: !function utils.pope_aggregate_yes_ratio
    # NOTE(review): a "yes ratio" looks like a bias diagnostic rather than a
    # score to maximise — confirm that higher_is_better: true is intended.
    higher_is_better: true

# --- Metadata --------------------------------------------------------------
metadata:
  - version: 0.0