[Fix] import issues of multilingual llava and olympiadbench · EvolvingLMMs-Lab/lmms-eval@616edf4 (original) (raw)
File tree
15 files changed
lines changed
- multilingual-llava-bench-in-the-wild
15 files changed
lines changed
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
1 | +test_split: train | |
2 | +output_type: generate_until | |
3 | +doc_to_visual: !function utils.llava_doc_to_visual | |
4 | +doc_to_text: !function utils.llava_doc_to_text | |
5 | +doc_to_target: "gpt_answer" | |
6 | +generation_kwargs: | |
7 | +  until: | |
8 | +    - "ASSISTANT:" | |
9 | +  image_aspect_ratio: original | |
10 | +  max_new_tokens: 1024 | |
11 | +  temperature: 0 | |
12 | +  top_p: 0 | |
13 | +  num_beams: 1 | |
14 | +  do_sample: false | |
15 | +process_results: !function utils.llava_process_results | |
16 | +metric_list: | |
17 | +  - metric: gpt_eval_llava_all | |
18 | +    aggregation: !function utils.llava_all_aggregation | |
19 | +    higher_is_better: true | |
20 | +  - metric: gpt_eval_llava_conv | |
21 | +    aggregation: !function utils.llava_conv_aggregation | |
22 | +    higher_is_better: true | |
23 | +  - metric: gpt_eval_llava_detail | |
24 | +    aggregation: !function utils.llava_detail_aggregation | |
25 | +    higher_is_better: true | |
26 | +  - metric: gpt_eval_llava_complex | |
27 | +    aggregation: !function utils.llava_complex_aggregation | |
28 | +    higher_is_better: true | |
29 | +metadata: | |
30 | +  version: 0.0 | |
31 | +  gpt_eval_model_name: "gpt-4-0613" | |
32 | +model_specific_prompt_kwargs: | |
33 | +  default: | |
34 | +    pre_prompt: "" | |
35 | +    post_prompt: "" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,6 @@ | ||
1 | - | |
2 | -dataset_path: "gagan3012/multilingual-llava-bench" | |
3 | -dataset_kwargs: | |
4 | -config: arabic | |
5 | -token: True | |
6 | -task: "llava_in_the_wild_arabic" | |
7 | -test_split: train | |
8 | -output_type: generate_until | |
9 | -doc_to_visual: !function utils.llava_doc_to_visual | |
10 | -doc_to_text: !function utils.llava_doc_to_text | |
11 | -doc_to_target: "gpt_answer" | |
12 | -generation_kwargs: | |
13 | -until: | |
14 | - - "ASSISTANT:" | |
15 | -image_aspect_ratio: original | |
16 | -max_new_tokens: 1024 | |
17 | -temperature: 0 | |
18 | -top_p: 0 | |
19 | -num_beams: 1 | |
20 | -do_sample: false | |
21 | -process_results: !function utils.llava_process_results | |
22 | -metric_list: | |
23 | - - metric: gpt_eval_llava_all | |
24 | -aggregation: !function utils.llava_all_aggregation | |
25 | -higher_is_better: true | |
26 | - - metric: gpt_eval_llava_conv | |
27 | -aggregation: !function utils.llava_conv_aggregation | |
28 | -higher_is_better: true | |
29 | - - metric: gpt_eval_llava_detail | |
30 | -aggregation: !function utils.llava_detail_aggregation | |
31 | -higher_is_better: true | |
32 | - - metric: gpt_eval_llava_complex | |
33 | -aggregation: !function utils.llava_complex_aggregation | |
34 | -higher_is_better: true | |
35 | -metadata: | |
36 | -version: 0.0 | |
37 | -gpt_eval_model_name: "gpt-4-0613" | |
38 | -model_specific_prompt_kwargs: | |
39 | -default: | |
40 | -pre_prompt: "" | |
41 | -post_prompt: "" | |
42 | - | |
1 | +dataset_path: "gagan3012/multilingual-llava-bench" | |
2 | +dataset_kwargs: | |
3 | +  config: arabic | |
4 | +  token: True | |
5 | +task: "llava_in_the_wild_arabic" | |
6 | +include: _default_template.yaml |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,6 @@ | ||
1 | - | |
2 | -dataset_path: "gagan3012/multilingual-llava-bench" | |
3 | -dataset_kwargs: | |
4 | -config: bengali | |
5 | -token: True | |
6 | -task: "llava_in_the_wild_bengali" | |
7 | -test_split: train | |
8 | -output_type: generate_until | |
9 | -doc_to_visual: !function utils.llava_doc_to_visual | |
10 | -doc_to_text: !function utils.llava_doc_to_text | |
11 | -doc_to_target: "gpt_answer" | |
12 | -generation_kwargs: | |
13 | -until: | |
14 | - - "ASSISTANT:" | |
15 | -image_aspect_ratio: original | |
16 | -max_new_tokens: 1024 | |
17 | -temperature: 0 | |
18 | -top_p: 0 | |
19 | -num_beams: 1 | |
20 | -do_sample: false | |
21 | -process_results: !function utils.llava_process_results | |
22 | -metric_list: | |
23 | - - metric: gpt_eval_llava_all | |
24 | -aggregation: !function utils.llava_all_aggregation | |
25 | -higher_is_better: true | |
26 | - - metric: gpt_eval_llava_conv | |
27 | -aggregation: !function utils.llava_conv_aggregation | |
28 | -higher_is_better: true | |
29 | - - metric: gpt_eval_llava_detail | |
30 | -aggregation: !function utils.llava_detail_aggregation | |
31 | -higher_is_better: true | |
32 | - - metric: gpt_eval_llava_complex | |
33 | -aggregation: !function utils.llava_complex_aggregation | |
34 | -higher_is_better: true | |
35 | -metadata: | |
36 | -version: 0.0 | |
37 | -gpt_eval_model_name: "gpt-4-0613" | |
38 | -model_specific_prompt_kwargs: | |
39 | -default: | |
40 | -pre_prompt: "" | |
41 | -post_prompt: "" | |
42 | - | |
1 | +dataset_path: "gagan3012/multilingual-llava-bench" | |
2 | +dataset_kwargs: | |
3 | +  config: bengali | |
4 | +  token: True | |
5 | +task: "llava_in_the_wild_bengali" | |
6 | +include: _default_template.yaml |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,6 @@ | ||
1 | - | |
2 | -dataset_path: "gagan3012/multilingual-llava-bench" | |
3 | -dataset_kwargs: | |
4 | -config: chinese | |
5 | -token: True | |
6 | -task: "llava_in_the_wild_chinese" | |
7 | -test_split: train | |
8 | -output_type: generate_until | |
9 | -doc_to_visual: !function utils.llava_doc_to_visual | |
10 | -doc_to_text: !function utils.llava_doc_to_text | |
11 | -doc_to_target: "gpt_answer" | |
12 | -generation_kwargs: | |
13 | -until: | |
14 | - - "ASSISTANT:" | |
15 | -image_aspect_ratio: original | |
16 | -max_new_tokens: 1024 | |
17 | -temperature: 0 | |
18 | -top_p: 0 | |
19 | -num_beams: 1 | |
20 | -do_sample: false | |
21 | -process_results: !function utils.llava_process_results | |
22 | -metric_list: | |
23 | - - metric: gpt_eval_llava_all | |
24 | -aggregation: !function utils.llava_all_aggregation | |
25 | -higher_is_better: true | |
26 | - - metric: gpt_eval_llava_conv | |
27 | -aggregation: !function utils.llava_conv_aggregation | |
28 | -higher_is_better: true | |
29 | - - metric: gpt_eval_llava_detail | |
30 | -aggregation: !function utils.llava_detail_aggregation | |
31 | -higher_is_better: true | |
32 | - - metric: gpt_eval_llava_complex | |
33 | -aggregation: !function utils.llava_complex_aggregation | |
34 | -higher_is_better: true | |
35 | -metadata: | |
36 | -version: 0.0 | |
37 | -gpt_eval_model_name: "gpt-4-0613" | |
38 | -model_specific_prompt_kwargs: | |
39 | -default: | |
40 | -pre_prompt: "" | |
41 | -post_prompt: "" | |
42 | - | |
1 | +dataset_path: "gagan3012/multilingual-llava-bench" | |
2 | +dataset_kwargs: | |
3 | +  config: chinese | |
4 | +  token: True | |
5 | +task: "llava_in_the_wild_chinese" | |
6 | +include: _default_template.yaml |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,6 @@ | ||
1 | - | |
2 | -dataset_path: "gagan3012/multilingual-llava-bench" | |
3 | -dataset_kwargs: | |
4 | -config: french | |
5 | -token: True | |
6 | -task: "llava_in_the_wild_french" | |
7 | -test_split: train | |
8 | -output_type: generate_until | |
9 | -doc_to_visual: !function utils.llava_doc_to_visual | |
10 | -doc_to_text: !function utils.llava_doc_to_text | |
11 | -doc_to_target: "gpt_answer" | |
12 | -generation_kwargs: | |
13 | -until: | |
14 | - - "ASSISTANT:" | |
15 | -image_aspect_ratio: original | |
16 | -max_new_tokens: 1024 | |
17 | -temperature: 0 | |
18 | -top_p: 0 | |
19 | -num_beams: 1 | |
20 | -do_sample: false | |
21 | -process_results: !function utils.llava_process_results | |
22 | -metric_list: | |
23 | - - metric: gpt_eval_llava_all | |
24 | -aggregation: !function utils.llava_all_aggregation | |
25 | -higher_is_better: true | |
26 | - - metric: gpt_eval_llava_conv | |
27 | -aggregation: !function utils.llava_conv_aggregation | |
28 | -higher_is_better: true | |
29 | - - metric: gpt_eval_llava_detail | |
30 | -aggregation: !function utils.llava_detail_aggregation | |
31 | -higher_is_better: true | |
32 | - - metric: gpt_eval_llava_complex | |
33 | -aggregation: !function utils.llava_complex_aggregation | |
34 | -higher_is_better: true | |
35 | -metadata: | |
36 | -version: 0.0 | |
37 | -gpt_eval_model_name: "gpt-4-0613" | |
38 | -model_specific_prompt_kwargs: | |
39 | -default: | |
40 | -pre_prompt: "" | |
41 | -post_prompt: "" | |
42 | - | |
1 | +dataset_path: "gagan3012/multilingual-llava-bench" | |
2 | +dataset_kwargs: | |
3 | +  config: french | |
4 | +  token: True | |
5 | +task: "llava_in_the_wild_french" | |
6 | +include: _default_template.yaml |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,6 @@ | ||
1 | - | |
2 | -dataset_path: "gagan3012/multilingual-llava-bench" | |
3 | -dataset_kwargs: | |
4 | -config: hindi | |
5 | -token: True | |
6 | -task: "llava_in_the_wild_hindi" | |
7 | -test_split: train | |
8 | -output_type: generate_until | |
9 | -doc_to_visual: !function utils.llava_doc_to_visual | |
10 | -doc_to_text: !function utils.llava_doc_to_text | |
11 | -doc_to_target: "gpt_answer" | |
12 | -generation_kwargs: | |
13 | -until: | |
14 | - - "ASSISTANT:" | |
15 | -image_aspect_ratio: original | |
16 | -max_new_tokens: 1024 | |
17 | -temperature: 0 | |
18 | -top_p: 0 | |
19 | -num_beams: 1 | |
20 | -do_sample: false | |
21 | -process_results: !function utils.llava_process_results | |
22 | -metric_list: | |
23 | - - metric: gpt_eval_llava_all | |
24 | -aggregation: !function utils.llava_all_aggregation | |
25 | -higher_is_better: true | |
26 | - - metric: gpt_eval_llava_conv | |
27 | -aggregation: !function utils.llava_conv_aggregation | |
28 | -higher_is_better: true | |
29 | - - metric: gpt_eval_llava_detail | |
30 | -aggregation: !function utils.llava_detail_aggregation | |
31 | -higher_is_better: true | |
32 | - - metric: gpt_eval_llava_complex | |
33 | -aggregation: !function utils.llava_complex_aggregation | |
34 | -higher_is_better: true | |
35 | -metadata: | |
36 | -version: 0.0 | |
37 | -gpt_eval_model_name: "gpt-4-0613" | |
38 | -model_specific_prompt_kwargs: | |
39 | -default: | |
40 | -pre_prompt: "" | |
41 | -post_prompt: "" | |
42 | - | |
1 | +dataset_path: "gagan3012/multilingual-llava-bench" | |
2 | +dataset_kwargs: | |
3 | +  config: hindi | |
4 | +  token: True | |
5 | +task: "llava_in_the_wild_hindi" | |
6 | +include: _default_template.yaml |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,6 @@ | ||
1 | - | |
2 | -dataset_path: "gagan3012/multilingual-llava-bench" | |
3 | -dataset_kwargs: | |
4 | -config: japanese | |
5 | -token: True | |
6 | -task: "llava_in_the_wild_japanese" | |
7 | -test_split: train | |
8 | -output_type: generate_until | |
9 | -doc_to_visual: !function utils.llava_doc_to_visual | |
10 | -doc_to_text: !function utils.llava_doc_to_text | |
11 | -doc_to_target: "gpt_answer" | |
12 | -generation_kwargs: | |
13 | -until: | |
14 | - - "ASSISTANT:" | |
15 | -image_aspect_ratio: original | |
16 | -max_new_tokens: 1024 | |
17 | -temperature: 0 | |
18 | -top_p: 0 | |
19 | -num_beams: 1 | |
20 | -do_sample: false | |
21 | -process_results: !function utils.llava_process_results | |
22 | -metric_list: | |
23 | - - metric: gpt_eval_llava_all | |
24 | -aggregation: !function utils.llava_all_aggregation | |
25 | -higher_is_better: true | |
26 | - - metric: gpt_eval_llava_conv | |
27 | -aggregation: !function utils.llava_conv_aggregation | |
28 | -higher_is_better: true | |
29 | - - metric: gpt_eval_llava_detail | |
30 | -aggregation: !function utils.llava_detail_aggregation | |
31 | -higher_is_better: true | |
32 | - - metric: gpt_eval_llava_complex | |
33 | -aggregation: !function utils.llava_complex_aggregation | |
34 | -higher_is_better: true | |
35 | -metadata: | |
36 | -version: 0.0 | |
37 | -gpt_eval_model_name: "gpt-4-0613" | |
38 | -model_specific_prompt_kwargs: | |
39 | -default: | |
40 | -pre_prompt: "" | |
41 | -post_prompt: "" | |
42 | - | |
1 | +dataset_path: "gagan3012/multilingual-llava-bench" | |
2 | +dataset_kwargs: | |
3 | +  config: japanese | |
4 | +  token: True | |
5 | +task: "llava_in_the_wild_japanese" | |
6 | +include: _default_template.yaml |