[Fix] import issues of multilingual llava and olympiadbench · EvolvingLMMs-Lab/lmms-eval@616edf4 (original) (raw)

File tree

15 files changed

lines changed

15 files changed

lines changed

Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
1 +test_split: train
2 +output_type: generate_until
3 +doc_to_visual: !function utils.llava_doc_to_visual
4 +doc_to_text: !function utils.llava_doc_to_text
5 +doc_to_target: "gpt_answer"
6 +generation_kwargs:
7 +  until:
8 +    - "ASSISTANT:"
9 +  image_aspect_ratio: original
10 +  max_new_tokens: 1024
11 +  temperature: 0
12 +  top_p: 0
13 +  num_beams: 1
14 +  do_sample: false
15 +process_results: !function utils.llava_process_results
16 +metric_list:
17 +  - metric: gpt_eval_llava_all
18 +    aggregation: !function utils.llava_all_aggregation
19 +    higher_is_better: true
20 +  - metric: gpt_eval_llava_conv
21 +    aggregation: !function utils.llava_conv_aggregation
22 +    higher_is_better: true
23 +  - metric: gpt_eval_llava_detail
24 +    aggregation: !function utils.llava_detail_aggregation
25 +    higher_is_better: true
26 +  - metric: gpt_eval_llava_complex
27 +    aggregation: !function utils.llava_complex_aggregation
28 +    higher_is_better: true
29 +metadata:
30 +  version: 0.0
31 +  gpt_eval_model_name: "gpt-4-0613"
32 +model_specific_prompt_kwargs:
33 +  default:
34 +    pre_prompt: ""
35 +    post_prompt: ""
Original file line number Diff line number Diff line change
@@ -1,42 +1,6 @@
1 -
2 -dataset_path: "gagan3012/multilingual-llava-bench"
3 -dataset_kwargs:
4 -config: arabic
5 -token: True
6 -task: "llava_in_the_wild_arabic"
7 -test_split: train
8 -output_type: generate_until
9 -doc_to_visual: !function utils.llava_doc_to_visual
10 -doc_to_text: !function utils.llava_doc_to_text
11 -doc_to_target: "gpt_answer"
12 -generation_kwargs:
13 -until:
14 - - "ASSISTANT:"
15 -image_aspect_ratio: original
16 -max_new_tokens: 1024
17 -temperature: 0
18 -top_p: 0
19 -num_beams: 1
20 -do_sample: false
21 -process_results: !function utils.llava_process_results
22 -metric_list:
23 - - metric: gpt_eval_llava_all
24 -aggregation: !function utils.llava_all_aggregation
25 -higher_is_better: true
26 - - metric: gpt_eval_llava_conv
27 -aggregation: !function utils.llava_conv_aggregation
28 -higher_is_better: true
29 - - metric: gpt_eval_llava_detail
30 -aggregation: !function utils.llava_detail_aggregation
31 -higher_is_better: true
32 - - metric: gpt_eval_llava_complex
33 -aggregation: !function utils.llava_complex_aggregation
34 -higher_is_better: true
35 -metadata:
36 -version: 0.0
37 -gpt_eval_model_name: "gpt-4-0613"
38 -model_specific_prompt_kwargs:
39 -default:
40 -pre_prompt: ""
41 -post_prompt: ""
42 -
1 +dataset_path: "gagan3012/multilingual-llava-bench"
2 +dataset_kwargs:
3 +  config: arabic
4 +  token: True
5 +task: "llava_in_the_wild_arabic"
6 +include: _default_template.yaml
Original file line number Diff line number Diff line change
@@ -1,42 +1,6 @@
1 -
2 -dataset_path: "gagan3012/multilingual-llava-bench"
3 -dataset_kwargs:
4 -config: bengali
5 -token: True
6 -task: "llava_in_the_wild_bengali"
7 -test_split: train
8 -output_type: generate_until
9 -doc_to_visual: !function utils.llava_doc_to_visual
10 -doc_to_text: !function utils.llava_doc_to_text
11 -doc_to_target: "gpt_answer"
12 -generation_kwargs:
13 -until:
14 - - "ASSISTANT:"
15 -image_aspect_ratio: original
16 -max_new_tokens: 1024
17 -temperature: 0
18 -top_p: 0
19 -num_beams: 1
20 -do_sample: false
21 -process_results: !function utils.llava_process_results
22 -metric_list:
23 - - metric: gpt_eval_llava_all
24 -aggregation: !function utils.llava_all_aggregation
25 -higher_is_better: true
26 - - metric: gpt_eval_llava_conv
27 -aggregation: !function utils.llava_conv_aggregation
28 -higher_is_better: true
29 - - metric: gpt_eval_llava_detail
30 -aggregation: !function utils.llava_detail_aggregation
31 -higher_is_better: true
32 - - metric: gpt_eval_llava_complex
33 -aggregation: !function utils.llava_complex_aggregation
34 -higher_is_better: true
35 -metadata:
36 -version: 0.0
37 -gpt_eval_model_name: "gpt-4-0613"
38 -model_specific_prompt_kwargs:
39 -default:
40 -pre_prompt: ""
41 -post_prompt: ""
42 -
1 +dataset_path: "gagan3012/multilingual-llava-bench"
2 +dataset_kwargs:
3 +  config: bengali
4 +  token: True
5 +task: "llava_in_the_wild_bengali"
6 +include: _default_template.yaml
Original file line number Diff line number Diff line change
@@ -1,42 +1,6 @@
1 -
2 -dataset_path: "gagan3012/multilingual-llava-bench"
3 -dataset_kwargs:
4 -config: chinese
5 -token: True
6 -task: "llava_in_the_wild_chinese"
7 -test_split: train
8 -output_type: generate_until
9 -doc_to_visual: !function utils.llava_doc_to_visual
10 -doc_to_text: !function utils.llava_doc_to_text
11 -doc_to_target: "gpt_answer"
12 -generation_kwargs:
13 -until:
14 - - "ASSISTANT:"
15 -image_aspect_ratio: original
16 -max_new_tokens: 1024
17 -temperature: 0
18 -top_p: 0
19 -num_beams: 1
20 -do_sample: false
21 -process_results: !function utils.llava_process_results
22 -metric_list:
23 - - metric: gpt_eval_llava_all
24 -aggregation: !function utils.llava_all_aggregation
25 -higher_is_better: true
26 - - metric: gpt_eval_llava_conv
27 -aggregation: !function utils.llava_conv_aggregation
28 -higher_is_better: true
29 - - metric: gpt_eval_llava_detail
30 -aggregation: !function utils.llava_detail_aggregation
31 -higher_is_better: true
32 - - metric: gpt_eval_llava_complex
33 -aggregation: !function utils.llava_complex_aggregation
34 -higher_is_better: true
35 -metadata:
36 -version: 0.0
37 -gpt_eval_model_name: "gpt-4-0613"
38 -model_specific_prompt_kwargs:
39 -default:
40 -pre_prompt: ""
41 -post_prompt: ""
42 -
1 +dataset_path: "gagan3012/multilingual-llava-bench"
2 +dataset_kwargs:
3 +  config: chinese
4 +  token: True
5 +task: "llava_in_the_wild_chinese"
6 +include: _default_template.yaml
Original file line number Diff line number Diff line change
@@ -1,42 +1,6 @@
1 -
2 -dataset_path: "gagan3012/multilingual-llava-bench"
3 -dataset_kwargs:
4 -config: french
5 -token: True
6 -task: "llava_in_the_wild_french"
7 -test_split: train
8 -output_type: generate_until
9 -doc_to_visual: !function utils.llava_doc_to_visual
10 -doc_to_text: !function utils.llava_doc_to_text
11 -doc_to_target: "gpt_answer"
12 -generation_kwargs:
13 -until:
14 - - "ASSISTANT:"
15 -image_aspect_ratio: original
16 -max_new_tokens: 1024
17 -temperature: 0
18 -top_p: 0
19 -num_beams: 1
20 -do_sample: false
21 -process_results: !function utils.llava_process_results
22 -metric_list:
23 - - metric: gpt_eval_llava_all
24 -aggregation: !function utils.llava_all_aggregation
25 -higher_is_better: true
26 - - metric: gpt_eval_llava_conv
27 -aggregation: !function utils.llava_conv_aggregation
28 -higher_is_better: true
29 - - metric: gpt_eval_llava_detail
30 -aggregation: !function utils.llava_detail_aggregation
31 -higher_is_better: true
32 - - metric: gpt_eval_llava_complex
33 -aggregation: !function utils.llava_complex_aggregation
34 -higher_is_better: true
35 -metadata:
36 -version: 0.0
37 -gpt_eval_model_name: "gpt-4-0613"
38 -model_specific_prompt_kwargs:
39 -default:
40 -pre_prompt: ""
41 -post_prompt: ""
42 -
1 +dataset_path: "gagan3012/multilingual-llava-bench"
2 +dataset_kwargs:
3 +  config: french
4 +  token: True
5 +task: "llava_in_the_wild_french"
6 +include: _default_template.yaml
Original file line number Diff line number Diff line change
@@ -1,42 +1,6 @@
1 -
2 -dataset_path: "gagan3012/multilingual-llava-bench"
3 -dataset_kwargs:
4 -config: hindi
5 -token: True
6 -task: "llava_in_the_wild_hindi"
7 -test_split: train
8 -output_type: generate_until
9 -doc_to_visual: !function utils.llava_doc_to_visual
10 -doc_to_text: !function utils.llava_doc_to_text
11 -doc_to_target: "gpt_answer"
12 -generation_kwargs:
13 -until:
14 - - "ASSISTANT:"
15 -image_aspect_ratio: original
16 -max_new_tokens: 1024
17 -temperature: 0
18 -top_p: 0
19 -num_beams: 1
20 -do_sample: false
21 -process_results: !function utils.llava_process_results
22 -metric_list:
23 - - metric: gpt_eval_llava_all
24 -aggregation: !function utils.llava_all_aggregation
25 -higher_is_better: true
26 - - metric: gpt_eval_llava_conv
27 -aggregation: !function utils.llava_conv_aggregation
28 -higher_is_better: true
29 - - metric: gpt_eval_llava_detail
30 -aggregation: !function utils.llava_detail_aggregation
31 -higher_is_better: true
32 - - metric: gpt_eval_llava_complex
33 -aggregation: !function utils.llava_complex_aggregation
34 -higher_is_better: true
35 -metadata:
36 -version: 0.0
37 -gpt_eval_model_name: "gpt-4-0613"
38 -model_specific_prompt_kwargs:
39 -default:
40 -pre_prompt: ""
41 -post_prompt: ""
42 -
1 +dataset_path: "gagan3012/multilingual-llava-bench"
2 +dataset_kwargs:
3 +  config: hindi
4 +  token: True
5 +task: "llava_in_the_wild_hindi"
6 +include: _default_template.yaml
Original file line number Diff line number Diff line change
@@ -1,42 +1,6 @@
1 -
2 -dataset_path: "gagan3012/multilingual-llava-bench"
3 -dataset_kwargs:
4 -config: japanese
5 -token: True
6 -task: "llava_in_the_wild_japanese"
7 -test_split: train
8 -output_type: generate_until
9 -doc_to_visual: !function utils.llava_doc_to_visual
10 -doc_to_text: !function utils.llava_doc_to_text
11 -doc_to_target: "gpt_answer"
12 -generation_kwargs:
13 -until:
14 - - "ASSISTANT:"
15 -image_aspect_ratio: original
16 -max_new_tokens: 1024
17 -temperature: 0
18 -top_p: 0
19 -num_beams: 1
20 -do_sample: false
21 -process_results: !function utils.llava_process_results
22 -metric_list:
23 - - metric: gpt_eval_llava_all
24 -aggregation: !function utils.llava_all_aggregation
25 -higher_is_better: true
26 - - metric: gpt_eval_llava_conv
27 -aggregation: !function utils.llava_conv_aggregation
28 -higher_is_better: true
29 - - metric: gpt_eval_llava_detail
30 -aggregation: !function utils.llava_detail_aggregation
31 -higher_is_better: true
32 - - metric: gpt_eval_llava_complex
33 -aggregation: !function utils.llava_complex_aggregation
34 -higher_is_better: true
35 -metadata:
36 -version: 0.0
37 -gpt_eval_model_name: "gpt-4-0613"
38 -model_specific_prompt_kwargs:
39 -default:
40 -pre_prompt: ""
41 -post_prompt: ""
42 -
1 +dataset_path: "gagan3012/multilingual-llava-bench"
2 +dataset_kwargs:
3 +  config: japanese
4 +  token: True
5 +task: "llava_in_the_wild_japanese"
6 +include: _default_template.yaml