add II-Bench · EvolvingLMMs-Lab/lmms-eval@6248113

The commit adds the following task utilities for II-Bench (every line is new in this diff):

```python
import json
import logging
import re
from collections import Counter

from lmms_eval.tasks._task_utils.file_utils import generate_submission_file

PROMPT = """Question: {}
(A) {}
(B) {}
(C) {}
(D) {}
(E) {}
(F) {}"""


def ii_bench_doc_to_text(doc, model_specific_prompt_kwargs):
    question = PROMPT.format(doc["question"], doc["option1"], doc["option2"], doc["option3"], doc["option4"], doc["option5"], doc["option6"])
    pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
    post_prompt = model_specific_prompt_kwargs["post_prompt"]
    return f"{pre_prompt}{question}{post_prompt}"


def ii_bench_doc_to_visual(doc):
    return [doc["image"].convert("RGB")]


def extract_option_labels(text, options=None):
    if isinstance(text, dict):
        return "error"

    # First look for answers written as a parenthesized letter, e.g. "(A)".
    pattern = r"\(([A-F])\)"
    matches = re.findall(pattern, text)

    if not matches:
        # Fall back to bare standalone letters A-F.
        pattern = r"\b([A-F])\b"
        matches = re.findall(pattern, text)

    if matches:
        # Pick the most frequent label; ties resolve via candidates[-1].
        counter = Counter(matches)
        most_common = counter.most_common()
        max_count = most_common[0][1]
        candidates = [item for item in most_common if item[1] == max_count]
        return candidates[-1][0]
    else:
        if options:
            # No letter found: match the response against the option texts themselves.
            counter = Counter()
            for i, option in enumerate(options, start=1):
                label = chr(64 + i)  # 1 -> "A", 2 -> "B", ...
                option_stripped = option.strip()
                if option_stripped in text:
                    counter[label] += 1
                elif text in option:
                    counter[label] += 1
            if counter:
                most_common = counter.most_common()
                max_count = most_common[0][1]
                candidates = [item for item in most_common if item[1] == max_count]
                return candidates[-1][0]
    return None


def ii_bench_process_results(doc, results):
    response = results[0]
    predict = extract_option_labels(response, [doc["option1"], doc["option2"], doc["option3"], doc["option4"], doc["option5"], doc["option6"]])
    return {"submission": {"id": doc["id"], "predict_answer": predict, "response": response}}


def ii_bench_aggregate_submissions(results, args):
    file = generate_submission_file("ii_bench_test_for_submission.json", args)
    with open(file, "w") as f:
        json.dump(results, f, indent=4)
    logging.getLogger("lmms-eval").info(f"Results saved to {file}")
```
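For reference, a minimal sanity check of the answer-extraction logic above. The sample document and model responses are made up for illustration, and the snippet assumes it runs in the same module as the functions shown (adjust the import to the actual task path otherwise):

```python
# Illustrative only: fabricated doc and responses exercising the hooks above.
doc = {
    "question": "What emotion does the image convey?",
    "option1": "Joy",
    "option2": "Sorrow",
    "option3": "Anger",
    "option4": "Fear",
    "option5": "Surprise",
    "option6": "Disgust",
}

# Prompt construction as performed by ii_bench_doc_to_text.
prompt_kwargs = {"pre_prompt": "", "post_prompt": "\nAnswer with the option's letter."}
print(ii_bench_doc_to_text(doc, prompt_kwargs))

options = [doc[f"option{i}"] for i in range(1, 7)]

# A parenthesized letter is caught by the first regex.
assert extract_option_labels("The answer is (B).", options) == "B"
# A bare letter falls through to the word-boundary pattern.
assert extract_option_labels("I would pick D here.", options) == "D"
# No letter at all: fall back to matching the option text itself.
assert extract_option_labels("The image clearly conveys sorrow... Sorrow.", options) == "B"
```

When several labels are tied for the highest count, `candidates[-1][0]` returns the tied label whose first occurrence appears latest in the response, since `Counter.most_common()` preserves first-seen order among equal counts.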