Merge pull request #136 from Dousia/main · EvolvingLMMs-Lab/lmms-eval@2ebec77
import collections
import io
import json
import logging
import multiprocessing as mp  # used by check_if_context_is_set below
import os

from capture_metric.capture import CAPTURE
from PIL import Image
from pycocoevalcap.eval import COCOEvalCap, Bleu, Meteor, Rouge, Cider, Spice
from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocotools.coco import COCO

from lmms_eval.tasks._task_utils.file_utils import generate_submission_file

eval_logger = logging.getLogger("lmms-eval")

dir_name = os.path.dirname(os.path.abspath(__file__))

detailcaps_METRICS = ["CAPTURE", "Bleu_4", "Bleu_3", "Bleu_2", "Bleu_1", "METEOR", "ROUGE_L", "CIDEr"]  # , "SPICE"]


def detailcaps_doc_to_visual(doc):
    return [Image.open(io.BytesIO(doc["binary"])).convert("RGB")]
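
# Hedged shape note: doc["binary"] is assumed to hold the raw encoded image
# bytes (e.g. JPEG/PNG), which PIL decodes from the in-memory buffer above.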


def detailcaps_doc_to_text(doc, model_specific_prompt_kwargs=None):
    # Fall back to a generic captioning prompt when no model-specific prompt is supplied.
    question = "Please carefully observe the image and come up with a caption for the image"
    if model_specific_prompt_kwargs and "prompt" in model_specific_prompt_kwargs:
        return model_specific_prompt_kwargs["prompt"]
    return question
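
# Hedged usage sketch (the prompt string below is an assumption, not taken
# from the task config):
#   detailcaps_doc_to_text(doc, {"prompt": "Describe this image in detail."})
#   -> "Describe this image in detail."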


def detailcaps_doc_to_target(doc):
    references = [
        doc["GT_Caption_GPT4O"],
        doc["GT_Caption_GPT4V"],
        doc["GT_Caption_Gemini15Pro"],
    ]
    return references


def detailcaps_process_result(doc, result):
    """
    Args:
        doc: an instance of the eval dataset
        result: [pred]
    Returns:
        a dictionary with key: metric name, value: metric value
    """
    pred = result[0]
    # The question id in our dataset is the image file itself
    image_id = doc["image"]

    data_dict = {"answer": detailcaps_doc_to_target(doc), "pred": pred, "image_id": image_id}

    return {f"detailcaps_{metric}": data_dict for metric in detailcaps_METRICS}
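
# Hedged shape note: the comprehension above fans the same data_dict out under
# every metric key, e.g.
#   {"detailcaps_CAPTURE": data_dict, "detailcaps_Bleu_4": data_dict, ...},
# so each aggregation function receives identical per-sample records.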


def check_if_context_is_set(expected_context="spawn"):
    # Get the name of the default multiprocessing start method
    default_context_name = mp.get_context().get_start_method()

    # Check whether the current context matches the expected one
    is_set_to_expected = default_context_name == expected_context

    return is_set_to_expected
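
# Hedged usage sketch: this helper is presumably here because CAPTURE uses
# multiprocessing; a caller might guard the start method like this
# (set_start_method is the stdlib API):
#   if not check_if_context_is_set("spawn"):
#       mp.set_start_method("spawn", force=True)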


def detailcaps_aggregation_result(results, metric, args=None):
    # Bleu(4) computes BLEU-1 through BLEU-4 in a single pass; the requested
    # n-gram order is picked out of the returned list further below.
    scorers = [
        (Bleu(4), "Bleu_1"),
        (Bleu(4), "Bleu_2"),
        (Bleu(4), "Bleu_3"),
        (Bleu(4), "Bleu_4"),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (CAPTURE(), "CAPTURE"),
    ]
    scorers_dict = {s[1]: s for s in scorers}

    stored_results = []
    # For the COCO eval tools to successfully create an index, the dataset
    # needs at least two keys: "annotations" and "images".
    # "annotations" exactly reproduces the original annotations, while
    # "images" only needs the image id, which is contained in the file name.
    dataset = {"annotations": [], "images": []}
    idx = 0

    for result in results:
        stored_results.append({"image_id": result["image_id"], "caption": result["pred"]})
        for a in result["answer"]:
            dataset["annotations"].append({"image_id": result["image_id"], "caption": a, "id": idx})
            idx += 1
        dataset["images"].append({"id": result["image_id"]})
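
    # Hedged shape note: after this loop,
    #   dataset == {"annotations": [{"image_id": ..., "caption": ..., "id": 0}, ...],
    #               "images": [{"id": ...}, ...]}
    #   stored_results == [{"image_id": ..., "caption": <pred>}, ...]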
    coco = COCO()
    # Manually create the index here
    coco.dataset = dataset
    coco.createIndex()

    detailcaps_result = coco.loadRes(stored_results)
    detailcaps_eval = COCOEvalCap(coco, detailcaps_result)

    imgIds = detailcaps_eval.params["image_id"]
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = detailcaps_eval.coco.imgToAnns[imgId]
        res[imgId] = detailcaps_eval.cocoRes.imgToAnns[imgId]

    eval_logger.info("tokenization...")
    tokenizer = PTBTokenizer()

    if metric == "CAPTURE":
        # CAPTURE consumes raw captions keyed by image id, so reorganize the
        # annotation dicts instead of running PTB tokenization.
        reorg_gts, reorg_res = collections.defaultdict(list), collections.defaultdict(list)
        for _, samples in gts.items():
            for sample in samples:
                reorg_gts[sample["image_id"]].append(sample["caption"])
        for _, samples in res.items():
            for sample in samples:
                reorg_res[sample["image_id"]].append(sample["caption"])
        gts, res = reorg_gts, reorg_res
    else:
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)
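
    # Hedged shape note: PTBTokenizer.tokenize maps
    #   {image_id: [{"caption": str, ...}, ...]} -> {image_id: [str, ...]},
    # so after either branch both gts and res are {image_id: [caption, ...]}.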

    eval_logger.info(f"Computing {metric} scores...")

    score, scores = scorers_dict[metric][0].compute_score(gts, res)
    # When the metric is one of the Bleu variants, score is a list of
    # BLEU-1..BLEU-4 values; pick the requested n-gram order.
    if isinstance(score, list):
        n = int(metric.split("_")[-1])
        score = score[n - 1]

    path = generate_submission_file(f"detailcaps_val_{metric}_scores.json", args)
    eval_logger.info("Storing prediction that can be submitted to the server ...")
    with open(path, "w") as f:
        json.dump(stored_results, f, indent=4)
    eval_logger.info(f"Your result has been saved to {path}.")

    return score
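
# Hedged toy example of the scorer API used above (the captions are made up):
#   gts = {"img0": ["a cat sits on a mat", "a cat on a mat"]}
#   res = {"img0": ["a cat sits on the mat"]}
#   bleu, _ = Bleu(4).compute_score(gts, res)  # bleu == [B1, B2, B3, B4]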


def detailcaps_bleu4(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_4", args)


def detailcaps_bleu3(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_3", args)


def detailcaps_bleu2(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_2", args)


def detailcaps_bleu1(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_1", args)


def detailcaps_meteor(results, args=None):
    return detailcaps_aggregation_result(results, "METEOR", args)


def detailcaps_rougel(results, args=None):
    return detailcaps_aggregation_result(results, "ROUGE_L", args)


def detailcaps_cider(results, args=None):
    return detailcaps_aggregation_result(results, "CIDEr", args)


# Note: "SPICE" has no entry in scorers_dict while it is commented out of
# detailcaps_METRICS, so this wrapper is currently unused.
def detailcaps_spice(results, args=None):
    return detailcaps_aggregation_result(results, "SPICE", args)


def detailcaps_capture(results, args=None):
    return detailcaps_aggregation_result(results, "CAPTURE", args)


def detailcaps_test_process_result(doc, result):
    """
    Args:
        doc: an instance of the eval dataset
        result: [pred]
    Returns:
        a dictionary with key: metric name (in this case detailcaps_passthrough), value: metric value
    """
    return {"detailcaps_passthrough": {"pred": result[0], "image_id": doc["image_id"]}}


def detailcaps_test_aggregation_result(results, args=None):
    stored_results = []
    for result in results:
        stored_results.append({"image_id": int(result["image_id"]), "caption": result["pred"]})

    path = generate_submission_file("detailcaps_captions_detailcaps_test_alg_results.json", args)
    eval_logger.info("Storing prediction that can be submitted to the server ...")
    with open(path, "w") as f:
        json.dump(stored_results, f, indent=4)

    eval_logger.info(f"Your test result has been stored in {path}. Make sure you also have the val result stored to submit to the server on https://codalab.lisn.upsaclay.fr/competitions/7404#participate.")
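
# Hedged wiring sketch (lmms-eval task-YAML conventions; the exact task config
# for this PR is not shown here, so the field values are assumptions):
#   doc_to_visual: !function utils.detailcaps_doc_to_visual
#   doc_to_text: !function utils.detailcaps_doc_to_text
#   process_results: !function utils.detailcaps_process_result
#   metric_list:
#     - metric: detailcaps_CAPTURE
#       aggregation: !function utils.detailcaps_capture
#       higher_is_better: true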