Merge pull request #136 from Dousia/main · EvolvingLMMs-Lab/lmms-eval@2ebec77

import collections
import os
import json
import multiprocessing as mp  # needed by check_if_context_is_set below
from capture_metric.capture import CAPTURE
from pycocoevalcap.eval import COCOEvalCap, Bleu, Meteor, Rouge, Cider, Spice
from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocotools.coco import COCO
import io
from PIL import Image

from lmms_eval.tasks._task_utils.file_utils import generate_submission_file

import logging

eval_logger = logging.getLogger("lmms-eval")

dir_name = os.path.dirname(os.path.abspath(__file__))

detailcaps_METRICS = ["CAPTURE", "Bleu_4", "Bleu_3", "Bleu_2", "Bleu_1", "METEOR", "ROUGE_L", "CIDEr"]  # , "SPICE"]


def detailcaps_doc_to_visual(doc):
    return [Image.open(io.BytesIO(doc["binary"])).convert("RGB")]


def detailcaps_doc_to_text(doc, model_specific_prompt_kwargs=None):
    question = "Please carefully observe the image and come up with a caption for the image"
    if model_specific_prompt_kwargs and "prompt" in model_specific_prompt_kwargs:
        return model_specific_prompt_kwargs["prompt"]
    return question


def detailcaps_doc_to_target(doc):
    references = [
        doc["GT_Caption_GPT4O"],
        doc["GT_Caption_GPT4V"],
        doc["GT_Caption_Gemini15Pro"],
    ]
    return references


def detailcaps_process_result(doc, result):
    """
    Args:
        doc: an instance of the eval dataset
        result: [pred]
    Returns:
        a dictionary with key: metric name, value: metric value
    """

    pred = result[0]
    # The question id in our dataset is the image file itself
    image_id = doc["image"]

    data_dict = {"answer": detailcaps_doc_to_target(doc), "pred": pred, "image_id": image_id}

    return {f"detailcaps_{metric}": data_dict for metric in detailcaps_METRICS}
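
# Illustrative usage sketch (an assumption added for clarity, not part of the task code):
# shows the per-metric payload that detailcaps_process_result fans out, so every
# aggregation function below receives the same {"answer", "pred", "image_id"} dict.
# The field values are made-up placeholders.
def _example_process_result_payload():
    toy_doc = {
        "image": "toy.jpg",
        "GT_Caption_GPT4O": "a cat on a mat",
        "GT_Caption_GPT4V": "a cat sitting on a mat",
        "GT_Caption_Gemini15Pro": "a small cat resting on a striped mat",
    }
    per_metric = detailcaps_process_result(toy_doc, ["a cat lying on a mat"])
    # e.g. per_metric["detailcaps_CIDEr"] == {"answer": [<3 refs>], "pred": "a cat lying on a mat", "image_id": "toy.jpg"}
    return per_metric["detailcaps_CIDEr"]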


def check_if_context_is_set(expected_context="spawn"):
    # Get the name of the default multiprocessing start method
    default_context_name = mp.get_context().get_start_method()

    # Check whether the current start method matches the expected one
    is_set_to_expected = default_context_name == expected_context

    return is_set_to_expected
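
# Usage sketch (an assumption, not called anywhere in this file): force the "spawn"
# start method before scoring if it is not already the default, e.g. when the CAPTURE
# scorer spawns worker processes.
def _ensure_spawn_context():
    if not check_if_context_is_set("spawn"):
        mp.set_start_method("spawn", force=True)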


def detailcaps_aggregation_result(results, metric, args=None):
    scorers = [
        (Bleu(4), "Bleu_1"),
        (Bleu(4), "Bleu_2"),
        (Bleu(4), "Bleu_3"),
        (Bleu(4), "Bleu_4"),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (CAPTURE(), "CAPTURE"),
    ]
    scorers_dict = {s[1]: s for s in scorers}

    stored_results = []
    # In order for the coco eval tools to successfully create the index,
    # the dataset needs at least two dicts: 'annotations' and 'images'.
    # 'annotations' exactly reproduces the original annotations, while
    # 'images' only needs the image id, which is contained in the file name.
    dataset = {"annotations": [], "images": []}
    idx = 0

    for result in results:
        stored_results.append({"image_id": result["image_id"], "caption": result["pred"]})
        for a in result["answer"]:
            dataset["annotations"].append({"image_id": result["image_id"], "caption": a, "id": idx})
            idx += 1
        dataset["images"].append({"id": result["image_id"]})

    coco = COCO()
    # Manually create the index here
    coco.dataset = dataset
    coco.createIndex()

    detailcaps_result = coco.loadRes(stored_results)
    detailcaps_eval = COCOEvalCap(coco, detailcaps_result)

    imgIds = detailcaps_eval.params["image_id"]
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = detailcaps_eval.coco.imgToAnns[imgId]
        res[imgId] = detailcaps_eval.cocoRes.imgToAnns[imgId]

    eval_logger.info("tokenization...")
    tokenizer = PTBTokenizer()

    if metric == "CAPTURE":
        # CAPTURE consumes raw caption strings grouped by image id instead of tokenized annotations
        reorg_gts, reorg_res = collections.defaultdict(list), collections.defaultdict(list)
        for _, samples in gts.items():
            for sample in samples:
                reorg_gts[sample["image_id"]].append(sample["caption"])
        for _, samples in res.items():
            for sample in samples:
                reorg_res[sample["image_id"]].append(sample["caption"])
        gts, res = reorg_gts, reorg_res
    else:
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

    eval_logger.info(f"Computing {metric} scores...")

    score, scores = scorers_dict[metric][0].compute_score(gts, res)
    # When the metric is one of the Bleu variants, score will be a list
    if type(score) == list:
        n = int(metric.split("_")[-1])
        score = score[n - 1]

    path = generate_submission_file(f"detailcaps_val_{metric}_scores.json", args)
    eval_logger.info("Storing prediction that can be submitted to the server ...")
    with open(path, "w") as f:
        json.dump(stored_results, f, indent=4)
    eval_logger.info(f"Your result has been saved to {path}.")

    return score
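
# Minimal sketch (an assumption added for illustration, not part of the task code):
# the hand-built dataset layout that COCO.createIndex() / loadRes() need, mirroring
# the 'annotations' / 'images' construction inside detailcaps_aggregation_result.
# All ids and caption strings are placeholders.
def _example_manual_coco_index():
    coco = COCO()
    coco.dataset = {
        "annotations": [
            {"image_id": "toy.jpg", "caption": "a cat on a mat", "id": 0},
            {"image_id": "toy.jpg", "caption": "a small cat resting on a mat", "id": 1},
        ],
        "images": [{"id": "toy.jpg"}],
    }
    coco.createIndex()
    # Predictions are passed in as a list of {"image_id", "caption"} dicts,
    # the same format accumulated in stored_results above.
    return coco.loadRes([{"image_id": "toy.jpg", "caption": "a cat lying on a mat"}])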


def detailcaps_bleu4(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_4", args)


def detailcaps_bleu3(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_3", args)


def detailcaps_bleu2(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_2", args)


def detailcaps_bleu1(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_1", args)


def detailcaps_meteor(results, args=None):
    return detailcaps_aggregation_result(results, "METEOR", args)


def detailcaps_rougel(results, args=None):
    return detailcaps_aggregation_result(results, "ROUGE_L", args)


def detailcaps_cider(results, args=None):
    return detailcaps_aggregation_result(results, "CIDEr", args)


def detailcaps_spice(results, args=None):
    return detailcaps_aggregation_result(results, "SPICE", args)


def detailcaps_capture(results, args=None):
    return detailcaps_aggregation_result(results, "CAPTURE", args)


def detailcaps_test_process_result(doc, result):
    """
    Args:
        doc: an instance of the eval dataset
        result: [pred]
    Returns:
        a dictionary with key: metric name (in this case detailcaps_passthrough), value: metric value
    """
    return {"detailcaps_passthrough": {"pred": result[0], "image_id": doc["image_id"]}}


def detailcaps_test_aggregation_result(results, args=None):
    stored_results = []
    for result in results:
        stored_results.append({"image_id": int(result["image_id"]), "caption": result["pred"]})

    path = generate_submission_file("detailcaps_captions_detailcaps_test_alg_results.json", args)
    eval_logger.info("Storing prediction that can be submitted to the server ...")
    with open(path, "w") as f:
        json.dump(stored_results, f, indent=4)

    eval_logger.info(f"Your test result has been stored in {path}. Make sure you also have the val result stored to submit to the server on https://codalab.lisn.upsaclay.fr/competitions/7404#participate.")
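
# For reference (an illustrative note, not part of the task code), the submission file
# written above is a JSON list of COCO-style caption entries, one per test image, e.g.:
#
#     [
#         {"image_id": 139, "caption": "a cat lying on a mat"},
#         {"image_id": 285, "caption": "a dog running on grass"}
#     ]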