Merge pull request #136 from Dousia/main · EvolvingLMMs-Lab/lmms-eval@2ebec77

import collections
import os
import json
import multiprocessing as mp  # needed by check_if_context_is_set below
from capture_metric.capture import CAPTURE
from pycocoevalcap.eval import COCOEvalCap, Bleu, Meteor, Rouge, Cider, Spice
from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocotools.coco import COCO
import io
from PIL import Image

from lmms_eval.tasks._task_utils.file_utils import generate_submission_file

import logging

eval_logger = logging.getLogger("lmms-eval")

dir_name = os.path.dirname(os.path.abspath(__file__))

detailcaps_METRICS = ["CAPTURE", "Bleu_4", "Bleu_3", "Bleu_2", "Bleu_1", "METEOR", "ROUGE_L", "CIDEr"]  # , "SPICE"]


def detailcaps_doc_to_visual(doc):
    return [Image.open(io.BytesIO(doc["binary"])).convert("RGB")]


def detailcaps_doc_to_text(doc, model_specific_prompt_kwargs=None):
    question = "Please carefully observe the image and come up with a caption for the image"
    if model_specific_prompt_kwargs and "prompt" in model_specific_prompt_kwargs:
        return model_specific_prompt_kwargs["prompt"]
    return question


def detailcaps_doc_to_target(doc):
    references = [
        doc["GT_Caption_GPT4O"],
        doc["GT_Caption_GPT4V"],
        doc["GT_Caption_Gemini15Pro"],
    ]
    return references


def detailcaps_process_result(doc, result):
    """
    Args:
        doc: an instance of the eval dataset
        result: [pred]
    Returns:
        a dictionary with key: metric name, value: metric value
    """

    pred = result[0]
    # The question id in our dataset is the image file itself
    image_id = doc["image"]

    data_dict = {"answer": detailcaps_doc_to_target(doc), "pred": pred, "image_id": image_id}

    return {f"detailcaps_{metric}": data_dict for metric in detailcaps_METRICS}
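
# Illustrative usage sketch (an assumption added for clarity, not part of the task code):
# shows the per-metric payload that detailcaps_process_result fans out, so every
# aggregation function below receives the same {"answer", "pred", "image_id"} dict.
# The field values are made-up placeholders.
def _example_process_result_payload():
    toy_doc = {
        "image": "toy.jpg",
        "GT_Caption_GPT4O": "a cat on a mat",
        "GT_Caption_GPT4V": "a cat sitting on a mat",
        "GT_Caption_Gemini15Pro": "a small cat resting on a striped mat",
    }
    per_metric = detailcaps_process_result(toy_doc, ["a cat lying on a mat"])
    # e.g. per_metric["detailcaps_CIDEr"] == {"answer": [<3 refs>], "pred": "a cat lying on a mat", "image_id": "toy.jpg"}
    return per_metric["detailcaps_CIDEr"]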


def check_if_context_is_set(expected_context="spawn"):
    # Get the name of the default multiprocessing start method
    default_context_name = mp.get_context().get_start_method()

    # Check whether the current start method matches the expected one
    is_set_to_expected = default_context_name == expected_context

    return is_set_to_expected
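
# Usage sketch (an assumption, not called anywhere in this file): force the "spawn"
# start method before scoring if it is not already the default, e.g. when the CAPTURE
# scorer spawns worker processes.
def _ensure_spawn_context():
    if not check_if_context_is_set("spawn"):
        mp.set_start_method("spawn", force=True)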


def detailcaps_aggregation_result(results, metric, args=None):
    scorers = [
        (Bleu(4), "Bleu_1"),
        (Bleu(4), "Bleu_2"),
        (Bleu(4), "Bleu_3"),
        (Bleu(4), "Bleu_4"),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (CAPTURE(), "CAPTURE"),
    ]
    scorers_dict = {s[1]: s for s in scorers}

    stored_results = []
    # In order for the coco eval tools to successfully create the index,
    # the dataset needs at least two dicts: 'annotations' and 'images'.
    # 'annotations' exactly reproduces the original annotations, while
    # 'images' only needs the image id, which is contained in the file name.
    dataset = {"annotations": [], "images": []}
    idx = 0

    for result in results:
        stored_results.append({"image_id": result["image_id"], "caption": result["pred"]})
        for a in result["answer"]:
            dataset["annotations"].append({"image_id": result["image_id"], "caption": a, "id": idx})
            idx += 1
        dataset["images"].append({"id": result["image_id"]})

    coco = COCO()
    # Manually create the index here
    coco.dataset = dataset
    coco.createIndex()

    detailcaps_result = coco.loadRes(stored_results)
    detailcaps_eval = COCOEvalCap(coco, detailcaps_result)

    imgIds = detailcaps_eval.params["image_id"]
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = detailcaps_eval.coco.imgToAnns[imgId]
        res[imgId] = detailcaps_eval.cocoRes.imgToAnns[imgId]

    eval_logger.info("tokenization...")
    tokenizer = PTBTokenizer()

    if metric == "CAPTURE":
        # CAPTURE consumes raw caption strings grouped by image id instead of tokenized annotations
        reorg_gts, reorg_res = collections.defaultdict(list), collections.defaultdict(list)
        for _, samples in gts.items():
            for sample in samples:
                reorg_gts[sample["image_id"]].append(sample["caption"])
        for _, samples in res.items():
            for sample in samples:
                reorg_res[sample["image_id"]].append(sample["caption"])
        gts, res = reorg_gts, reorg_res
    else:
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

    eval_logger.info(f"Computing {metric} scores...")

    score, scores = scorers_dict[metric][0].compute_score(gts, res)
    # When the metric is one of the Bleu variants, score will be a list
    if type(score) == list:
        n = int(metric.split("_")[-1])
        score = score[n - 1]

    path = generate_submission_file(f"detailcaps_val_{metric}_scores.json", args)
    eval_logger.info("Storing prediction that can be submitted to the server ...")
    with open(path, "w") as f:
        json.dump(stored_results, f, indent=4)
    eval_logger.info(f"Your result has been saved to {path}.")

    return score
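
# Minimal sketch (an assumption added for illustration, not part of the task code):
# the hand-built dataset layout that COCO.createIndex() / loadRes() need, mirroring
# the 'annotations' / 'images' construction inside detailcaps_aggregation_result.
# All ids and caption strings are placeholders.
def _example_manual_coco_index():
    coco = COCO()
    coco.dataset = {
        "annotations": [
            {"image_id": "toy.jpg", "caption": "a cat on a mat", "id": 0},
            {"image_id": "toy.jpg", "caption": "a small cat resting on a mat", "id": 1},
        ],
        "images": [{"id": "toy.jpg"}],
    }
    coco.createIndex()
    # Predictions are passed in as a list of {"image_id", "caption"} dicts,
    # the same format accumulated in stored_results above.
    return coco.loadRes([{"image_id": "toy.jpg", "caption": "a cat lying on a mat"}])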


def detailcaps_bleu4(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_4", args)


def detailcaps_bleu3(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_3", args)


def detailcaps_bleu2(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_2", args)


def detailcaps_bleu1(results, args=None):
    return detailcaps_aggregation_result(results, "Bleu_1", args)


def detailcaps_meteor(results, args=None):
    return detailcaps_aggregation_result(results, "METEOR", args)


def detailcaps_rougel(results, args=None):
    return detailcaps_aggregation_result(results, "ROUGE_L", args)


def detailcaps_cider(results, args=None):
    return detailcaps_aggregation_result(results, "CIDEr", args)


def detailcaps_spice(results, args=None):
    return detailcaps_aggregation_result(results, "SPICE", args)


def detailcaps_capture(results, args=None):
    return detailcaps_aggregation_result(results, "CAPTURE", args)


def detailcaps_test_process_result(doc, result):
    """
    Args:
        doc: an instance of the eval dataset
        result: [pred]
    Returns:
        a dictionary with key: metric name (in this case detailcaps_passthrough), value: metric value
    """
    return {"detailcaps_passthrough": {"pred": result[0], "image_id": doc["image_id"]}}


def detailcaps_test_aggregation_result(results, args=None):
    stored_results = []
    for result in results:
        stored_results.append({"image_id": int(result["image_id"]), "caption": result["pred"]})

    path = generate_submission_file("detailcaps_captions_detailcaps_test_alg_results.json", args)
    eval_logger.info("Storing prediction that can be submitted to the server ...")
    with open(path, "w") as f:
        json.dump(stored_results, f, indent=4)

    eval_logger.info(f"Your test result has been stored in {path}. Make sure you also have the val result stored to submit to the server on https://codalab.lisn.upsaclay.fr/competitions/7404#participate.")
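
# For reference (an illustrative note, not part of the task code), the submission file
# written above is a JSON list of COCO-style caption entries, one per test image, e.g.:
#
#     [
#         {"image_id": 139, "caption": "a cat lying on a mat"},
#         {"image_id": 285, "caption": "a dog running on grass"}
#     ]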