lmms-eval@d78ec86 (original) (raw)

`@@ -10,6 +10,8 @@

10

`import sys

11

`from typing import List, Dict, Optional, Union

12

`import re

13

import cv2

14

import numpy as np

13

15

14

16

`from loguru import logger as eval_logger

15

17

80

82

`# cache_dir = os.path.join(hf_home, cache_dir)

81

83

`# base_cache_dir = config["dataset_kwargs"]["cache_dir"]

82

84

`base_cache_dir = os.path.expanduser(hf_home)

83

-

85

# Read the task config that sits next to this module to find the name of the
# dataset cache directory.
with open(Path(__file__).parent / "videomme.yaml", "r") as f:
    raw_data = f.readlines()
# Drop lines carrying the custom "!function" tag, since yaml.safe_load cannot
# handle it.
safe_data = [line for line in raw_data if "!function" not in line]
cache_name = yaml.safe_load("".join(safe_data))["dataset_kwargs"]["cache_dir"]

93

+

94

+

95

def parse_subtitle_time(time_str):
    """Convert a subtitle timestamp into seconds.

    Args:
        time_str: Timestamp formatted as ``HH:MM:SS,mmm``. A ``.`` is also
            accepted as the millisecond separator, and leading/trailing
            whitespace is ignored.

    Returns:
        The timestamp as a number of seconds (float).

    Raises:
        ValueError: If the string does not split into hours, minutes and a
            ``seconds<sep>milliseconds`` field, or a field is not an integer.
    """
    h, m, s_ms = time_str.strip().split(":")
    # Normalise "SS.mmm" to the SRT-style "SS,mmm" before splitting.
    s, ms = s_ms.replace(".", ",").split(",")
    # The last field is taken as a count of milliseconds.
    return int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 1000

99

+

100

def load_subtitles(subtitle_path):
    """Parse an SRT subtitle file into a mapping of time ranges to text.

    The file is split into cues on blank lines. Within a cue the first line
    is ignored, the second line is the ``start --> end`` timing line, and
    all remaining lines are the cue text.

    Args:
        subtitle_path: Path of the ``.srt`` file; it is read as UTF-8.

    Returns:
        A dict mapping ``(start_seconds, end_seconds)`` tuples to the cue
        text, with multi-line cues joined by single spaces. Cues with fewer
        than three lines are skipped. If two cues share the same time range,
        the later one overwrites the earlier one.
    """
    subtitles = {}
    with open(subtitle_path, "r", encoding="utf-8") as srt_file:
        content = srt_file.read().split("\n\n")
    for section in content:
        # Strip before splitting so that stray blank lines around a cue do
        # not shift the index/timing lines or add empty text lines.
        lines = section.strip().split("\n")
        if len(lines) < 3:
            continue
        time_range = lines[1].split(" --> ")
        start_time = parse_subtitle_time(time_range[0])
        end_time = parse_subtitle_time(time_range[1])
        subtitles[(start_time, end_time)] = " ".join(lines[2:])
    return subtitles

114

+

115

def convert_time_to_frame(time_in_seconds, fps):
    """Return the frame index for a timestamp at the given frame rate.

    The product ``time_in_seconds * fps`` is truncated to an int.
    """
    frame_position = time_in_seconds * fps
    return int(frame_position)

117

+

118

def extract_subtitles(video_path, subtitle_path):
    """Express every subtitle cue of a video as a range of frame indices.

    Args:
        video_path: Path of the video; it is opened with OpenCV only to read
            its frame rate and frame count.
        subtitle_path: Path of the subtitle file passed to ``load_subtitles``.

    Returns:
        A tuple ``(subtitle_frames, total_frame)``. ``subtitle_frames`` is a
        list of ``(start_frame, end_frame, text)`` tuples, one per cue, and
        ``total_frame`` is the frame count reported by OpenCV as an int.
    """
    video = cv2.VideoCapture(video_path)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        total_frame = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Only metadata is needed, so release the capture handle right away
        # instead of leaving it open until garbage collection.
        video.release()

    subtitles = load_subtitles(subtitle_path)
    subtitle_frames = [
        (
            convert_time_to_frame(start_time, fps),
            convert_time_to_frame(end_time, fps),
            text,
        )
        for (start_time, end_time), text in subtitles.items()
    ]
    return subtitle_frames, total_frame

84

131

85

132

`def videomme_doc_to_visual(doc):

86

with open(Path(file).parent / "videomme.yaml", "r") as f:

87

raw_data = f.readlines()

88

safe_data = []

89

for i, line in enumerate(raw_data):

90

remove function definition since yaml load cannot handle it

91

if "!function" not in line:

92

safe_data.append(line)

93

cache_name = yaml.safe_load("".join(safe_data))["dataset_kwargs"]["cache_dir"]

133

+

94

134

`cache_dir = os.path.join(base_cache_dir, cache_name)

95

135

`video_path = doc["videoID"] + ".mp4"

96

136

`video_path = os.path.join(cache_dir, video_path)

`@@ -106,6 +146,71 @@ def videomme_doc_to_visual(doc):

106

146

107

147

108

148

def videomme_doc_to_text(doc, model_specific_prompt_kwargs=None):
    """Build the multiple-choice prompt for one document.

    Args:
        doc: Mapping with a ``"question"`` string and an ``"options"`` value;
            the options are rendered with ``str()``.
        model_specific_prompt_kwargs: Accepted for interface compatibility;
            not used by this function.

    Returns:
        The instruction line, the question followed by its options, and the
        closing ``"The best answer is:"`` line, joined by newlines.
    """
    # NOTE(review): the instruction mentions subtitles although this function
    # adds none to the prompt -- confirm this wording is intended.
    instruction = "Select the best answer to the following multiple-choice question based on the video and the subtitles. Respond with only the letter (A, B, C, or D) of the correct option."
    question_block = doc["question"] + "\n" + str(doc["options"])
    return "\n".join([instruction, question_block, "The best answer is:"])

155

# Frames + Subs
# This video's subtitles are listed below:
# 【subtitles】
#
# Select the best answer to the following multiple-choice question based on the video and the subtitles. Respond with only the letter (A, B, C, or D) of the correct option.
# 【question】
# The best answer is:
# Frames / Frames + Audio
# Select the best answer to the following multiple-choice question based on the video. Respond with only the letter (A, B, C, or D) of the correct option.
# 【question】
# The best answer is:

166

+

167

def videomme_doc_to_text_subtitle(doc, model_specific_prompt_kwargs=None):

168

cache_dir = os.path.join(base_cache_dir, cache_name)

169

video_path = doc["videoID"] + ".mp4"

170

subtitle_path=os.path.join(cache_dir,"subtitle",doc["videoID"]+".srt")

171

video_path = os.path.join(cache_dir, video_path)

172

if os.path.exists(subtitle_path): #Denote have subtitle

173

subtitle=open(subtitle_path).readlines()

174

else:

175

subtitle=""

176

subtitles_prompt="This video's subtitles are listed below: \n"

177

if subtitle=="":

178

subtitle="No subtitles available"

179

else:

180

if "gemini_api_flag" in model_specific_prompt_kwargs: #specific for gemini_api

181

if model_specific_prompt_kwargs['gemini_api_flag']=="full subtitle":

182

textlist=[]

183

for ele in subtitle:

184

pattern = r'(.*?)'

185

matches = re.findall(pattern, ele)

186

if matches:

187

textlist.append(matches[0])

188

subtitle_text="\n".join(textlist)

189

else:

190

if "frame_num" in model_specific_prompt_kwargs:

191

frame_num=model_specific_prompt_kwargs['frame_num']

192

subtitle_by_frame,total_frame=extract_subtitles(video_path,subtitle_path)

193

uniform_sampled_frames = np.linspace(0, total_frame - 1, frame_num, dtype=int).tolist()

194

+

195

subtitle_by_frame_idx=[]

196

for frame_idx in uniform_sampled_frames:

197

for idx,title in enumerate(subtitle_by_frame):

198

if frame_idx<title[1] and frame_idx>=title[0]:

199

subtitle_by_frame_idx.append(idx)

200

subtitle_by_frame_idx=list(set(subtitle_by_frame_idx))

201

+

202

textlist=[]

203

for idx in subtitle_by_frame_idx:

204

pattern = r'(.*?)'

205

raw_text=re.findall(pattern, subtitle_by_frame[idx][2])

206

try:

207

textlist.append(raw_text[0])

208

except:

209

continue

210

subtitle_text="\n".join(textlist)

211

subtitle=subtitle_text

212

+

213

option_prompt="Select the best answer to the following multiple-choice question based on the video and the subtitles. Respond with only the letter (A, B, C, or D) of the correct option."

109

214

`question = doc["question"]

110

215

`option = str(doc["options"])

111

216

`question = question + "\n" + option + model_specific_prompt_kwargs["post_prompt"]