docs · dadwadw233/lmms-eval@908a161 (original) (raw)

`@@ -28,38 +28,15 @@

`

28

28

` },

`

29

29

` {

`

30

30

`"cell_type": "code",

`

31

``

`-

"execution_count": 45,

`

``

31

`+

"execution_count": null,

`

32

32

`"metadata": {

`

33

33

`"vscode": {

`

34

34

`"languageId": "bat"

`

35

35

` }

`

36

36

` },

`

37

``

`-

"outputs": [

`

38

``

`-

{

`

39

``

`-

"name": "stdout",

`

40

``

`-

"output_type": "stream",

`

41

``

`-

"text": [

`

42

``

`-

"--2024-06-19 14:09:51-- https://huggingface.co/datasets/pufanyi/VQAv2_TOY/resolve/main/source_data/sample_data.zip\n",

`

43

``

`-

"Resolving huggingface.co (huggingface.co)... 13.33.30.114, 13.33.30.49, 13.33.30.76, ...\n",

`

44

``

`-

"Connecting to huggingface.co (huggingface.co)|13.33.30.114|:443... connected.\n",

`

45

``

`-

"HTTP request sent, awaiting response... 302 Found\n",

`

46

``

`-

"Location: https://cdn-lfs-us-1.huggingface.co/repos/c9/82/c9827770a5c0b13c1b646a275968813f8705db30ac0de29f118bb316c2b2a4eb/8cc2e821b7c6e4b5726a6feeb6214cd2d4810d53f568a5f3565d78e6d1ee5403?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27sample_data.zip%3B+filename%3D%22sample_data.zip%22%3B&response-content-type=application%2Fzip&Expires=1719036591&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxOTAzNjU5MX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2M5LzgyL2M5ODI3NzcwYTVjMGIxM2MxYjY0NmEyNzU5Njg4MTNmODcwNWRiMzBhYzBkZTI5ZjExOGJiMzE2YzJiMmE0ZWIvOGNjMmU4MjFiN2M2ZTRiNTcyNmE2ZmVlYjYyMTRjZDJkNDgxMGQ1M2Y1NjhhNWYzNTY1ZDc4ZTZkMWVlNTQwMz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=kppoby2Wg9BYA-L2HJ0uShfMSULqTXjtN3cbdBdZTvMf4NvNXBJxc0mcPSiz-sqV7d7hJn32IzHze2JnnTGxrVrozYdHeoTuG0EtF%7ERgQz17PbzbEps-MPzl-h4G9d5RImWDBNN3OYTWyvSxFzn12d-owQKrkdEXejUZEkGdzvHgECzLPpuMw%7EXIctwxBBbxrHRtBNU57K2KBwOqw5rujHtQevhMaCeRgxRFlpfc3FDxsl4rUVHrCM79UhPwutpEAtOh%7Ep6%7EdgLOXal6oZKCnejCQg3AjgvuMe4Eot3J37a7yUGToRtx6XX8Q9I1SC2nScXIWwZndOQY-1VNSL1s-A__&Key-Pair-Id=K2FPYV99P2N66Q [following]\n",

`

47

``

`-

"--2024-06-19 14:09:51-- https://cdn-lfs-us-1.huggingface.co/repos/c9/82/c9827770a5c0b13c1b646a275968813f8705db30ac0de29f118bb316c2b2a4eb/8cc2e821b7c6e4b5726a6feeb6214cd2d4810d53f568a5f3565d78e6d1ee5403?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27sample_data.zip%3B+filename%3D%22sample_data.zip%22%3B&response-content-type=application%2Fzip&Expires=1719036591&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxOTAzNjU5MX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2M5LzgyL2M5ODI3NzcwYTVjMGIxM2MxYjY0NmEyNzU5Njg4MTNmODcwNWRiMzBhYzBkZTI5ZjExOGJiMzE2YzJiMmE0ZWIvOGNjMmU4MjFiN2M2ZTRiNTcyNmE2ZmVlYjYyMTRjZDJkNDgxMGQ1M2Y1NjhhNWYzNTY1ZDc4ZTZkMWVlNTQwMz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=kppoby2Wg9BYA-L2HJ0uShfMSULqTXjtN3cbdBdZTvMf4NvNXBJxc0mcPSiz-sqV7d7hJn32IzHze2JnnTGxrVrozYdHeoTuG0EtF%7ERgQz17PbzbEps-MPzl-h4G9d5RImWDBNN3OYTWyvSxFzn12d-owQKrkdEXejUZEkGdzvHgECzLPpuMw%7EXIctwxBBbxrHRtBNU57K2KBwOqw5rujHtQevhMaCeRgxRFlpfc3FDxsl4rUVHrCM79UhPwutpEAtOh%7Ep6%7EdgLOXal6oZKCnejCQg3AjgvuMe4Eot3J37a7yUGToRtx6XX8Q9I1SC2nScXIWwZndOQY-1VNSL1s-A__&Key-Pair-Id=K2FPYV99P2N66Q\n",

`

48

``

`-

"Resolving cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)... 3.165.102.80, 3.165.102.25, 3.165.102.95, ...\n",

`

49

``

`-

"Connecting to cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)|3.165.102.80|:443... connected.\n",

`

50

``

`-

"HTTP request sent, awaiting response... 200 OK\n",

`

51

``

`-

"Length: 2678607 (2.6M) [application/zip]\n",

`

52

``

`-

"Saving to: ‘data/sample_data.zip’\n",

`

53

``

`-

"\n",

`

54

``

`-

"sample_data.zip 100%[===================>] 2.55M 7.46MB/s in 0.3s \n",

`

55

``

`-

"\n",

`

56

``

`-

"2024-06-19 14:09:52 (7.46 MB/s) - ‘data/sample_data.zip’ saved [2678607/2678607]\n",

`

57

``

`-

"\n"

`

58

``

`-

]

`

59

``

`-

}

`

60

``

`-

],

`

``

37

`+

"outputs": [],

`

61

38

`"source": [

`

62

``

`-

"!wget https://huggingface.co/datasets/pufanyi/VQAv2_TOY/resolve/main/source_data/sample_data.zip -P data\n",

`

``

39

`+

"!wget https://huggingface.co/datasets/lmms-lab/VQAv2_TOY/resolve/main/source_data/sample_data.zip -P data\n",

`

63

40

`"!unzip data/sample_data.zip -d data"

`

64

41

` ]

`

65

42

` },

`

107

84

`"\n",

`

108

85

`"features = datasets.Features(\n",

`

109

86

`" {\n",

`

110

``

`-

" "question": datasets.Value("string"),\n",

`

111

87

`" "question_id": datasets.Value("int64"),\n",

`

``

88

`+

" "question": datasets.Value("string"),\n",

`

112

89

`" "image_id": datasets.Value("string"),\n",

`

113

90

`" "image": datasets.Image(),\n",

`

114

``

`-

" "answers": datasets.Sequence(datasets.Sequence(feature={"answer": datasets.Value("string"), "answer_confidence": datasets.Value("string"), "answer_id": datasets.Value("int64")})),\n",

`

115

``

`-

" "answer_type": datasets.Value("string"),\n",

`

116

``

`-

" "multiple_choice_answer": datasets.Value("string"),\n",

`

117

``

`-

" "question_type": datasets.Value("string"),\n",

`

118

91

`" }\n",

`

119

92

`")"

`

120

93

` ]

`

144

117

`"import json\n",

`

145

118

`"from PIL import Image\n",

`

146

119

`"\n",

`

147

``

`-

"KEYS = ["question", "question_id", "image_id", "answers", "answer_type", "multiple_choice_answer", "question_type"]\n",

`

148

``

`-

"\n",

`

149

120

`"def generator(qa_file, image_folder, image_prefix):\n",

`

150

``

`-

" # Open and load the question-answer file\n",

`

151

121

`" with open(qa_file, "r") as f:\n",

`

152

122

`" data = json.load(f)\n",

`

153

123

`" qa = data["questions"]\n",

`

154

124

`"\n",

`

155

125

`" for q in qa:\n",

`

156

``

`-

" # Get the image id\n",

`

157

126

`" image_id = q["image_id"]\n",

`

158

``

`-

" # Construct the image path\n",

`

159

127

`" image_path = os.path.join(image_folder, f"{image_prefix}_{image_id:012}.jpg")\n",

`

160

``

`-

" # Open the image and add it to the question-answer dictionary\n",

`

161

128

`" q["image"] = Image.open(image_path)\n",

`

162

``

`-

" # Check if all keys are present in the question-answer dictionary, if not add them with None value\n",

`

163

``

`-

" for key in KEYS:\n",

`

164

``

`-

" if key not in q:\n",

`

165

``

`-

" q[key] = None\n",

`

166

``

`-

" # Yield the question-answer dictionary\n",

`

167

129

`" yield q"

`

168

130

` ]

`

169

131

` },

`

`@@ -189,33 +151,34 @@

`

189

151

`"data_val = datasets.Dataset.from_generator(\n",

`

190

152

`" generator,\n",

`

191

153

`" gen_kwargs={\n",

`

192

``

`-

" "qa_file": "data/questions/v2_OpenEnded_mscoco_val2014_questions.json",\n",

`

193

``

`-

" "image_folder": "data/images/val2014",\n",

`

``

154

`+

" "qa_file": "data/questions/vqav2_toy_questions_val2014.json",\n",

`

``

155

`+

" "image_folder": "data/images",\n",

`

194

156

`" "image_prefix": "COCO_val2014",\n",

`

195

157

`" },\n",

`

196

``

`-

" features=features,\n",

`

``

158

`+

" # For this dataset, there is no need to specify the features, as all cells are non-null and all splits have the same schema\n",

`

``

159

`+

" # features=features,\n",

`

197

160

`" num_proc=NUM_PROC,\n",

`

198

161

`")\n",

`

199

162

`"\n",

`

200

163

`"data_test = datasets.Dataset.from_generator(\n",

`

201

164

`" generator,\n",

`

202

165

`" gen_kwargs={\n",

`

203

``

`-

" "qa_file": "data/questions/v2_OpenEnded_mscoco_test2015_questions.json",\n",

`

204

``

`-

" "image_folder": "data/images/test2015",\n",

`

``

166

`+

" "qa_file": "data/questions/vqav2_toy_questions_test2015.json",\n",

`

``

167

`+

" "image_folder": "data/images",\n",

`

205

168

`" "image_prefix": "COCO_test2015",\n",

`

206

169

`" },\n",

`

207

``

`-

" features=features,\n",

`

``

170

`+

" # features=features,\n",

`

208

171

`" num_proc=NUM_PROC,\n",

`

209

172

`")\n",

`

210

173

`"\n",

`

211

174

`"data_test_dev = datasets.Dataset.from_generator(\n",

`

212

175

`" generator,\n",

`

213

176

`" gen_kwargs={\n",

`

214

``

`-

" "qa_file": "data/questions/v2_OpenEnded_mscoco_test-dev2015_questions.json",\n",

`

215

``

`-

" "image_folder": "data/images/test2015",\n",

`

``

177

`+

" "qa_file": "data/questions/vqav2_toy_questions_test-dev2015.json",\n",

`

``

178

`+

" "image_folder": "data/images",\n",

`

216

179

`" "image_prefix": "COCO_test2015",\n",

`

217

180

`" },\n",

`

218

``

`-

" features=features,\n",

`

``

181

`+

" # features=features,\n",

`

219

182

`" num_proc=NUM_PROC,\n",

`

220

183

`")"

`

221

184

` ]

`

244

207

`"metadata": {},

`

245

208

`"outputs": [],

`

246

209

`"source": [

`

247

``

`-

"data.push_to_hub("pufanyi/VQAv2")"

`

``

210

`+

"data.push_to_hub("lmms-lab/VQAv2_TOY") # replace lmms-lab with your username"

`

248

211

` ]

`

249

212

` },

`

250

213

` {

`

251

``

`-

"cell_type": "code",

`

252

``

`-

"execution_count": 44,

`

``

214

`+

"cell_type": "markdown",

`

253

215

`"metadata": {},

`

254

``

`-

"outputs": [

`

255

``

`-

{

`

256

``

`-

"data": {

`

257

``

`-

"text/plain": [

`

258

``

`-

"CommitInfo(commit_url='https://huggingface.co/datasets/pufanyi/VQAv2_TOY/commit/b057eff450520a6e3fc7e6be88c3a172c4b5d99b', commit_message='Upload source_data/sample_data.zip with huggingface_hub', commit_description='', oid='b057eff450520a6e3fc7e6be88c3a172c4b5d99b', pr_url=None, pr_revision=None, pr_num=None)"

`

259

``

`-

]

`

260

``

`-

},

`

261

``

`-

"execution_count": 44,

`

262

``

`-

"metadata": {},

`

263

``

`-

"output_type": "execute_result"

`

264

``

`-

}

`

265

``

`-

],

`

266

216

`"source": [

`

267

``

`-

"from huggingface_hub import HfApi\n",

`

268

``

`-

"\n",

`

269

``

`-

"api = HfApi()\n",

`

270

``

`-

"api.upload_file(\n",

`

271

``

`-

" path_or_fileobj="/data/pufanyi/project/lmms-eval-public/tools/data/sample_data.zip",\n",

`

272

``

`-

" path_in_repo="source_data/sample_data.zip",\n",

`

273

``

`-

" repo_id="pufanyi/VQAv2_TOY",\n",

`

274

``

`-

" repo_type="dataset",\n",

`

275

``

`-

")"

`

``

217

`+

"Now, you can check the dataset on the Hugging Face dataset hub."

`

276

218

` ]

`

277

219

` },

`

278

220

` {

`