update utils.py for leaderboard submission · dadwadw233/lmms-eval@cb2f2d1 (original) (raw)

Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@ def mmupd_results_eval(results, args, upd_type, question_type):
145 145
146 146 overall_acc_dual, category_acc_dual, dual_results_df = mmupd_evaluator.calculate_dual_acc(standard_results_df, upd_results_df)
147 147
148 -file_json = generate_submission_file(f"mmupd_results_{upd_type}_{question_type}.json", args)
148 +file_json = generate_submission_file(f"mmupd_{upd_type}_{question_type}_dual_results.json", args)
149 149
150 150 details_info = {
151 151 "overall_acc_dual": overall_acc_dual,
@@ -159,7 +159,10 @@ def mmupd_results_eval(results, args, upd_type, question_type):
159 159 with open(file_json, "w") as f:
160 160 json.dump(details_info, f)
161 161
162 -file_excel = generate_submission_file(f"mmupd_results_{upd_type}_{question_type}_dual.xlsx", args)
162 +file_excel = generate_submission_file(f"mmupd_{upd_type}_{question_type}_dual_results_detail.xlsx", args)
163 163 dual_results_df.to_excel(file_excel, index=False)
164 164
165 +file_json = generate_submission_file(f"mmupd_{upd_type}_{question_type}_dual_results_detail.json", args)
166 +dual_results_df.to_json(file_json, orient="records", indent=2) # for huggingface leaderboard submission
167 +
165 168 return overall_acc_dual * 100