Release llava-wilder · EvolvingLMMs-Lab/lmms-eval@efb5295 (original) (raw)
`@@ -13,17 +13,6 @@
`
13
13
`# Set up a logger
`
14
14
`eval_logger = logging.getLogger("lmms-eval")
`
15
15
``
16
``
`-
# Create a static variable to track if the message has been logged
`
17
``
`-
if not hasattr(eval_logger, "dashcope_warning_logged"):
`
18
``
`-
eval_logger.dashcope_warning_logged = False
`
19
``
-
20
``
`-
try:
`
21
``
`-
import dashscope
`
22
``
`-
except ImportError:
`
23
``
`-
if not eval_logger.dashcope_warning_logged:
`
24
``
`-
eval_logger.debug("Dashcope not found, make sure you install dashscope to use qwen vl")
`
25
``
`-
eval_logger.dashcope_warning_logged = True
`
26
``
-
27
16
`NUM_SECONDS_TO_SLEEP = 5
`
28
17
`dir_path = os.path.dirname(os.path.realpath(__file__))
`
29
18
``
58
47
`"Content-Type": "application/json",
`
59
48
` }
`
60
49
``
61
``
`-
elif API_TYPE == "qwen_vl":
`
62
``
`-
API_URL = os.getenv("QWEN_ENDPOINT", "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation")
`
63
``
`-
API_KEY = os.getenv("DASHSCOPE_API_KEY", "YOUR_API_KEY")
`
64
``
`-
headers = {
`
65
``
`-
"Authorization": f"Bearer {API_KEY}",
`
66
``
`-
"Content-Type": "application/json",
`
67
``
`-
}
`
68
``
-
69
50
``
70
51
`def get_chat_response(base64_image, prompt, max_retries=5, wait_time=10):
`
71
52
`headers = {
`
`@@ -114,29 +95,6 @@ def image_to_base64(pil_image):
`
114
95
`return base64.b64encode(buffered.getvalue()).decode("utf-8")
`
115
96
``
116
97
``
117
``
`-
def qwen_multimodal_conversation_call(text_content, image_content, retries=5):
`
118
``
`-
"""Simple single round multimodal conversation call."""
`
119
``
`-
messages = [{"role": "user", "content": [{"image": image_content}, {"text": text_content}]}]
`
120
``
`-
for attempt in range(retries):
`
121
``
`-
try:
`
122
``
`-
response_data = dashscope.MultiModalConversation.call(model=GPT_EVAL_MODEL_NAME, messages=messages)
`
123
``
`-
# A response status_code of HTTPStatus.OK indicates success,
`
124
``
`-
# otherwise it indicates the request has failed; you can get the error code
`
125
``
`-
# and message from code and message.
`
126
``
`-
content = response_data["output"]["choices"][0]["message"]["content"][0]["text"].strip()
`
127
``
`-
if content != "":
`
128
``
`-
return content, GPT_EVAL_MODEL_NAME
`
129
``
`-
break # If successful, break out of the loop
`
130
``
`-
except Exception as e:
`
131
``
`-
eval_logger.info(f"Attempt {attempt + 1} failed with error: {e}")
`
132
``
`-
if attempt < retries: # If we have retries left, sleep and then continue to next attempt
`
133
``
`-
time.sleep(NUM_SECONDS_TO_SLEEP)
`
134
``
`-
else: # If this was the last attempt, log and return empty
`
135
``
`-
eval_logger.error(f"All {retries} attempts failed. Last error message: {e}")
`
136
``
`-
return "", ""
`
137
``
`-
return "", ""
`
138
``
-
139
``
-
140
98
`def parse_score(review):
`
141
99
`try:
`
142
100
`score_pair = review.split("\n")[0]
`
`@@ -162,20 +120,13 @@ def llava_process_results(doc, result):
`
162
120
` """
`
163
121
`try:
`
164
122
`question = doc.get("question", "")
`
165
``
`-
ans1 = doc.get("gpt4v_answer", "")
`
``
123
`+
ans1 = doc.get("answer", "")
`
166
124
`ans2 = result[0] if result else ""
`
167
125
`content = f"[Question]\n{question}\n\n" + f"[Assistant 1]\n{ans1}\n\n[End of Assistant 1]\n\n" + f"[Assistant 2]\n{ans2}\n\n[End of Assistant 2]\n\n" f"[System]\n{judge_rules}\n\n"
`
168
126
`visuals = llava_doc_to_visual(doc)
`
169
``
`-
if API_TYPE == "qwen_vl":
`
170
``
`-
file_path = os.path.join(dir_path, f"tmp_{doc['question_id']}.jpg")
`
171
``
`-
visuals[0].save(file_path)
`
172
``
`-
image_content = "file://" + file_path
`
173
``
`-
review, model_name = qwen_multimodal_conversation_call(content, image_content=image_content)
`
174
``
`-
os.remove(file_path)
`
175
``
`-
elif API_TYPE == "openai":
`
176
``
`-
image_path = doc["image"]
`
177
``
`-
base64_image = image_to_base64(image_path)
`
178
``
`-
review, model_name = get_chat_response(base64_image, content)
`
``
127
`+
image_path = doc["image"]
`
``
128
`+
base64_image = image_to_base64(image_path)
`
``
129
`+
review, model_name = get_chat_response(base64_image, content)
`
179
130
`scores = parse_score(review)
`
180
131
`except Exception as e:
`
181
132
`eval_logger.error(f"Error for Question ID: {doc.get('question_id', 'Unknown')}: {e}")
`