Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -22,12 +22,16 @@ def get_evaluation_questions():
|
|
22 |
Both lists must be of equal length.
|
23 |
"""
|
24 |
questions_str = os.environ.get("TEST_QUESTION_1")
|
|
|
25 |
expected_str = os.environ.get("TEST_EXPECTED_1")
|
|
|
26 |
if not questions_str or not expected_str:
|
27 |
return []
|
28 |
try:
|
29 |
questions_list = json.loads(questions_str)
|
|
|
30 |
expected_list = json.loads(expected_str)
|
|
|
31 |
except Exception as e:
|
32 |
print(f"Error parsing evaluation questions: {str(e)}")
|
33 |
return []
|
@@ -94,6 +98,7 @@ def submit_prompt(email, name, system_prompt):
|
|
94 |
)
|
95 |
# Extract the answer from the response object.
|
96 |
answer = response.choices[0].message.content.strip()
|
|
|
97 |
except Exception as e:
|
98 |
answer = f"Error during OpenAI API call: {str(e)}"
|
99 |
|
@@ -101,8 +106,10 @@ def submit_prompt(email, name, system_prompt):
|
|
101 |
if expected.lower() in answer.lower():
|
102 |
score += 1
|
103 |
verdict = "Correct"
|
|
|
104 |
else:
|
105 |
verdict = "Incorrect"
|
|
|
106 |
|
107 |
responses.append(
|
108 |
f"Question: {question}\n"
|
|
|
22 |
Both lists must be of equal length.
|
23 |
"""
|
24 |
questions_str = os.environ.get("TEST_QUESTION_1")
|
25 |
+
print("questions",questions_str)
|
26 |
expected_str = os.environ.get("TEST_EXPECTED_1")
|
27 |
+
print("expected",expected_str)
|
28 |
if not questions_str or not expected_str:
|
29 |
return []
|
30 |
try:
|
31 |
questions_list = json.loads(questions_str)
|
32 |
+
print("questions lst ",questions_list)
|
33 |
expected_list = json.loads(expected_str)
|
34 |
+
print("expected lst",expected_list)
|
35 |
except Exception as e:
|
36 |
print(f"Error parsing evaluation questions: {str(e)}")
|
37 |
return []
|
|
|
98 |
)
|
99 |
# Extract the answer from the response object.
|
100 |
answer = response.choices[0].message.content.strip()
|
101 |
+
print("llm answer", answer)
|
102 |
except Exception as e:
|
103 |
answer = f"Error during OpenAI API call: {str(e)}"
|
104 |
|
|
|
106 |
if expected.lower() in answer.lower():
|
107 |
score += 1
|
108 |
verdict = "Correct"
|
109 |
+
print(f"{expected.lower()} MATCHES {answer.lower()}")
|
110 |
else:
|
111 |
verdict = "Incorrect"
|
112 |
+
print(f"{expected.lower()} DOES NOT MATCH {answer.lower()}")
|
113 |
|
114 |
responses.append(
|
115 |
f"Question: {question}\n"
|