Update app.py
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 import re
+import json

 import gradio as gr
 from openai import OpenAI
@@ -13,20 +14,27 @@ submitted_emails = set()
 def get_evaluation_questions():
     """
     Loads evaluation questions and expected answers from environment variables.
+
     Expected environment variable names are:
-      TEST_QUESTION_1
-
+      - TEST_QUESTION_1: a JSON array of user query strings.
+      - TEST_EXPECTED: a JSON array of JSON-like strings representing the expected outputs.
+
+    Both lists must be of equal length.
     """
-
-
-
-
-
-
-
-
-
-
+    questions_str = os.environ.get("TEST_QUESTION_1")
+    expected_str = os.environ.get("TEST_EXPECTED")
+    if not questions_str or not expected_str:
+        return []
+    try:
+        questions_list = json.loads(questions_str)
+        expected_list = json.loads(expected_str)
+    except Exception as e:
+        print(f"Error parsing evaluation questions: {str(e)}")
+        return []
+    if len(questions_list) != len(expected_list):
+        print("Mismatch in length: questions list and expected answers list must have the same length.")
+        return []
+    return [{"question": q, "expected": e} for q, e in zip(questions_list, expected_list)]

 # Load the evaluation questions once at startup.
 EVALUATION_QUESTIONS = get_evaluation_questions()
@@ -89,7 +97,7 @@ def submit_prompt(email, name, system_prompt):
        except Exception as e:
            answer = f"Error during OpenAI API call: {str(e)}"

-        # Simple evaluation: check if the
+        # Simple evaluation: check if the expected output is a substring of the answer (case-insensitive).
        if expected.lower() in answer.lower():
            score += 1
            verdict = "Correct"
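For reference, a minimal sketch of how the two secrets could be populated and how the new parsing behaves. The variable names (TEST_QUESTION_1, TEST_EXPECTED) and the JSON-array format come from the diff above; the concrete questions and expected answers are invented for illustration.

import json
import os

# Hypothetical secret values: each variable holds a JSON array, and both arrays
# must have the same length (otherwise get_evaluation_questions() returns []).
os.environ["TEST_QUESTION_1"] = json.dumps([
    "What is the capital of France?",
    "Name the chemical symbol for water.",
])
os.environ["TEST_EXPECTED"] = json.dumps([
    "Paris",
    "H2O",
])

# Same parsing scheme as get_evaluation_questions(): decode both arrays and zip
# them into {"question", "expected"} pairs.
questions_list = json.loads(os.environ["TEST_QUESTION_1"])
expected_list = json.loads(os.environ["TEST_EXPECTED"])
pairs = [{"question": q, "expected": e} for q, e in zip(questions_list, expected_list)]
print(pairs[0])  # {'question': 'What is the capital of France?', 'expected': 'Paris'}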
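The grading rule added in the last hunk is a plain case-insensitive substring check. A small illustration, with an invented model answer:

# The answer counts as correct if the expected string appears anywhere in it,
# ignoring case; this mirrors the "Simple evaluation" comment in submit_prompt().
answer = "The capital of France is Paris."
expected = "Paris"

score = 0
if expected.lower() in answer.lower():
    score += 1
    verdict = "Correct"
else:
    verdict = "Incorrect"
print(score, verdict)  # prints: 1 Correct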