Timothy-Vinzent committed · Commit 3f8b483 · verified · 1 Parent(s): 9ed8b92

Update app.py

Files changed (1)
  1. app.py +21 -13
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 import re
+import json
 
 import gradio as gr
 from openai import OpenAI
@@ -13,20 +14,27 @@ submitted_emails = set()
 def get_evaluation_questions():
     """
     Loads evaluation questions and expected answers from environment variables.
+
     Expected environment variable names are:
-      TEST_QUESTION_1, TEST_EXPECTED_1,
-      TEST_QUESTION_2, TEST_EXPECTED_2, and so on.
+      - TEST_QUESTION_1: a JSON array of user query strings.
+      - TEST_EXPECTED: a JSON array of JSON-like strings representing the expected outputs.
+
+    Both lists must be of equal length.
     """
-    questions = []
-    i = 1
-    while True:
-        question = os.environ.get(f"TEST_QUESTION_{i}")
-        expected = os.environ.get(f"TEST_EXPECTED_{i}")
-        if not question or not expected:
-            break
-        questions.append({"question": question, "expected": expected})
-        i += 1
-    return questions
+    questions_str = os.environ.get("TEST_QUESTION_1")
+    expected_str = os.environ.get("TEST_EXPECTED")
+    if not questions_str or not expected_str:
+        return []
+    try:
+        questions_list = json.loads(questions_str)
+        expected_list = json.loads(expected_str)
+    except Exception as e:
+        print(f"Error parsing evaluation questions: {str(e)}")
+        return []
+    if len(questions_list) != len(expected_list):
+        print("Mismatch in length: questions list and expected answers list must have the same length.")
+        return []
+    return [{"question": q, "expected": e} for q, e in zip(questions_list, expected_list)]
 
 # Load the evaluation questions once at startup.
 EVALUATION_QUESTIONS = get_evaluation_questions()
@@ -89,7 +97,7 @@ def submit_prompt(email, name, system_prompt):
         except Exception as e:
             answer = f"Error during OpenAI API call: {str(e)}"
 
-        # Simple evaluation: check if the answer contains the expected substring.
+        # Simple evaluation: check if the expected output is a substring of the answer (case-insensitive).
         if expected.lower() in answer.lower():
             score += 1
             verdict = "Correct"
 