Timothy-Vinzent committed on
Commit b3013af · verified · 1 Parent(s): 47e7f20

Update app.py

Files changed (1)
  1. app.py +105 -54
app.py CHANGED
@@ -1,67 +1,88 @@
 import os
 import re
 import json
-
 import gradio as gr
 from openai import OpenAI
+import gspread
+from google.oauth2.service_account import Credentials
+
+# Define scopes for Google Sheets and Drive API access.
+SCOPES = [
+    "https://www.googleapis.com/auth/spreadsheets",
+    "https://www.googleapis.com/auth/drive"
+]

 # Initialize the OpenAI client with the API key from environment variables.
 client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

-# In-memory storage to track submitted emails (not persistent; resets on app restart).
+# In-memory set to track submitted emails (this resets when the app restarts).
 submitted_emails = set()

+def get_google_sheet():
+    """
+    Connects to the Google Sheet using service account credentials stored
+    in the environment variable "GOOGLE_CREDS_JSON" and returns the worksheet
+    named "Submissions" from the spreadsheet identified by "SPREADSHEET_ID".
+    """
+    creds = Credentials.from_service_account_info(
+        json.loads(os.environ["GOOGLE_CREDS_JSON"]),
+        scopes=SCOPES
+    )
+    gc = gspread.authorize(creds)
+    sh = gc.open_by_key(os.environ["SPREADSHEET_ID"])
+    worksheet = sh.worksheet("Submissions")
+    return worksheet
+
 def get_evaluation_questions():
     """
     Loads evaluation questions and expected answers from environment variables.

-    Expected environment variable names are:
-    - TEST_QUESTION_1: a JSON array of user query strings.
-    - TEST_EXPECTED: a JSON array of JSON-like strings representing the expected outputs.
+    Expected environment variables:
+    - TEST_QUESTION_1: a JSON array of user query strings.
+    - TEST_EXPECTED_1: a JSON array of JSON-like strings representing expected outputs.

     Both lists must be of equal length.
     """
     questions_str = os.environ.get("TEST_QUESTION_1")
-    print("questions",questions_str)
+    print("questions", questions_str)
     expected_str = os.environ.get("TEST_EXPECTED_1")
-    print("expected",expected_str)
+    print("expected", expected_str)
     if not questions_str or not expected_str:
         return []
     try:
         questions_list = json.loads(questions_str)
-        print("questions lst ",questions_list)
+        print("questions lst ", questions_list)
     except Exception as e:
         print(f"Error parsing questions: {str(e)}")
         return []
-    try:
+    try:
         expected_list = json.loads(expected_str)
-        print("expected lst",expected_list)
+        print("expected lst", expected_list)
     except Exception as e:
-        print(f"Error parsing solutions: {str(e)}")
+        print(f"Error parsing expected answers: {str(e)}")
         return []
     if len(questions_list) != len(expected_list):
-        print("length of question list", len(questions_list))
-        print("length of solution list", len(expected_list))
         print("Mismatch in length: questions list and expected answers list must have the same length.")
         return []
     return [{"question": q, "expected": e} for q, e in zip(questions_list, expected_list)]

-# Load the evaluation questions once at startup.
+# Load evaluation questions at startup.
 EVALUATION_QUESTIONS = get_evaluation_questions()

 def sanitize_input(text):
     """
-    Sanitizes input to prevent harmful content and limits its length.
+    Sanitizes input to allow only alphanumerics and some punctuation,
+    then truncates to 500 characters.
     """
-    # Allow alphanumerics and some punctuation, then truncate to 500 characters.
     clean_text = re.sub(r"[^a-zA-Z0-9\s.,!?@:\-]", "", text)
     return clean_text.strip()[:500]
-
+
 def sanitize_prompt(text):
     """
-    Sanitizes input to limits its length.
+    Sanitizes the system prompt by stripping and limiting its length.
     """
     return text.strip()[:8000]
+
 def validate_email(email):
     """
     Validates that the provided email is in a valid format.
@@ -72,64 +93,79 @@ def validate_email(email):

 def submit_prompt(email, name, system_prompt):
     """
-    Handles user submission:
-    - Validates email format.
-    - Checks if the email has already been used for submission.
-    - Evaluates the system prompt against predefined test questions.
-    - Prevents multiple submissions from the same email.
-    Returns the evaluation results or an error message if the submission is invalid.
+    Handles the full submission process:
+    - Validates email format.
+    - Checks if the email has already been used (by in-memory set and Google Sheet).
+    - Sanitizes input fields.
+    - Processes the system prompt against each evaluation question using the OpenAI API.
+    - For each test question, records the verdict and answer.
+    - Appends the submission as a new row in the Google Sheet with columns:
+      Name, Email, System Prompt, Score, and for each of the 7 test questions: verdict and answer.
+    Returns a result message with evaluation details.
     """
     # Validate email format.
     if not validate_email(email):
         return "Invalid email address. Please enter a valid email."
-
-    # Check if this email has already been used for submission.
+
+    # Check if this email has already been submitted (in-memory).
     if email in submitted_emails:
         return f"Submission already received for {email}. You can only submit once."
-
+
+    # Connect to Google Sheet and check if the email already exists.
+    try:
+        sheet = get_google_sheet()
+        email_col = sheet.col_values(2)  # Assumes column 2 contains the email addresses.
+        if email in email_col[1:]:  # Skip header row.
+            return f"Submission already received for {email}. You can only submit once."
+    except Exception as e:
+        print(f"Error accessing Google Sheet: {str(e)}")
+        return f"Error accessing Google Sheet: {str(e)}"
+
     # Sanitize inputs.
     email = sanitize_input(email)
     name = sanitize_input(name)
-    print("UNSAINITIZED SYSTEM PROMPT", system_prompt)
     system_prompt = sanitize_prompt(system_prompt)
-    print("CLEANED SYSTEM PROMPT", system_prompt)
-
+
     score = 0
-    responses = []
-
+    responses = []  # For display output.
+    verdicts = []  # For storing each question's verdict in the sheet.
+    answers_list = []  # For storing each question's answer in the sheet.
+
+    # Process each evaluation question.
     for item in EVALUATION_QUESTIONS:
         question = item["question"]
         expected = item["expected"]
         try:
-            # Use the new client-based API for chat completions.
             response = client.chat.completions.create(
-                model="gpt-4o-mini",  # Ensure this identifier matches the deployed model.
+                model="gpt-4o-mini",  # Ensure this model identifier matches your deployed model.
                 messages=[
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": question}
                 ]
            )
-            # Extract the answer from the response object.
             answer = response.choices[0].message.content.strip()
-            print("llm answer", answer)
+            print("LLM answer:", answer)
         except Exception as e:
             answer = f"Error during OpenAI API call: {str(e)}"
-
-        # Step 1: Check if the answer is valid JSON
+
+        verdict = ""
+        # Check if the answer is a valid JSON.
         try:
             parsed_answer = json.loads(answer)
+            answer_to_store = json.dumps(parsed_answer)  # Normalize parsed JSON as string.
         except json.JSONDecodeError as e:
-            verdict = f"Incorrect (Invalid JSON): {str(e)}"
+            verdict = f"Incorrect (Invalid JSON: {str(e)})"
             responses.append(
                 f"Question: {question}\n"
                 f"Answer: {answer}\n"
                 f"Expected: {json.dumps(expected)}\n"
                 f"Result: {verdict}\n"
             )
-            print(verdict)
+            verdicts.append(verdict)
+            answers_list.append(answer)
             continue
-
-        # Step 2: Check if all required keys are present
+
+        # Verify that all required keys are present.
         required_keys = ["document_level", "clause_level"]
         missing_keys = [key for key in required_keys if key not in parsed_answer]
         if missing_keys:
@@ -140,15 +176,16 @@ def submit_prompt(email, name, system_prompt):
                 f"Expected: {json.dumps(expected)}\n"
                 f"Result: {verdict}\n"
             )
-            print(verdict)
+            verdicts.append(verdict)
+            answers_list.append(json.dumps(parsed_answer))
             continue
-
-        # Step 3: Check if values for each key match
+
+        # Compare values for each required key.
         incorrect_values = []
         for key in required_keys:
             if parsed_answer[key] != expected[key]:
                 incorrect_values.append(key)
-
+
         if len(incorrect_values) == 2:
             verdict = "Incorrect (Both values are incorrect)"
         elif len(incorrect_values) == 1:
@@ -156,24 +193,38 @@
         else:
             score += 1
             verdict = "Correct"
-
+
         responses.append(
             f"Question: {question}\n"
             f"Answer: {json.dumps(parsed_answer)}\n"
             f"Expected: {json.dumps(expected)}\n"
             f"Result: {verdict}\n"
         )
-        print(verdict)
-
-
+        verdicts.append(verdict)
+        answers_list.append(json.dumps(parsed_answer))
+
     result_details = "\n".join(responses)
-
-    # Record this email as having submitted their prompt.
+
+    # Record this email locally so that subsequent submissions are blocked.
     submitted_emails.add(email)
-
+
+    # Prepare the row for Google Sheets:
+    # The row format is: Name, Email, System Prompt, Score, then for each of the 7 test questions: Verdict, Answer.
+    row = [name, email, system_prompt, str(score)]
+    for v, a in zip(verdicts, answers_list):
+        row.extend([v, a])
+
+    # Append the new row to the Google Sheet.
+    try:
+        sheet.append_row(row)
+    except Exception as e:
+        print(f"Error appending row to Google Sheet: {str(e)}")
+        return f"Error saving submission: {str(e)}"
+
     return (
         f"Thank you for your submission, {name}!\n\n"
-        f"Your evaluation score is {score} out of {len(EVALUATION_QUESTIONS)}.\n\nDetails:\n{result_details}"
+        f"Your evaluation score is {score} out of {len(EVALUATION_QUESTIONS)}.\n\n"
+        f"Details:\n{result_details}"
     )

 def build_interface():
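
Deploying this revision requires configuration beyond the existing OpenAI and test-question variables. Below is a minimal pre-flight sketch (not part of the commit) that assumes the environment variables the updated app.py reads: OPENAI_API_KEY, GOOGLE_CREDS_JSON (the service-account JSON as a single string), SPREADSHEET_ID, TEST_QUESTION_1, and TEST_EXPECTED_1, plus a worksheet named "Submissions" in the target spreadsheet.

import os

# Hypothetical pre-flight check: confirm the variables the app reads are present,
# without printing their values.
REQUIRED_VARS = [
    "OPENAI_API_KEY",
    "GOOGLE_CREDS_JSON",
    "SPREADSHEET_ID",
    "TEST_QUESTION_1",
    "TEST_EXPECTED_1",
]
for var in REQUIRED_VARS:
    print(f"{var}: {'set' if os.environ.get(var) else 'MISSING'}")

# Optional: verify the Sheets connection before launching the Gradio app.
# from app import get_google_sheet
# print("Connected to worksheet:", get_google_sheet().title)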