Update app.py
app.py CHANGED
(original version; removed or changed lines are marked with "-")

@@ -1,67 +1,88 @@
import os
import re
import json
-
import gradio as gr
from openai import OpenAI

# Initialize the OpenAI client with the API key from environment variables.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

-# In-memory …
submitted_emails = set()

def get_evaluation_questions():
    """
    Loads evaluation questions and expected answers from environment variables.

-    Expected environment …
-
-

    Both lists must be of equal length.
    """
    questions_str = os.environ.get("TEST_QUESTION_1")
-    print("questions",questions_str)
    expected_str = os.environ.get("TEST_EXPECTED_1")
-    print("expected",expected_str)
    if not questions_str or not expected_str:
        return []
    try:
        questions_list = json.loads(questions_str)
-        print("questions lst ",questions_list)
    except Exception as e:
        print(f"Error parsing questions: {str(e)}")
        return []
-    try:
        expected_list = json.loads(expected_str)
-        print("expected lst",expected_list)
    except Exception as e:
-        print(f"Error parsing …
        return []
    if len(questions_list) != len(expected_list):
-        print("length of question list", len(questions_list))
-        print("length of solution list", len(expected_list))
        print("Mismatch in length: questions list and expected answers list must have the same length.")
        return []
    return [{"question": q, "expected": e} for q, e in zip(questions_list, expected_list)]

-# Load …
EVALUATION_QUESTIONS = get_evaluation_questions()

def sanitize_input(text):
    """
-    Sanitizes input to …
    """
-    # Allow alphanumerics and some punctuation, then truncate to 500 characters.
    clean_text = re.sub(r"[^a-zA-Z0-9\s.,!?@:\-]", "", text)
    return clean_text.strip()[:500]
-
def sanitize_prompt(text):
    """
-    Sanitizes …
    """
    return text.strip()[:8000]
def validate_email(email):
    """
    Validates that the provided email is in a valid format.
@@ -72,64 +93,79 @@ def validate_email(email):

def submit_prompt(email, name, system_prompt):
    """
-    Handles …
-
-
-
-
-
    """
    # Validate email format.
    if not validate_email(email):
        return "Invalid email address. Please enter a valid email."
-
-    # Check if this email has already been …
    if email in submitted_emails:
        return f"Submission already received for {email}. You can only submit once."
-
    # Sanitize inputs.
    email = sanitize_input(email)
    name = sanitize_input(name)
-    print("UNSAINITIZED SYSTEM PROMPT", system_prompt)
    system_prompt = sanitize_prompt(system_prompt)
-
-
    score = 0
-    responses = []
-
    for item in EVALUATION_QUESTIONS:
        question = item["question"]
        expected = item["expected"]
        try:
-            # Use the new client-based API for chat completions.
            response = client.chat.completions.create(
-                model="gpt-4o-mini",  # Ensure this identifier matches …
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": question}
                ]
            )
-            # Extract the answer from the response object.
            answer = response.choices[0].message.content.strip()
-            print(" …
        except Exception as e:
            answer = f"Error during OpenAI API call: {str(e)}"
-
-
        try:
            parsed_answer = json.loads(answer)
        except json.JSONDecodeError as e:
-            verdict = f"Incorrect (Invalid JSON …
            responses.append(
                f"Question: {question}\n"
                f"Answer: {answer}\n"
                f"Expected: {json.dumps(expected)}\n"
                f"Result: {verdict}\n"
            )
-
            continue
-
-        # …
        required_keys = ["document_level", "clause_level"]
        missing_keys = [key for key in required_keys if key not in parsed_answer]
        if missing_keys:
@@ -140,15 +176,16 @@ def submit_prompt(email, name, system_prompt):
                f"Expected: {json.dumps(expected)}\n"
                f"Result: {verdict}\n"
            )
-
            continue
-
-        # …
        incorrect_values = []
        for key in required_keys:
            if parsed_answer[key] != expected[key]:
                incorrect_values.append(key)
-
        if len(incorrect_values) == 2:
            verdict = "Incorrect (Both values are incorrect)"
        elif len(incorrect_values) == 1:
@@ -156,24 +193,38 @@ def submit_prompt(email, name, system_prompt):
        else:
            score += 1
            verdict = "Correct"
-
        responses.append(
            f"Question: {question}\n"
            f"Answer: {json.dumps(parsed_answer)}\n"
            f"Expected: {json.dumps(expected)}\n"
            f"Result: {verdict}\n"
        )
-
-
-
    result_details = "\n".join(responses)
-
-    # Record this email …
    submitted_emails.add(email)
-
    return (
        f"Thank you for your submission, {name}!\n\n"
-        f"Your evaluation score is {score} out of {len(EVALUATION_QUESTIONS)}.\n\ …
    )

def build_interface():
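For context before the full updated listing below: the new get_google_sheet() helper assumes a spreadsheet (identified by SPREADSHEET_ID) that already contains a worksheet named "Submissions" whose second column holds email addresses under a header row, and submit_prompt() appends rows of the form Name, Email, System Prompt, Score, followed by verdict/answer pairs. A minimal one-off setup sketch under those assumptions; the header labels themselves are illustrative and not taken from the commit:

    import json
    import os

    import gspread
    from google.oauth2.service_account import Credentials

    SCOPES = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ]

    creds = Credentials.from_service_account_info(
        json.loads(os.environ["GOOGLE_CREDS_JSON"]), scopes=SCOPES
    )
    sh = gspread.authorize(creds).open_by_key(os.environ["SPREADSHEET_ID"])

    # One-off: create the "Submissions" worksheet with a header row.
    # Column 2 must hold the email addresses, since submit_prompt() reads
    # col_values(2) and skips the first (header) row.
    ws = sh.add_worksheet(title="Submissions", rows=100, cols=20)
    header = ["Name", "Email", "System Prompt", "Score"]
    for i in range(1, 8):  # the docstring mentions 7 test questions
        header.extend([f"Verdict {i}", f"Answer {i}"])  # illustrative labels
    ws.append_row(header)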
(updated version; added or changed lines are marked with "+")

import os
import re
import json
import gradio as gr
from openai import OpenAI
+import gspread
+from google.oauth2.service_account import Credentials
+
+# Define scopes for Google Sheets and Drive API access.
+SCOPES = [
+    "https://www.googleapis.com/auth/spreadsheets",
+    "https://www.googleapis.com/auth/drive"
+]

# Initialize the OpenAI client with the API key from environment variables.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

+# In-memory set to track submitted emails (this resets when the app restarts).
submitted_emails = set()

+def get_google_sheet():
+    """
+    Connects to the Google Sheet using service account credentials stored
+    in the environment variable "GOOGLE_CREDS_JSON" and returns the worksheet
+    named "Submissions" from the spreadsheet identified by "SPREADSHEET_ID".
+    """
+    creds = Credentials.from_service_account_info(
+        json.loads(os.environ["GOOGLE_CREDS_JSON"]),
+        scopes=SCOPES
+    )
+    gc = gspread.authorize(creds)
+    sh = gc.open_by_key(os.environ["SPREADSHEET_ID"])
+    worksheet = sh.worksheet("Submissions")
+    return worksheet
+
def get_evaluation_questions():
    """
    Loads evaluation questions and expected answers from environment variables.

+    Expected environment variables:
+    - TEST_QUESTION_1: a JSON array of user query strings.
+    - TEST_EXPECTED_1: a JSON array of JSON-like strings representing expected outputs.

    Both lists must be of equal length.
    """
    questions_str = os.environ.get("TEST_QUESTION_1")
+    print("questions", questions_str)
    expected_str = os.environ.get("TEST_EXPECTED_1")
+    print("expected", expected_str)
    if not questions_str or not expected_str:
        return []
    try:
        questions_list = json.loads(questions_str)
+        print("questions lst ", questions_list)
    except Exception as e:
        print(f"Error parsing questions: {str(e)}")
        return []
+    try:
        expected_list = json.loads(expected_str)
+        print("expected lst", expected_list)
    except Exception as e:
+        print(f"Error parsing expected answers: {str(e)}")
        return []
    if len(questions_list) != len(expected_list):
        print("Mismatch in length: questions list and expected answers list must have the same length.")
        return []
    return [{"question": q, "expected": e} for q, e in zip(questions_list, expected_list)]

+# Load evaluation questions at startup.
EVALUATION_QUESTIONS = get_evaluation_questions()

def sanitize_input(text):
    """
+    Sanitizes input to allow only alphanumerics and some punctuation,
+    then truncates to 500 characters.
    """
    clean_text = re.sub(r"[^a-zA-Z0-9\s.,!?@:\-]", "", text)
    return clean_text.strip()[:500]
+
def sanitize_prompt(text):
    """
+    Sanitizes the system prompt by stripping and limiting its length.
    """
    return text.strip()[:8000]
+
def validate_email(email):
    """
    Validates that the provided email is in a valid format.

@@ -72,64 +93,79 @@ def submit_prompt(email, name, system_prompt):

def submit_prompt(email, name, system_prompt):
    """
+    Handles the full submission process:
+    - Validates email format.
+    - Checks if the email has already been used (by in-memory set and Google Sheet).
+    - Sanitizes input fields.
+    - Processes the system prompt against each evaluation question using the OpenAI API.
+    - For each test question, records the verdict and answer.
+    - Appends the submission as a new row in the Google Sheet with columns:
+      Name, Email, System Prompt, Score, and for each of the 7 test questions: verdict and answer.
+    Returns a result message with evaluation details.
    """
    # Validate email format.
    if not validate_email(email):
        return "Invalid email address. Please enter a valid email."
+
+    # Check if this email has already been submitted (in-memory).
    if email in submitted_emails:
        return f"Submission already received for {email}. You can only submit once."
+
+    # Connect to Google Sheet and check if the email already exists.
+    try:
+        sheet = get_google_sheet()
+        email_col = sheet.col_values(2)  # Assumes column 2 contains the email addresses.
+        if email in email_col[1:]:  # Skip header row.
+            return f"Submission already received for {email}. You can only submit once."
+    except Exception as e:
+        print(f"Error accessing Google Sheet: {str(e)}")
+        return f"Error accessing Google Sheet: {str(e)}"
+
    # Sanitize inputs.
    email = sanitize_input(email)
    name = sanitize_input(name)
    system_prompt = sanitize_prompt(system_prompt)
+
    score = 0
+    responses = []  # For display output.
+    verdicts = []  # For storing each question's verdict in the sheet.
+    answers_list = []  # For storing each question's answer in the sheet.
+
+    # Process each evaluation question.
    for item in EVALUATION_QUESTIONS:
        question = item["question"]
        expected = item["expected"]
        try:
            response = client.chat.completions.create(
+                model="gpt-4o-mini",  # Ensure this model identifier matches your deployed model.
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": question}
                ]
            )
            answer = response.choices[0].message.content.strip()
+            print("LLM answer:", answer)
        except Exception as e:
            answer = f"Error during OpenAI API call: {str(e)}"
+
+        verdict = ""
+        # Check if the answer is a valid JSON.
        try:
            parsed_answer = json.loads(answer)
+            answer_to_store = json.dumps(parsed_answer)  # Normalize parsed JSON as string.
        except json.JSONDecodeError as e:
+            verdict = f"Incorrect (Invalid JSON: {str(e)})"
            responses.append(
                f"Question: {question}\n"
                f"Answer: {answer}\n"
                f"Expected: {json.dumps(expected)}\n"
                f"Result: {verdict}\n"
            )
+            verdicts.append(verdict)
+            answers_list.append(answer)
            continue
+
+        # Verify that all required keys are present.
        required_keys = ["document_level", "clause_level"]
        missing_keys = [key for key in required_keys if key not in parsed_answer]
        if missing_keys:

@@ -140,15 +176,16 @@ def submit_prompt(email, name, system_prompt):

                f"Expected: {json.dumps(expected)}\n"
                f"Result: {verdict}\n"
            )
+            verdicts.append(verdict)
+            answers_list.append(json.dumps(parsed_answer))
            continue
+
+        # Compare values for each required key.
        incorrect_values = []
        for key in required_keys:
            if parsed_answer[key] != expected[key]:
                incorrect_values.append(key)
+
        if len(incorrect_values) == 2:
            verdict = "Incorrect (Both values are incorrect)"
        elif len(incorrect_values) == 1:

@@ -156,24 +193,38 @@ def submit_prompt(email, name, system_prompt):

        else:
            score += 1
            verdict = "Correct"
+
        responses.append(
            f"Question: {question}\n"
            f"Answer: {json.dumps(parsed_answer)}\n"
            f"Expected: {json.dumps(expected)}\n"
            f"Result: {verdict}\n"
        )
+        verdicts.append(verdict)
+        answers_list.append(json.dumps(parsed_answer))
+
    result_details = "\n".join(responses)
+
+    # Record this email locally so that subsequent submissions are blocked.
    submitted_emails.add(email)
+
+    # Prepare the row for Google Sheets:
+    # The row format is: Name, Email, System Prompt, Score, then for each of the 7 test questions: Verdict, Answer.
+    row = [name, email, system_prompt, str(score)]
+    for v, a in zip(verdicts, answers_list):
+        row.extend([v, a])
+
+    # Append the new row to the Google Sheet.
+    try:
+        sheet.append_row(row)
+    except Exception as e:
+        print(f"Error appending row to Google Sheet: {str(e)}")
+        return f"Error saving submission: {str(e)}"
+
    return (
        f"Thank you for your submission, {name}!\n\n"
+        f"Your evaluation score is {score} out of {len(EVALUATION_QUESTIONS)}.\n\n"
+        f"Details:\n{result_details}"
    )

def build_interface():