Spaces:
Running
Running
File size: 14,687 Bytes
821e9b3 3f8b483 821e9b3 9ed8b92 b3013af 5521e44 9ed8b92 b3013af 9ed8b92 5521e44 b3013af 821e9b3 3f8b483 b3013af 3f8b483 821e9b3 3f8b483 54a0bc8 c1cd0b6 54a0bc8 3f8b483 47934fb b3013af 3f8b483 b3013af 3f8b483 54a0bc8 3f8b483 54a0bc8 5521e44 b3013af 821e9b3 5521e44 821e9b3 b3013af 821e9b3 b3013af bee7793 b3013af bee7793 f5e6b21 b3013af 9ed8b92 821e9b3 9ed8b92 821e9b3 b3013af 9ed8b92 b3013af 9ed8b92 b3013af 9ed8b92 821e9b3 bee7793 b3013af 821e9b3 b3013af 821e9b3 54a0bc8 821e9b3 9ed8b92 b3013af 821e9b3 9ed8b92 821e9b3 b3013af 122c32d b3013af 122c32d b3013af 122c32d b3013af 122c32d b3013af 122c32d b3013af 122c32d b3013af 122c32d b3013af 122c32d 821e9b3 b3013af 821e9b3 122c32d 821e9b3 b3013af 821e9b3 b3013af 9ed8b92 b3013af 9ed8b92 5521e44 821e9b3 9ed8b92 821e9b3 54a0bc8 1feb2ff 5771b1d 54a0bc8 df2b29c 54a0bc8 1feb2ff 54a0bc8 1feb2ff 54a0bc8 1feb2ff 54a0bc8 1feb2ff 54a0bc8 a032499 54a0bc8 1feb2ff 54a0bc8 1feb2ff 54a0bc8 1feb2ff 54a0bc8 1feb2ff 54a0bc8 1feb2ff 54a0bc8 5771b1d 54a0bc8 5771b1d 54a0bc8 1feb2ff 54a0bc8 9ed8b92 821e9b3 7446fcd 821e9b3 9ed8b92 821e9b3 9ed8b92 821e9b3 9ed8b92 821e9b3 9ed8b92 821e9b3 54a0bc8 9ed8b92 821e9b3 5521e44 821e9b3 54a0bc8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 |
import os
import re
import json
import gradio as gr
from openai import OpenAI
import gspread
from google.oauth2.service_account import Credentials
# OAuth scopes requested for the Google service account; passed to
# Credentials.from_service_account_info() in get_google_sheet().
SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive"
]
# Initialize the OpenAI client with the API key from environment variables.
# os.environ[...] raises KeyError at import time if OPENAI_API_KEY is not set.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# In-memory set to track submitted emails (this resets when the app restarts).
# submit_prompt() also checks the Google Sheet, which is the persistent record.
submitted_emails = set()
def get_google_sheet():
    """
    Opens the "Submissions" worksheet of the configured spreadsheet.
    The service-account key is read as JSON from the "GOOGLE_CREDS_JSON"
    environment variable and the spreadsheet is looked up by the key stored
    in "SPREADSHEET_ID". A missing variable raises KeyError; errors from the
    Google client libraries propagate to the caller.
    """
    service_account_info = json.loads(os.environ["GOOGLE_CREDS_JSON"])
    credentials = Credentials.from_service_account_info(
        service_account_info,
        scopes=SCOPES,
    )
    spreadsheet = gspread.authorize(credentials).open_by_key(
        os.environ["SPREADSHEET_ID"]
    )
    return spreadsheet.worksheet("Submissions")
def get_evaluation_questions():
    """
    Loads the evaluation set from environment variables.
    Expected environment variables (each holding a JSON array):
    - TEST_QUESTION_1: user query strings.
    - TEST_DOCUMENTS_1: the documents associated with each query.
    - TEST_EXPECTED_1: the expected output for each query.
    All three arrays must have the same length.
    Returns a list of dicts with the keys "question", "expected" and "docs"
    (one per query). Returns an empty list when a variable is missing or
    empty, is not valid JSON, is not a JSON array, or when the array lengths
    differ; the reason is printed in every case except a missing variable.
    """
    # (label used in log messages, environment variable) in parsing order.
    sources = (
        ("questions", "TEST_QUESTION_1"),
        ("expected answers", "TEST_EXPECTED_1"),
        ("documents", "TEST_DOCUMENTS_1"),
    )
    raw_values = {label: os.environ.get(var) for label, var in sources}
    if not all(raw_values.values()):
        return []
    parsed = {}
    for label, _ in sources:
        try:
            value = json.loads(raw_values[label])
        except json.JSONDecodeError as e:
            print(f"Error parsing {label}: {str(e)}")
            return []
        # Valid JSON that is not an array (number, object, string) would
        # either crash len() or be zipped element-wise in a meaningless way.
        if not isinstance(value, list):
            print(f"Error parsing {label}: expected a JSON array, got {type(value).__name__}")
            return []
        parsed[label] = value
    questions_list = parsed["questions"]
    expected_list = parsed["expected answers"]
    docs_list = parsed["documents"]
    # Ensure all lists are of the same length.
    if not (len(questions_list) == len(expected_list) == len(docs_list)):
        print("Mismatch in length: questions, expected answers and documents lists must have the same length.")
        return []
    return [
        {"question": q, "expected": e, "docs": d}
        for q, e, d in zip(questions_list, expected_list, docs_list)
    ]
# Load evaluation questions at startup.
# The environment is read once, at import time; get_evaluation_questions()
# returns an empty list when the configuration is missing or invalid, in which
# case submit_prompt() has no questions to iterate over.
EVALUATION_QUESTIONS = get_evaluation_questions()
def sanitize_input(text):
    """
    Sanitizes free-text input for storage.
    Keeps only ASCII letters, digits, whitespace and the punctuation
    . , ! ? @ : - and removes everything else (including "_" and "+", so this
    is not suitable for normalizing email addresses). The result is stripped
    and limited to 500 characters.
    Returns an empty string when text is None.
    """
    if text is None:
        return ""
    clean_text = re.sub(r"[^a-zA-Z0-9\s.,!?@:\-]", "", text)
    # Truncation can cut in the middle of a whitespace run, so strip the
    # right-hand side again to keep the "stripped" guarantee.
    return clean_text.strip()[:500].rstrip()
def sanitize_prompt(text):
    """
    Sanitizes the system prompt by stripping surrounding whitespace and
    limiting its length to 8000 characters. The content itself is not
    filtered.
    Returns an empty string when text is None.
    """
    if text is None:
        return ""
    return text.strip()[:8000]
def validate_email(email):
    """
    Validates that the provided email is in a valid format.
    The whole string must match local-part@domain.tld, using only ASCII
    letters, digits and the characters "_ . + -" in the local part.
    Returns True if valid, False otherwise (including for non-string input).
    """
    if not isinstance(email, str):
        return False
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let "user@example.com\n" through.
    email_regex = r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+"
    return re.fullmatch(email_regex, email) is not None
def _grade_answer(answer, expected):
    """
    Grades one raw model answer against the expected output.
    Returns a (verdict, answer_to_store) tuple. The verdict is "Correct" only
    when the answer is a JSON object whose "document_level" and "clause_level"
    values both equal the expected ones. answer_to_store is the normalized
    JSON text, or the raw answer when it is not valid JSON.
    """
    required_keys = ["document_level", "clause_level"]
    # Check if the answer is a valid JSON.
    try:
        parsed_answer = json.loads(answer)
    except json.JSONDecodeError as e:
        return f"Incorrect (Invalid JSON: {str(e)})", answer
    stored_answer = json.dumps(parsed_answer)  # Normalize parsed JSON as string.
    # Verify that all required keys are present. Valid JSON that is not an
    # object (string, number, list, ...) cannot carry keys at all; treating it
    # as "all keys missing" avoids substring matches on strings and TypeErrors
    # on scalars.
    if isinstance(parsed_answer, dict):
        missing_keys = [key for key in required_keys if key not in parsed_answer]
    else:
        missing_keys = list(required_keys)
    if missing_keys:
        return f"Incorrect (Missing Keys: {', '.join(missing_keys)})", stored_answer
    # Compare values for each required key.
    # NOTE(review): assumes every expected entry is a mapping that contains
    # both required keys - confirm against the TEST_EXPECTED_1 configuration.
    incorrect_values = [
        key for key in required_keys if parsed_answer[key] != expected[key]
    ]
    if not incorrect_values:
        return "Correct", stored_answer
    if len(incorrect_values) == 2:
        return "Incorrect (Both values are incorrect)", stored_answer
    return (
        f"Incorrect (Value for key '{incorrect_values[0]}' is incorrect)",
        stored_answer,
    )


def submit_prompt(email, name, system_prompt):
    """
    Handles the full submission process:
    - Validates email format.
    - Checks if the email has already been used (by in-memory set and Google Sheet).
    - Sanitizes the name and system prompt.
    - Processes the system prompt against each evaluation question using the OpenAI API.
    - For each test question, records the verdict and answer.
    - Appends the submission as a new row in the Google Sheet with columns:
    Name, Email, System Prompt, Score, and for each test question: verdict and answer.
    Returns a message for the submitter: an error/duplicate notice, or a plain
    acknowledgement on success. Scores, verdicts and answers are written to
    the sheet only and are not included in the returned text.
    """
    # Validate email format. Surrounding whitespace is dropped first so that
    # " a@b.com" and "a@b.com" are the same submitter.
    email = (email or "").strip()
    # 254 characters is the practical upper bound for an address; this also
    # keeps an over-long value out of the sheet.
    if len(email) > 254 or not validate_email(email):
        return "Invalid email address. Please enter a valid email."
    # The validated address is used as-is from here on. Passing it through
    # sanitize_input() would strip "_" and "+", so the stored address would
    # differ from the one checked for duplicates and the same person could
    # submit again. Duplicates are compared case-insensitively.
    email_key = email.lower()
    # Check if this email has already been submitted (in-memory).
    if email_key in submitted_emails:
        return f"Submission already received for {email}. You can only submit once."
    # Connect to Google Sheet and check if the email already exists.
    try:
        sheet = get_google_sheet()
        email_col = sheet.col_values(2)  # Assumes column 2 contains the email addresses.
        existing = {str(value).strip().lower() for value in email_col[1:]}  # Skip header row.
        if email_key in existing:
            return f"Submission already received for {email}. You can only submit once."
    except Exception as e:
        print(f"Error accessing Google Sheet: {str(e)}")
        return f"Error accessing Google Sheet: {str(e)}"
    # Sanitize inputs.
    name = sanitize_input(name or "")
    system_prompt = sanitize_prompt(system_prompt or "")
    score = 0
    verdicts = []  # For storing each question's verdict in the sheet.
    answers_list = []  # For storing each question's answer in the sheet.
    # Process each evaluation question.
    # NOTE(review): each item also carries "docs", but only the question is
    # sent to the model - confirm whether the documents should be included.
    for item in EVALUATION_QUESTIONS:
        question = item["question"]
        expected = item["expected"]
        try:
            response = client.chat.completions.create(
                model="gpt-4o-mini",  # Ensure this model identifier matches your deployed model.
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": question}
                ]
            )
            answer = response.choices[0].message.content.strip()
        except Exception as e:
            # The error text is not valid JSON, so it is graded as incorrect
            # and stored in the sheet in place of the answer.
            answer = f"Error during OpenAI API call: {str(e)}"
        verdict, stored_answer = _grade_answer(answer, expected)
        if verdict == "Correct":
            score += 1
        verdicts.append(verdict)
        answers_list.append(stored_answer)
    # Prepare the row for Google Sheets:
    # The row format is: Name, Email, System Prompt, Score, then for each test question: Verdict, Answer.
    row = [name, email, system_prompt, str(score)]
    for v, a in zip(verdicts, answers_list):
        row.extend([v, a])
    # Append the new row to the Google Sheet.
    try:
        sheet.append_row(row)
    except Exception as e:
        print(f"Error appending row to Google Sheet: {str(e)}")
        return f"Error saving submission: {str(e)}"
    # Record this email locally only once the row is saved, so that a failed
    # save does not lock the submitter out until the app restarts.
    submitted_emails.add(email_key)
    return (
        f"Thank you for your submission, {name}!\n\n"
    )
def build_interface():
    """
    Constructs the Gradio interface: task description, two collapsed
    accordions (example workflow, instructions), the email / name / system
    prompt inputs and a submit button wired to submit_prompt().
    Returns the gr.Blocks instance; the caller is responsible for launching it.

    NOTE(review): the texts below describe a final JSON object with the keys
    "buyer_firm", "seller_firm", "third_party" and "contains_target_firm",
    while submit_prompt() grades the keys "document_level" and "clause_level".
    Confirm which of the two is current.
    """
    # The markdown bodies start at column 0 on purpose: the text is part of
    # the rendered page and is kept exactly as authored.
    overview_md = """
This task involves processing a user query to determine the relevance to the intended task, followed by analyzing textual data to extract information about law firms representing parties (Buyer, Seller, and Third Parties) and verifying the presence of a target company.
The system is designed to sequentially leverage three LLM functions:
**Step 1:** LLM1 determines if the user's query mentions any target company.
- If no target company is found, LLM1 responds with a message wrapped in `<user_message></user_message>` XML tags to inform the user that the query is irrelevant to this task.
- If the query contains a target company, LLM1 moves forward with a formatted acknowledgment of the identified target company.
**Step 2:** LLM2 examines four separate paragraphs provided as input. For each paragraph, it extracts specific information about:
- The Buyer's representative law firm.
- The Seller's representative law firm.
- Any third-party law firm present.
- Whether the target company is mentioned in the paragraph.
Each paragraph's results are formatted and concatenated for the next step.
**Step 3:** LLM3 compiles the information from all analyzed paragraphs and outputs a structured JSON object with the following keys:
```json
{
"buyer_firm": "string",
"seller_firm": "string",
"third_party": "string",
"contains_target_firm": boolean
}
```
The goal is to identify the representative law firms of involved parties and determine if the target company is mentioned, ensuring the results are structured and accurate.
**Key Considerations:**
- The output must adhere to the prescribed JSON format for the final step.
- Ensure the system can accurately extract and classify relevant information from the input paragraphs.
"""
    example_md = """
**Example Query and System Output:**
**User Query:**
*"Is Kirkland present?"*
Step 1 (LLM1):
- If no target company is identified:
Output: `<user_message>Query is not relevant to the intended task.</user_message>`
- If a target company is identified:
Output: *"The query mentions the target company Kirkland."*
Step 2 (LLM2 for Paragraphs):
**Input Paragraph Example:**
*"Representation agreements between Buyers and Kirkland & Ellis are included."*
**Output:**
*"Buyer Firm: Kirkland & Ellis, Seller Firm: None, Third Party Firm: None, Contains Target Firm: True."*
Step 3 (LLM3 Final Output):
Compiled JSON:
```json
{
"buyer_firm": "Kirkland & Ellis",
"seller_firm": null,
"third_party": null,
"contains_target_firm": true
}
```
"""
    instructions_md = """
- Design prompts that ensure proper interaction between the three LLM systems, with each step contributing to the final output.
- Ensure strict adherence to JSON formatting requirements (e.g., no extra characters that may cause JSON parsing errors).
- Test extensively to verify accurate law firm and target company identification.
**Output Requirements:**
- Ensure final LLM3 JSON output has the following keys:
- `"buyer_firm"`
- `"seller_firm"`
- `"third_party"`
- `"contains_target_firm"`
- Values must be accurately extracted or classified based on LLM2's parsed data.
**Hints for Crafting System Prompts:**
- Explicitly specify formatting requirements at each step.
- Clarify the task definitions and expected classifications in each system prompt for LLM1, LLM2, and LLM3.
- Test using diverse sample data for robustness.
You can only submit once, so validate your system prompts thoroughly using mock queries and example data before final submission.
Good Luck!
"""
    submission_md = """
Enter your name and email below, as listed in your CV, and submit your designed prompts.
Remember: Focus on clarity, accuracy, and structured responses to achieve a high score!
"""
    with gr.Blocks() as demo:
        gr.Markdown("# Applicant Task: Target Company & Law Firm Identification")
        gr.Markdown("## Identifying Parties, Law Firms, and Target Company Presence")
        # General description
        gr.Markdown(overview_md)
        # Example Inputs and Outputs in an Accordion
        with gr.Accordion("Example Workflow", open=False):
            gr.Markdown(example_md)
        # Challenge instructions and testing guidance
        with gr.Accordion("Task Instructions and Testing", open=False):
            gr.Markdown(instructions_md)
        # Shown outside the accordions so the submission hint is always visible.
        gr.Markdown(submission_md)
        # The previous placeholder was the literal text "[email protected]",
        # which is email-obfuscation residue rather than an example address.
        email_input = gr.Textbox(label="Email", placeholder="your.name@example.com")
        name_input = gr.Textbox(label="First Name, Last Name", placeholder="John, Smith")
        system_prompt_input = gr.Textbox(
            label="System Prompt",
            placeholder="Enter your system prompt here...",
            lines=6,
        )
        submit_button = gr.Button("Submit")
        output_text = gr.Textbox(label="Results", lines=15)
        # submit_prompt returns a single string, shown in the results box.
        submit_button.click(
            fn=submit_prompt,
            inputs=[email_input, name_input, system_prompt_input],
            outputs=output_text,
        )
    return demo
# Script entry point: build the UI and serve it when the file is run directly.
if __name__ == "__main__":
    interface = build_interface()
    # Launch the app on 0.0.0.0 so it is accessible externally (e.g., in a container).
    interface.launch(server_name="0.0.0.0", server_port=7860)