Spaces:
Running
Running
arvind6599
committed on
Commit
·
54a0bc8
1
Parent(s):
7446fcd
New description
Browse files
app.py
CHANGED
@@ -43,8 +43,10 @@ def get_evaluation_questions():
|
|
43 |
Both lists must be of equal length.
|
44 |
"""
|
45 |
questions_str = os.environ.get("TEST_QUESTION_1")
|
|
|
46 |
expected_str = os.environ.get("TEST_EXPECTED_1")
|
47 |
-
|
|
|
48 |
return []
|
49 |
try:
|
50 |
questions_list = json.loads(questions_str)
|
@@ -56,10 +58,18 @@ def get_evaluation_questions():
|
|
56 |
except Exception as e:
|
57 |
print(f"Error parsing expected answers: {str(e)}")
|
58 |
return []
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
print("Mismatch in length: questions list and expected answers list must have the same length.")
|
61 |
return []
|
62 |
-
|
|
|
63 |
|
64 |
# Load evaluation questions at startup.
|
65 |
EVALUATION_QUESTIONS = get_evaluation_questions()
|
@@ -129,6 +139,7 @@ def submit_prompt(email, name, system_prompt):
|
|
129 |
# Process each evaluation question.
|
130 |
for item in EVALUATION_QUESTIONS:
|
131 |
question = item["question"]
|
|
|
132 |
expected = item["expected"]
|
133 |
try:
|
134 |
response = client.chat.completions.create(
|
@@ -224,122 +235,109 @@ def build_interface():
|
|
224 |
Constructs the Gradio interface with a submission button and single-submission mechanism.
|
225 |
"""
|
226 |
with gr.Blocks() as demo:
|
227 |
-
gr.Markdown("# Applicant Task:
|
228 |
-
gr.Markdown("##
|
|
|
229 |
# General description
|
230 |
gr.Markdown("""
|
231 |
-
|
232 |
-
1. **Document Level**: Determines whether the query refers to a single document or multiple documents.
|
233 |
-
2. **Clause Level**: Identifies whether the query is focused on:
|
234 |
-
- A single clause,
|
235 |
-
- Multiple clauses, or
|
236 |
-
- General information not constrained to any specific clause.
|
237 |
|
238 |
-
|
239 |
|
240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
-
```
|
243 |
{
|
244 |
-
|
245 |
-
|
|
|
|
|
246 |
}
|
247 |
```
|
248 |
|
249 |
-
The goal is to
|
250 |
|
251 |
-
|
|
|
|
|
252 |
""")
|
253 |
|
254 |
# Example Inputs and Outputs in an Accordion
|
255 |
-
with gr.Accordion("Example
|
256 |
gr.Markdown("""
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
5. **User Message Example 5:**
|
286 |
-
- *"Tell me about the company’s financials."*
|
287 |
-
- **Expected Output:**
|
288 |
-
```
|
289 |
-
{"document_level": "single", "clause_level": "general"}
|
290 |
-
```
|
291 |
-
|
292 |
-
6. **User Message Example 6:**
|
293 |
-
- *"Provide all contracts and their confidentiality clauses."*
|
294 |
-
- **Expected Output:**
|
295 |
-
```
|
296 |
-
{"document_level": "multiple", "clause_level": "multiple"}
|
297 |
-
```
|
298 |
-
|
299 |
-
7. **User Message Example 7:**
|
300 |
-
- *"Extract the arbitration clause from this service agreement."*
|
301 |
-
- **Expected Output:**
|
302 |
-
```
|
303 |
-
{"document_level": "single", "clause_level": "single"}
|
304 |
```
|
305 |
""")
|
306 |
|
307 |
-
# Challenge instructions
|
308 |
-
with gr.Accordion("Task Instructions", open=False):
|
309 |
gr.Markdown("""
|
310 |
-
- Design
|
311 |
-
|
312 |
-
|
313 |
-
1. Specify formatting requirements (e.g., *"Output must be a valid JSON object"*).
|
314 |
-
- Note that we are not using constrained decoding or any sort of JSON mode; if not correctly prompted, the LLM will output plain text.
|
315 |
-
- All LLM responses will be passed to json.loads(response), responses that fail the json parsing are deemed incorrect (beware of tripple backtricks etc.)
|
316 |
-
2. Emphasize strict adherence to classification definitions:
|
317 |
-
- *Single Document:* Refers to one document.
|
318 |
-
- *Multiple Documents:* Refers to more than one document.
|
319 |
-
- *Single Clause:* Refers to one specific clause.
|
320 |
-
- *Multiple Clauses:* Refers to more than one specific clause.
|
321 |
-
- *General Information:* Refers to general content not tied to specific clauses.
|
322 |
|
323 |
-
**
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
|
|
330 |
|
331 |
Good Luck!
|
332 |
-
|
333 |
-
|
|
|
|
|
334 |
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
You can only submit once, try to test and build out your system prompt using gpt4o-mini with temp=1 before submitting your solution.
|
339 |
-
|
340 |
-
We look forward to your submission!
|
341 |
-
"""
|
342 |
-
)
|
343 |
|
344 |
email_input = gr.Textbox(label="Email", placeholder="[email protected]")
|
345 |
name_input = gr.Textbox(label="First Name, Last Name", placeholder="John, Smith")
|
@@ -356,10 +354,12 @@ def build_interface():
|
|
356 |
inputs=[email_input, name_input, system_prompt_input],
|
357 |
outputs=output_text,
|
358 |
)
|
|
|
|
|
359 |
|
360 |
return demo
|
361 |
|
362 |
if __name__ == "__main__":
|
363 |
interface = build_interface()
|
364 |
# Launch the app on 0.0.0.0 so it is accessible externally (e.g., in a container).
|
365 |
-
interface.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
43 |
Both lists must be of equal length.
|
44 |
"""
|
45 |
questions_str = os.environ.get("TEST_QUESTION_1")
|
46 |
+
docs_str = os.environ.get("TEST_DOCUMENTS_1")
|
47 |
expected_str = os.environ.get("TEST_EXPECTED_1")
|
48 |
+
|
49 |
+
if not questions_str or not expected_str or not docs_str:
|
50 |
return []
|
51 |
try:
|
52 |
questions_list = json.loads(questions_str)
|
|
|
58 |
except Exception as e:
|
59 |
print(f"Error parsing expected answers: {str(e)}")
|
60 |
return []
|
61 |
+
try:
|
62 |
+
docs_list = json.loads(docs_str)
|
63 |
+
except Exception as e:
|
64 |
+
print(f"Error parsing documents: {str(e)}")
|
65 |
+
return []
|
66 |
+
|
67 |
+
# Ensure all lists are of the same length.
|
68 |
+
if len(questions_list) != len(expected_list) or len(questions_list) != len(docs_list):
|
69 |
print("Mismatch in length: questions list and expected answers list must have the same length.")
|
70 |
return []
|
71 |
+
|
72 |
+
return [{"question": q, "expected": e, "docs": d} for q, e, d in zip(questions_list, expected_list, docs_list)]
|
73 |
|
74 |
# Load evaluation questions at startup.
|
75 |
EVALUATION_QUESTIONS = get_evaluation_questions()
|
|
|
139 |
# Process each evaluation question.
|
140 |
for item in EVALUATION_QUESTIONS:
|
141 |
question = item["question"]
|
142 |
+
docs = item["docs"]
|
143 |
expected = item["expected"]
|
144 |
try:
|
145 |
response = client.chat.completions.create(
|
|
|
235 |
Constructs the Gradio interface with a submission button and single-submission mechanism.
|
236 |
"""
|
237 |
with gr.Blocks() as demo:
|
238 |
+
gr.Markdown("# Applicant Task: Target Company & Law Firm Identification")
|
239 |
+
gr.Markdown("## Identifying Parties, Law Firms, and Target Company Presence")
|
240 |
+
|
241 |
# General description
|
242 |
gr.Markdown("""
|
243 |
+
This task involves processing a user query to determine the relevance to the intended task, followed by analyzing textual data to extract information about law firms representing parties (Buyer, Seller, and Third Parties) and verifying the presence of a target company.
|
|
|
|
|
|
|
|
|
|
|
244 |
|
245 |
+
The system is designed to sequentially leverage three LLM functions:
|
246 |
|
247 |
+
**Step 1:** LLM1 determines if the user's query mentions any target company.
|
248 |
+
- If no target company is found, LLM1 responds with a message wrapped in `<user_message></user_message>` XML tags to inform the user that the query is irrelevant to this task.
|
249 |
+
- If the query contains a target company, LLM1 moves forward with a formatted acknowledgment of the identified target company.
|
250 |
+
|
251 |
+
**Step 2:** LLM2 examines four separate paragraphs provided as input. For each paragraph, it extracts specific information about:
|
252 |
+
- The Buyer's representative law firm.
|
253 |
+
- The Seller's representative law firm.
|
254 |
+
- Any third-party law firm present.
|
255 |
+
- Whether the target company is mentioned in the paragraph.
|
256 |
+
Each paragraph's results are formatted and concatenated for the next step.
|
257 |
+
|
258 |
+
**Step 3:** LLM3 compiles the information from all analyzed paragraphs and outputs a structured JSON object with the following keys:
|
259 |
|
260 |
+
```json
|
261 |
{
|
262 |
+
"buyer_firm": "string",
|
263 |
+
"seller_firm": "string",
|
264 |
+
"third_party": "string",
|
265 |
+
"contains_target_firm": boolean
|
266 |
}
|
267 |
```
|
268 |
|
269 |
+
The goal is to identify the representative law firms of involved parties and determine if the target company is mentioned, ensuring the results are structured and accurate.
|
270 |
|
271 |
+
**Key Considerations:**
|
272 |
+
- The output must adhere to the prescribed JSON format for the final step.
|
273 |
+
- Ensure the system can accurately extract and classify relevant information from the input paragraphs.
|
274 |
""")
|
275 |
|
276 |
# Example Inputs and Outputs in an Accordion
|
277 |
+
with gr.Accordion("Example Workflow", open=False):
|
278 |
gr.Markdown("""
|
279 |
+
**Example Query and System Output:**
|
280 |
+
|
281 |
+
**User Query:**
|
282 |
+
*"Is Kirkland present?"*
|
283 |
+
|
284 |
+
Step 1 (LLM1):
|
285 |
+
- If no target company is identified:
|
286 |
+
Output: `<user_message>Query is not relevant to the intended task.</user_message>`
|
287 |
+
|
288 |
+
- If a target company is identified:
|
289 |
+
Output: *"The query mentions the target company Kirkland."*
|
290 |
+
|
291 |
+
Step 2 (LLM2 for Paragraphs):
|
292 |
+
**Input Paragraph Example:**
|
293 |
+
*"Representation agreements between Buyers and Kirkland & Ellis are included."*
|
294 |
+
|
295 |
+
**Output:**
|
296 |
+
*"Buyer Firm: Kirkland & Ellis, Seller Firm: None, Third Party Firm: None, Contains Target Firm: True."*
|
297 |
+
|
298 |
+
Step 3 (LLM3 Final Output):
|
299 |
+
Compiled JSON:
|
300 |
+
```json
|
301 |
+
{
|
302 |
+
"buyer_firm": "Kirkland & Ellis",
|
303 |
+
"seller_firm": null,
|
304 |
+
"third_party": null,
|
305 |
+
"contains_target_firm": true
|
306 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
307 |
```
|
308 |
""")
|
309 |
|
310 |
+
# Challenge instructions and testing guidance
|
311 |
+
with gr.Accordion("Task Instructions and Testing", open=False):
|
312 |
gr.Markdown("""
|
313 |
+
- Design prompts that ensure proper interaction between the three LLM systems, with each step contributing to the final output.
|
314 |
+
- Ensure strict adherence to JSON formatting requirements (e.g., no extra characters that may cause JSON parsing errors).
|
315 |
+
- Test extensively to verify accurate law firm and target company identification.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
316 |
|
317 |
+
**Output Requirements:**
|
318 |
+
- Ensure final LLM3 JSON output has the following keys:
|
319 |
+
- `"buyer_firm"`
|
320 |
+
- `"seller_firm"`
|
321 |
+
- `"third_party"`
|
322 |
+
- `"contains_target_firm"`
|
323 |
+
- Values must be accurately extracted or classified based on LLM2's parsed data.
|
324 |
|
325 |
+
**Hints for Crafting System Prompts:**
|
326 |
+
- Explicitly specify formatting requirements at each step.
|
327 |
+
- Clarify the task definitions and expected classifications in each system prompt for LLM1, LLM2, and LLM3.
|
328 |
+
- Test using diverse sample data for robustness.
|
329 |
+
|
330 |
+
You can only submit once, so validate your system prompts thoroughly using mock queries and example data before final submission.
|
331 |
|
332 |
Good Luck!
|
333 |
+
""")
|
334 |
+
|
335 |
+
gr.Markdown("""
|
336 |
+
Enter your name and email below, as listed in your CV, and submit your designed prompts.
|
337 |
|
338 |
+
Remember: Focus on clarity, accuracy, and structured responses to achieve a high score!
|
339 |
+
""")
|
340 |
+
|
|
|
|
|
|
|
|
|
|
|
341 |
|
342 |
email_input = gr.Textbox(label="Email", placeholder="[email protected]")
|
343 |
name_input = gr.Textbox(label="First Name, Last Name", placeholder="John, Smith")
|
|
|
354 |
inputs=[email_input, name_input, system_prompt_input],
|
355 |
outputs=output_text,
|
356 |
)
|
357 |
+
|
358 |
+
|
359 |
|
360 |
return demo
|
361 |
|
362 |
if __name__ == "__main__":
|
363 |
interface = build_interface()
|
364 |
# Launch the app on 0.0.0.0 so it is accessible externally (e.g., in a container).
|
365 |
+
interface.launch(server_name="0.0.0.0", server_port=7860)
|