Spaces:
Running
Running
Create task_extraction_model_groq.py
Browse files
features/insight_and_tasks/agents/task_extraction_model_groq.py
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import enum
import json
import os
from datetime import datetime, date
from typing import List, Optional, Literal, Tuple

from pydantic import BaseModel, Field, field_validator, ValidationInfo
|
7 |
+
|
8 |
+
# Import Groq and instructor for structured output
|
9 |
+
try:
    from groq import Groq, RateLimitError
    import instructor
except ImportError:
    # Both libraries are optional at import time: the module still loads, and
    # the falsy placeholders below let later code detect that they are missing.
    print("Warning: 'groq' or 'instructor' library not found. Please install them.")
    print("Try: pip install groq instructor")
    Groq = None
    instructor = None

    class RateLimitError(Exception):
        """Stand-in for ``groq.RateLimitError`` when the optional imports fail.

        Keeps the name bound so that ``except RateLimitError`` clauses in this
        module cannot fail with NameError when the real class is unavailable.
        """
|
17 |
+
|
18 |
+
|
19 |
+
from features.insight_and_tasks.data_models.tasks import (
|
20 |
+
TaskExtractionOutput,
|
21 |
+
OKR,
|
22 |
+
KeyResult,
|
23 |
+
Task,
|
24 |
+
EffortLevel,
|
25 |
+
TimelineCategory,
|
26 |
+
PriorityLevel,
|
27 |
+
TaskType,
|
28 |
+
DataSubject # Ensure all are imported
|
29 |
+
)
|
30 |
+
|
31 |
+
# --- Groq Client Initialization with Instructor ---
|
32 |
+
# Ensure GROQ_API_KEY is set in your environment variables before running
|
33 |
+
# `client` stays None unless both libraries were imported, the API key is
# present, and the patched client is constructed without error.
client = None
if Groq and instructor:
    try:
        api_key = os.getenv('GROQ_API_KEY')
        if api_key:
            # One shared Groq client, wrapped by instructor in JSON mode so
            # responses can be parsed into Pydantic models.
            client = instructor.from_groq(Groq(api_key=api_key), mode=instructor.Mode.JSON)
        else:
            raise ValueError("GROQ_API_KEY environment variable not set. Please set it to your Groq API key.")
    except Exception as e:
        # Any failure (missing key included) is reported and leaves the
        # module importable with `client` still None.
        print(f"Failed to initialize Groq client: {e}")
|
47 |
+
|
48 |
+
|
49 |
+
|
50 |
+
# --- Helper Function for Date Calculations ---
|
51 |
+
def get_quarter_info(today: Optional[date] = None) -> Tuple[int, int, int, date]:
    """Return calendar-quarter details for a reference day.

    Args:
        today: The reference ``datetime.date``. When omitted (or None),
            ``date.today()`` is used, which is the original zero-argument
            behavior. Passing an explicit date makes the result deterministic.

    Returns:
        A 4-tuple ``(quarter, year, days_remaining, today)``:
        - quarter: 1-4, derived from the month of ``today``.
        - year: the year of ``today``.
        - days_remaining: days from ``today`` to the last day of its quarter
          (0 when ``today`` is that last day).
        - today: the reference date that was used.
    """
    if today is None:
        today = date.today()

    current_year = today.year
    current_quarter = (today.month - 1) // 3 + 1

    # (month, day) of the last day of each quarter. None of them falls in
    # February, so leap years need no special handling.
    quarter_ends = {1: (3, 31), 2: (6, 30), 3: (9, 30), 4: (12, 31)}
    end_month, end_day = quarter_ends[current_quarter]
    end_of_quarter_date = date(current_year, end_month, end_day)

    # `today` always lies inside its own quarter, so the difference is never
    # negative; the clamp is kept purely as a defensive floor.
    days_remaining = max(0, (end_of_quarter_date - today).days)

    return current_quarter, current_year, days_remaining, today
|
70 |
+
|
71 |
+
# --- Main Task Extraction Function (Groq + instructor) ---
|
72 |
+
def extract_tasks_from_text_groq(user_text_input: str) -> Tuple[Optional[TaskExtractionOutput], int, int, int]:
    """
    Extracts tasks from input text using the Groq API and structures them
    using instructor.

    Args:
        user_text_input: The text to analyze. It is interpolated verbatim
            into the prompt sent to the model.

    Returns:
        A tuple containing:
            - The TaskExtractionOutput Pydantic model instance produced by the
              instructor-patched client. Failures are reported by raising,
              not by returning None; the Optional in the annotation is kept
              only for compatibility with existing callers.
            - The current quarter number.
            - The current year.
            - The number of days remaining in the quarter.

    Raises:
        ValueError: If the Groq client is not initialized, or if the API call
            or response validation fails for any reason other than a rate
            limit (the original exception is attached as ``__cause__``).
        RateLimitError: If the Groq API rate limit is exceeded.
    """
    if not client:
        raise ValueError("Groq client is not initialized. Check your API key and library installations.")

    # The fourth element (the date used) is not needed here.
    quarter, year, days_remaining, _ = get_quarter_info()

    # The prompt states the model's role, the expected schema, and the exact
    # quarter string to echo back, followed by the user's text.
    prompt = f"""You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and Key Results (OKRs) framework.

Your output MUST be a valid JSON object that strictly conforms to the 'TaskExtractionOutput' Pydantic schema.

CURRENT CONTEXTUAL INFORMATION:
- Use this exact string for the 'current_quarter_info' field: 'Q{quarter} {year}, {days_remaining} days remaining'.

GENERATION RULES:
1. Create 1-3 OKR objects based on the input text.
2. For each OKR, create 1-3 KeyResult objects (This is MANDATORY).
3. For each KeyResult, create 1-3 Task objects (This is MANDATORY).
4. Tasks must be specific, actionable, and directly derived from the input text.
5. Do not repeat text.
6. Ensure the final output is a complete JSON object.

Now, analyze the following text and generate the structured JSON output:
---
TEXT TO ANALYZE:
{user_text_input}
---
"""

    try:
        # The instructor-patched client takes the Pydantic model via
        # `response_model` and returns a parsed, validated instance.
        task_output = client.chat.completions.create(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",  # Groq-hosted model id
            response_model=TaskExtractionOutput,
            messages=[
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
            top_p=0.8,
            max_retries=3,  # Instructor can automatically retry on validation errors
        )
    except RateLimitError as e:
        print(f"Error: Groq API rate limit exceeded. Please wait and try again. Details: {e}")
        raise  # Re-raise the specific error so callers can back off
    except Exception as e:
        # This can catch Pydantic validation errors or other API issues.
        print(f"An unexpected error occurred during the Groq API call or data validation: {e}")
        # Chain explicitly so the underlying error stays attached as the cause.
        raise ValueError(f"Failed to process text with Groq: {e}") from e

    return task_output, quarter, year, days_remaining
|