Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

App Files Files Community

GuglielmoTor committed on Jun 6

Commit

5f9f384

verified ·

1 Parent(s): 0c7dac6

Create task_extraction_model_groq.py

Browse files

Files changed (1) hide show

features/insight_and_tasks/agents/task_extraction_model_groq.py +142 -0

features/insight_and_tasks/agents/task_extraction_model_groq.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import enum
+import json
+import os
+from typing import List, Optional, Literal
+from pydantic import BaseModel, Field, field_validator, ValidationInfo
+from datetime import datetime, date
+# Import Groq and instructor for structured output
+try:
+    from groq import Groq, RateLimitError
+    import instructor
+except ImportError:
+    print("Warning: 'groq' or 'instructor' library not found. Please install them.")
+    print("Try: pip install groq instructor")
+    Groq = None
+    instructor = None
+from features.insight_and_tasks.data_models.tasks import (
+    TaskExtractionOutput,
+    OKR,
+    KeyResult,
+    Task,
+    EffortLevel,
+    TimelineCategory,
+    PriorityLevel,
+    TaskType,
+    DataSubject # Ensure all are imported
+)
# --- Groq client setup (patched with instructor) ---
# GROQ_API_KEY must be present in the environment before this module is imported.
# `client` is left as None whenever either library is missing or the setup
# below fails, so callers can detect an unusable client with a truthiness check.
client = None
if Groq and instructor:
    try:
        api_key = os.getenv('GROQ_API_KEY')
        if not api_key:
            raise ValueError("GROQ_API_KEY environment variable not set. Please set it to your Groq API key.")
        # A single shared Groq client, wrapped by instructor in its JSON mode
        # so completions can be requested with a Pydantic `response_model`.
        client = instructor.from_groq(Groq(api_key=api_key), mode=instructor.Mode.JSON)
    except Exception as e:
        # Best-effort: report the problem and keep `client` as None.
        print(f"Failed to initialize Groq client: {e}")
# --- Helper Function for Date Calculations ---
def get_quarter_info(today: Optional[date] = None) -> tuple[int, int, int, date]:
    """Return the calendar quarter, year, and days left in that quarter.

    Args:
        today: Reference date to evaluate. When omitted, ``date.today()`` is
            used, which is the behaviour of the original zero-argument call.
            A ``datetime`` is accepted and reduced to its calendar date.

    Returns:
        A tuple ``(quarter, year, days_remaining, today)`` where ``quarter`` is
        in 1-4, ``days_remaining`` is the number of days from ``today`` to the
        last day of that quarter (0 on the last day itself), and ``today`` is
        the reference date that was actually used.
    """
    if today is None:
        today = date.today()
    elif isinstance(today, datetime):
        # `date - datetime` raises TypeError, so normalise to a plain date.
        today = today.date()

    current_year = today.year
    current_quarter = (today.month - 1) // 3 + 1

    # (month, day) of the last day of each calendar quarter; none of these
    # depend on leap years.
    quarter_end_month_day = {1: (3, 31), 2: (6, 30), 3: (9, 30), 4: (12, 31)}
    end_month, end_day = quarter_end_month_day[current_quarter]
    end_of_quarter_date = date(current_year, end_month, end_day)

    # `today` always lies inside its own quarter, so this is never negative.
    days_remaining = (end_of_quarter_date - today).days
    return current_quarter, current_year, days_remaining, today
# --- Main Task Extraction Function (Refactored for Groq) ---
def _build_task_extraction_prompt(user_text_input: str, quarter: int, year: int, days_remaining: int) -> str:
    """Render the instruction prompt that asks the model for OKRs and tasks."""
    return f"""You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and Key Results (OKRs) framework.
Your output MUST be a valid JSON object that strictly conforms to the 'TaskExtractionOutput' Pydantic schema.
CURRENT CONTEXTUAL INFORMATION:
- Use this exact string for the 'current_quarter_info' field: 'Q{quarter} {year}, {days_remaining} days remaining'.
GENERATION RULES:
1.  Create 1-3 OKR objects based on the input text.
2.  For each OKR, create 1-3 KeyResult objects (This is MANDATORY).
3.  For each KeyResult, create 1-3 Task objects (This is MANDATORY).
4.  Tasks must be specific, actionable, and directly derived from the input text.
5.  Do not repeat text.
6.  Ensure the final output is a complete JSON object.
Now, analyze the following text and generate the structured JSON output:
---
TEXT TO ANALYZE:
{user_text_input}
---
"""


def extract_tasks_from_text_groq(
    user_text_input: str,
    *,
    model: str = "meta-llama/llama-4-maverick-17b-128e-instruct",
) -> tuple[Optional[TaskExtractionOutput], int, int, int]:
    """Extract OKRs and tasks from text via the Groq API and instructor.

    Args:
        user_text_input: The text to analyze. Must contain non-whitespace
            content.
        model: Groq model identifier to query. Defaults to the model this
            function has always used.

    Returns:
        A tuple containing:
        - The ``TaskExtractionOutput`` instance produced for the text.
        - The current quarter number.
        - The current year.
        - The number of days remaining in the quarter.
        Failures are reported by raising; this function does not return
        ``None`` in place of the model output.

    Raises:
        ValueError: If the Groq client is not initialized, if the input text
            is empty, or if the API call / response validation fails (the
            original error is attached as ``__cause__``).
        RateLimitError: If the Groq API rate limit error reaches this function.
    """
    if not client:
        raise ValueError("Groq client is not initialized. Check your API key and library installations.")
    # An empty text gives the model nothing to derive tasks from, so fail
    # before spending an API call on it.
    if not user_text_input or (isinstance(user_text_input, str) and not user_text_input.strip()):
        raise ValueError("No input text provided for task extraction.")

    quarter, year, days_remaining, _ = get_quarter_info()
    prompt = _build_task_extraction_prompt(user_text_input, quarter, year, days_remaining)

    try:
        # The instructor-patched client takes the Pydantic model through
        # `response_model` and returns an instance of it.
        task_output = client.chat.completions.create(
            model=model,
            response_model=TaskExtractionOutput,
            messages=[
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
            top_p=0.8,
            max_retries=3,  # Let instructor retry when validation fails
        )
    except RateLimitError as e:
        # NOTE(review): depending on the instructor version, errors raised
        # inside its retry loop may arrive wrapped in another exception type,
        # in which case they land in the generic branch below - confirm.
        print(f"Error: Groq API rate limit exceeded. Please wait and try again. Details: {e}")
        raise  # Re-raise the specific error
    except Exception as e:
        # Covers response validation failures as well as other API errors.
        print(f"An unexpected error occurred during the Groq API call or data validation: {e}")
        raise ValueError(f"Failed to process text with Groq: {e}") from e

    return task_output, quarter, year, days_remaining