File size: 4,813 Bytes
16353a0
87232cf
16353a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ef06de
 
 
16353a0
 
 
 
 
4004355
16353a0
 
 
 
 
 
 
 
 
 
1ef06de
 
16353a0
1ef06de
16353a0
8fc987e
482c776
 
 
16353a0
 
 
1ef06de
 
 
 
 
 
 
8fc987e
482c776
 
 
1ef06de
 
 
 
 
 
 
 
 
 
 
 
 
16353a0
 
 
1ef06de
16353a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d55124
16353a0
 
 
 
 
 
 
 
 
 
 
 
 
 
ae41008
16353a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import pandas as pd
from groq import Groq, RateLimitError
import instructor
from pydantic import BaseModel
import os

api_key = os.getenv('GROQ_API_KEY')  # None when unset; Groq() will then fail at request time

# Create single patched Groq client with instructor for structured output
# (JSON mode makes instructor parse responses into the pydantic models below)
client = instructor.from_groq(Groq(api_key=api_key), mode=instructor.Mode.JSON)

class SummaryOutput(BaseModel):
    """Structured LLM output for summarization: a single 'summary' string."""
    summary: str

# Define pydantic schema for classification output
class ClassificationOutput(BaseModel):
    """Structured LLM output for classification: a single 'category' string."""
    category: str

# Reasoning model tried first; summarize_post falls back to the plain
# instruct model when the primary hits a rate limit.
PRIMARY_SUMMARIZER_MODEL = "deepseek-r1-distill-llama-70b" 
FALLBACK_SUMMARIZER_MODEL = "llama-3.3-70b-versatile"

# Summarize post text
def _request_summary(model, prompt):
    """Issue one structured summarization request against *model*.

    Returns a SummaryOutput. Exceptions (including RateLimitError) propagate
    to the caller so it can decide whether to fall back to another model.
    """
    return client.chat.completions.create(
        model=model,
        response_model=SummaryOutput,
        messages=[
            {"role": "system", "content": "You are a precise summarizer. Only return a JSON object with a 'summary' string."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.3,
    )


def summarize_post(text):
    """Summarize a LinkedIn post in 5 to 10 words via the Groq API.

    Tries PRIMARY_SUMMARIZER_MODEL first; on a rate limit, retries once with
    FALLBACK_SUMMARIZER_MODEL. Returns the summary string, or None when the
    input is missing/NaN or every attempt fails.
    """
    if pd.isna(text) or text is None:
        return None

    text = str(text)[:500]  # truncate to avoid token overflow

    prompt = f"""
    Summarize the following LinkedIn post in 5 to 10 words.
    Only return the summary inside a JSON field called 'summary'.

    Post Text:
    \"\"\"{text}\"\"\"
    """

    try:
        # Attempt with primary model
        print(f"Attempting summarization with primary model: {PRIMARY_SUMMARIZER_MODEL}")
        return _request_summary(PRIMARY_SUMMARIZER_MODEL, prompt).summary
    except RateLimitError:
        print(f"Rate limit hit for primary summarizer model: {PRIMARY_SUMMARIZER_MODEL}. Trying fallback: {FALLBACK_SUMMARIZER_MODEL}")
        try:
            # Attempt with fallback model (same request, different model id)
            response = _request_summary(FALLBACK_SUMMARIZER_MODEL, prompt)
            print(f"Summarization successful with fallback model: {FALLBACK_SUMMARIZER_MODEL}")
            return response.summary
        except RateLimitError as rle_fallback:
            print(f"Rate limit hit for fallback summarizer model ({FALLBACK_SUMMARIZER_MODEL}): {rle_fallback}. Summarization failed.")
            return None
        except Exception as e_fallback:
            print(f"Error during summarization with fallback model ({FALLBACK_SUMMARIZER_MODEL}): {e_fallback}")
            return None
    except Exception as e_primary:
        print(f"Error during summarization with primary model ({PRIMARY_SUMMARIZER_MODEL}): {e_primary}")
        # You could also try fallback here for non-rate-limit errors if desired
        return None



# Classify post summary into structured categories
def classify_post(summary, labels, model="meta-llama/llama-4-maverick-17b-128e-instruct"):
    """Classify a post summary into exactly one category from *labels*.

    Parameters
    ----------
    summary : str or None
        Short post summary; None/NaN short-circuits to None.
    labels : list[str]
        Candidate category names offered to the model.
    model : str, optional
        Groq model id to use; defaults to the previous hard-coded classifier.

    Returns the chosen category string, or None on missing input or API error.
    """
    if pd.isna(summary) or summary is None:
        return None

    prompt = f"""
    Post Summary: "{summary}"

    Available Categories:
    {', '.join(labels)}

    Task: Choose the single most relevant category from the list above that applies to this summary. Return only one category in a structured JSON format under the field 'category'.
    If no category applies, return 'None'.
    """
    try:
        result = client.chat.completions.create(
            model=model,
            response_model=ClassificationOutput,
            messages=[
                {"role": "system", "content": "You are a strict classifier. Return only one matching category name under the field 'category'."},
                {"role": "user", "content": prompt}
            ],
            temperature=0  # deterministic: pick the single best label
        )
        return result.category
    except Exception as e:
        print(f"Classification error: {e}")
        return None

def summarize_and_classify_post(text, labels):
    """Summarize *text*, then classify the summary against *labels*.

    Returns a dict with 'summary' and 'category' keys; both are None when
    the post could not be summarized.
    """
    summary = summarize_post(text)
    if summary:
        category = classify_post(summary, labels)
    else:
        category = None
    return {
        "summary": summary,
        "category": category
    }

def batch_summarize_and_classify(posts, labels=None):
    """Summarize and classify a batch of post dicts.

    Parameters
    ----------
    posts : iterable of dict
        Each post may carry a 'text' key; a missing or None text yields a
        result with None summary and category.
    labels : list[str], optional
        Candidate categories. Defaults to the employer-branding label set
        this pipeline was built for (backward-compatible with the previous
        hard-coded list).

    Returns a list of {'summary', 'category'} dicts, one per post, in order.
    """
    if labels is None:
        labels = [
            "Company Culture and Values",
            "Employee Stories and Spotlights",
            "Work-Life Balance, Flexibility, and Well-being",
            "Diversity, Equity, and Inclusion (DEI)",
            "Professional Development and Growth Opportunities",
            "Mission, Vision, and Social Responsibility",
            "None",
        ]

    return [
        summarize_and_classify_post(post.get("text"), labels)
        for post in posts
    ]