File size: 3,202 Bytes
16353a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae41008
16353a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import pandas as pd
from groq import Groq
import instructor
from pydantic import BaseModel
import os

# Groq credential taken from the environment; None when the variable is unset.
api_key = os.environ.get('GROQ_API_KEY')

# One shared Groq client, patched by instructor (JSON mode) so that chat
# completions can be parsed into the pydantic response models below.
client = instructor.from_groq(
    Groq(api_key=api_key),
    mode=instructor.Mode.JSON,
)

# Pydantic schema for the summarization output; used as the `response_model`
# in summarize_post, which returns the parsed `summary` field.
class SummaryOutput(BaseModel):
    summary: str  # the short summary text produced by the model

# Pydantic schema for the classification output; used as the `response_model`
# in classify_post, which returns the parsed `category` field.
class ClassificationOutput(BaseModel):
    category: str  # the single category name chosen by the model

# Summarize post text
def summarize_post(text):
    """Summarize a LinkedIn post in 5 to 10 words via the chat-completions client.

    Args:
        text: Raw post text. Non-string values are converted with ``str()``.

    Returns:
        The summary string parsed from the model response, or ``None`` when
        ``text`` is missing (``None``/NaN), blank, or the API call fails.
    """
    # pd.isna covers NaN/NaT/pd.NA; the explicit None check short-circuits first.
    if text is None or pd.isna(text):
        return None

    text = str(text)[:2000]  # truncate to avoid token overflow

    # Nothing to summarize: skip the API call instead of asking the model to
    # summarize an empty post (which wastes a request and invites a made-up
    # summary).
    if not text.strip():
        return None

    prompt = f"""
    Summarize the following LinkedIn post in 5 to 10 words.
    Only return the summary inside a JSON field called 'summary'.

    Post Text:
    \"\"\"{text}\"\"\"
    """

    try:
        response = client.chat.completions.create(
            model="deepseek-r1-distill-llama-70b",
            response_model=SummaryOutput,
            messages=[
                {"role": "system", "content": "You are a precise summarizer. Only return a JSON object with a 'summary' string."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3
        )
        return response.summary
    except Exception as e:
        # Deliberate best-effort: any client/validation failure is reported
        # and mapped to None so one bad post does not abort a batch.
        print(f"Summarization error: {e}")
        return None


# Classify post summary into structured categories
def classify_post(summary, labels):
    """Pick the single most relevant category for a post summary.

    Args:
        summary: Short summary text of the post.
        labels: Iterable of category names the model may choose from.

    Returns:
        The category string parsed from the model response (the prompt asks
        for the string 'None' when nothing applies), or ``None`` when the
        summary is missing/blank, ``labels`` is empty, or the API call fails.
    """
    # pd.isna covers NaN/NaT/pd.NA; the explicit None check short-circuits first.
    if summary is None or pd.isna(summary):
        return None

    # A blank summary gives the model nothing to classify; skip the API call.
    if not str(summary).strip():
        return None

    labels = list(labels)
    # Without any candidate category there is nothing to choose from.
    if not labels:
        return None

    # Render one category per line. Category names may themselves contain
    # commas (e.g. "Diversity, Equity, and Inclusion (DEI)"), so a
    # comma-separated list would make the category boundaries ambiguous.
    category_lines = "\n".join(f"    - {label}" for label in labels)

    prompt = f"""
    Post Summary: "{summary}"

    Available Categories (one per line):
{category_lines}

    Task: Choose the single most relevant category from the list above that applies to this summary. Return only one category in a structured JSON format under the field 'category'.
    If no category applies, return 'None'.
    """
    try:
        result = client.chat.completions.create(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",
            response_model=ClassificationOutput,
            messages=[
                {"role": "system", "content": "You are a strict classifier. Return only one matching category name under the field 'category'."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=60
        )
        return result.category
    except Exception as e:
        # Deliberate best-effort: any client/validation failure is reported
        # and mapped to None so one bad post does not abort a batch.
        print(f"Classification error: {e}")
        return None

def summarize_and_classify_post(text, labels):
    """Summarize a post, then classify the summary against ``labels``.

    Returns a dict with keys "summary" and "category". Classification is
    only attempted when a non-empty summary was produced; otherwise the
    category is None.
    """
    outcome = {"summary": summarize_post(text), "category": None}
    if outcome["summary"]:
        outcome["category"] = classify_post(outcome["summary"], labels)
    return outcome

def batch_summarize_and_classify(posts, labels=None):
    """Summarize and classify every post in ``posts``.

    Args:
        posts: Iterable of mappings; each post's text is read with
            ``post.get("text")``.
        labels: Optional list of category names to classify against. When
            omitted, the built-in default categories below are used.

    Returns:
        A list with one ``{"summary": ..., "category": ...}`` dict per post,
        in input order.
    """
    if labels is None:
        labels = [
            "Company Culture and Values",
            "Employee Stories and Spotlights",
            "Work-Life Balance, Flexibility, and Well-being",
            "Diversity, Equity, and Inclusion (DEI)",
            "Professional Development and Growth Opportunities",
            "Mission, Vision, and Social Responsibility",
            "None",
        ]

    return [
        summarize_and_classify_post(post.get("text"), labels)
        for post in posts
    ]