LinkedinMonitor / posts_categorization.py
GuglielmoTor's picture
Update posts_categorization.py
ae41008 verified
raw
history blame
3.2 kB
import pandas as pd
from groq import Groq
import instructor
from pydantic import BaseModel
import os
# Read the Groq API key from the environment; os.getenv returns None when
# GROQ_API_KEY is not set.
api_key = os.getenv('GROQ_API_KEY')
# Create single patched Groq client with instructor for structured output.
# The client runs in instructor's JSON mode and is the module-level object
# that both summarize_post and classify_post send their requests through.
client = instructor.from_groq(Groq(api_key=api_key), mode=instructor.Mode.JSON)
# Pydantic schema for summarization output: the structured response that
# summarize_post requests via `response_model`.
# NOTE: documented with comments rather than a docstring so the JSON schema
# generated from this model stays exactly as it was.
class SummaryOutput(BaseModel):
    # Short summary text produced by the model.
    summary: str
# Define pydantic schema for classification output
# Structured response that classify_post requests via `response_model`.
# NOTE: documented with comments rather than a docstring so the JSON schema
# generated from this model stays exactly as it was.
class ClassificationOutput(BaseModel):
    # Name of the category chosen by the model.
    category: str
# Summarize post text
def summarize_post(text):
    """Summarize a LinkedIn post in 5 to 10 words through the shared client.

    Args:
        text: Raw post text. Non-string values are converted with ``str``.
            Only the first 2000 characters are sent to the model.

    Returns:
        The ``summary`` string parsed from the model's structured response,
        or ``None`` when ``text`` is missing (``None``/NaN), empty or
        whitespace-only, or when the request fails for any reason.
    """
    if text is None:
        return None
    try:
        # pd.isna returns an array for list-like input and its truth value
        # may be ambiguous; such input is not "missing", so it is kept and
        # stringified below instead of crashing here.
        missing = bool(pd.isna(text))
    except (TypeError, ValueError):
        missing = False
    if missing:
        return None

    text = str(text)[:2000]  # truncate to avoid token overflow
    if not text.strip():
        # Nothing to summarize: skip the API call rather than let the model
        # invent a summary for an empty post.
        return None

    prompt = (
        "\n"
        "Summarize the following LinkedIn post in 5 to 10 words.\n"
        "Only return the summary inside a JSON field called 'summary'.\n"
        "Post Text:\n"
        f'"""{text}"""\n'
    )
    try:
        response = client.chat.completions.create(
            model="deepseek-r1-distill-llama-70b",
            response_model=SummaryOutput,
            messages=[
                {"role": "system", "content": "You are a precise summarizer. Only return a JSON object with a 'summary' string."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
        )
        return response.summary
    except Exception as e:
        # Deliberate best-effort boundary: one failed post must not abort a
        # whole batch, so report the error and signal "no summary".
        print(f"Summarization error: {e}")
        return None
# Classify post summary into structured categories
def classify_post(summary, labels):
    """Choose the single most relevant category for a post summary.

    Args:
        summary: Short summary of the post.
        labels: Iterable of candidate category names.

    Returns:
        One of the entries of ``labels`` (matched ignoring case and
        surrounding whitespace), the string ``"None"`` when the model's
        answer matches no entry, or ``None`` when ``summary`` is missing or
        blank, ``labels`` is empty, or the request fails.
    """
    if summary is None:
        return None
    try:
        # pd.isna returns an array for list-like input and its truth value
        # may be ambiguous; treat such input as present rather than crash.
        missing = bool(pd.isna(summary))
    except (TypeError, ValueError):
        missing = False

    labels = list(labels)
    if missing or not str(summary).strip() or not labels:
        # Nothing to classify, or nothing to classify into: skip the call.
        return None

    # One category per line: several category names contain commas
    # themselves, so a comma-separated list would be ambiguous to the model.
    category_lines = "\n".join(str(label) for label in labels)
    prompt = (
        "\n"
        f'Post Summary: "{summary}"\n'
        "Available Categories (one per line):\n"
        f"{category_lines}\n"
        "Task: Choose the single most relevant category from the list above that applies to this summary. Return only one category in a structured JSON format under the field 'category'.\n"
        "If no category applies, return 'None'.\n"
    )
    try:
        result = client.chat.completions.create(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",
            response_model=ClassificationOutput,
            messages=[
                {"role": "system", "content": "You are a strict classifier. Return only one matching category name under the field 'category'."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
            max_tokens=60,
        )
        chosen = str(result.category).strip()
    except Exception as e:
        # Deliberate best-effort boundary: report and signal "no category".
        print(f"Classification error: {e}")
        return None

    # Map the answer back onto the caller's own label spelling. An answer
    # outside the list falls back to 'None', the value the prompt asks the
    # model to use when no category applies.
    canonical = {str(label).strip().casefold(): label for label in labels}
    return canonical.get(chosen.casefold(), "None")
def summarize_and_classify_post(text, labels):
    """Summarize one post, then classify that summary.

    Returns a dict with keys "summary" and "category". Classification is
    only attempted when summarization produced a truthy summary; otherwise
    "category" is None.
    """
    outcome = {"summary": summarize_post(text), "category": None}
    if outcome["summary"]:
        outcome["category"] = classify_post(outcome["summary"], labels)
    return outcome
def batch_summarize_and_classify(posts, labels=None):
    """Summarize and classify every post in ``posts``, in order.

    Args:
        posts: Iterable of mappings; each post's text is read with
            ``post.get("text")``.
        labels: Optional iterable of category names to classify into.
            When omitted, the built-in employer-branding categories below
            are used, which keeps existing single-argument calls unchanged.

    Returns:
        A list with one ``{"summary": ..., "category": ...}`` dict per post,
        in the same order as ``posts``.
    """
    if labels is None:
        labels = [
            "Company Culture and Values",
            "Employee Stories and Spotlights",
            "Work-Life Balance, Flexibility, and Well-being",
            "Diversity, Equity, and Inclusion (DEI)",
            "Professional Development and Growth Opportunities",
            "Mission, Vision, and Social Responsibility",
            "None",
        ]
    else:
        # Materialize once so a one-shot iterable can be reused for each post.
        labels = list(labels)

    return [
        summarize_and_classify_post(post.get("text"), labels)
        for post in posts
    ]