Spaces:
Running
Running
import pandas as pd | |
from groq import Groq | |
import instructor | |
from pydantic import BaseModel | |
import os | |
# Groq credential taken from the environment; None when the variable is unset.
api_key = os.environ.get("GROQ_API_KEY")

# A single Groq client shared by the whole module, wrapped by instructor in
# JSON mode so chat completions can be requested with a `response_model`.
_groq_client = Groq(api_key=api_key)
client = instructor.from_groq(_groq_client, mode=instructor.Mode.JSON)
# Structured-output schema for the summarization call: a single text field.
# NOTE(review): documented with `#` comments on purpose -- a class docstring
# would presumably be emitted as the schema description that is sent to the
# model; confirm before converting these comments to a docstring.
class SummaryOutput(BaseModel):
    # Short summary text returned by the model.
    summary: str
# Structured-output schema for the classification call: a single text field.
# NOTE(review): documented with `#` comments on purpose -- a class docstring
# would presumably be emitted as the schema description that is sent to the
# model; confirm before converting these comments to a docstring.
class ClassificationOutput(BaseModel):
    # Name of the category chosen by the model.
    category: str
# Summarize post text
def summarize_post(text):
    """Summarize a post in 5 to 10 words using the shared Groq client.

    Args:
        text: Post body. Non-string values are converted with ``str()``.

    Returns:
        The summary string produced by the model, or ``None`` when ``text``
        is missing (``None``/NaN/NA), blank, or when the request fails.
    """
    if text is None:
        return None
    if not isinstance(text, str):
        # pd.isna() on a list/array returns an array whose truth value is
        # ambiguous, so only ask it about scalars.
        if pd.api.types.is_scalar(text) and pd.isna(text):
            return None
        text = str(text)

    text = text.strip()
    if not text:
        # Nothing to summarize; skip the API round trip.
        return None
    text = text[:2000]  # truncate to avoid token overflow

    prompt = (
        "\n"
        "Summarize the following LinkedIn post in 5 to 10 words.\n"
        "Only return the summary inside a JSON field called 'summary'.\n"
        "Post Text:\n"
        f'"""{text}"""\n'
    )

    try:
        response = client.chat.completions.create(
            model="deepseek-r1-distill-llama-70b",
            response_model=SummaryOutput,
            messages=[
                {"role": "system", "content": "You are a precise summarizer. Only return a JSON object with a 'summary' string."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
        )
        return response.summary
    except Exception as e:
        # Deliberate best-effort: one failed post must not abort a batch.
        print(f"Summarization error: {e}")
        return None
# Classify post summary into structured categories
def classify_post(summary, labels):
    """Choose the single most relevant category for a post summary.

    Args:
        summary: Short summary of the post. Non-string values are converted
            with ``str()``.
        labels: Iterable of category names the model may choose from.

    Returns:
        The category string returned by the model, or ``None`` when
        ``summary`` is missing (``None``/NaN/NA), blank, or when the request
        fails.
    """
    if summary is None:
        return None
    if not isinstance(summary, str):
        # pd.isna() on a list/array returns an array whose truth value is
        # ambiguous, so only ask it about scalars.
        if pd.api.types.is_scalar(summary) and pd.isna(summary):
            return None
        summary = str(summary)

    summary = summary.strip()
    if not summary:
        # Nothing to classify; skip the API round trip.
        return None

    # One label per line: several labels contain commas themselves, so a
    # comma-separated list would blur where one category ends and the next
    # begins.
    category_lines = "\n".join(f"- {label}" for label in labels)
    prompt = (
        "\n"
        f'Post Summary: "{summary}"\n'
        "Available Categories (one per line):\n"
        f"{category_lines}\n"
        "Task: Choose the single most relevant category from the list above that applies to this summary. "
        "Return only one category in a structured JSON format under the field 'category'.\n"
        "If no category applies, return 'None'.\n"
    )

    try:
        result = client.chat.completions.create(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",
            response_model=ClassificationOutput,
            messages=[
                {"role": "system", "content": "You are a strict classifier. Return only one matching category name under the field 'category'."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
            max_tokens=60,
        )
        return result.category
    except Exception as e:
        # Deliberate best-effort: one failed post must not abort a batch.
        print(f"Classification error: {e}")
        return None
def summarize_and_classify_post(text, labels):
    """Summarize one post, then classify that summary.

    Args:
        text: Raw post text, passed to ``summarize_post``.
        labels: Candidate categories, passed to ``classify_post``.

    Returns:
        A dict with keys ``"summary"`` and ``"category"``. The category is
        ``None`` whenever no (non-empty) summary was produced.
    """
    post_summary = summarize_post(text)

    # A missing or empty summary short-circuits: classification is skipped.
    if post_summary:
        post_category = classify_post(post_summary, labels)
    else:
        post_category = None

    return {"summary": post_summary, "category": post_category}
def batch_summarize_and_classify(posts, labels=None):
    """Summarize and classify every post in ``posts``, in order.

    Args:
        posts: Iterable of mapping-like objects; each post's text is read
            with ``post.get("text")``. ``None`` is treated as no posts.
        labels: Optional list of category names to classify against. When
            omitted, the built-in employer-branding category list is used.

    Returns:
        A list with one ``{"summary": ..., "category": ...}`` dict per post,
        in input order.
    """
    if labels is None:
        # Built fresh on every call so callers can never mutate a shared
        # default.
        labels = [
            "Company Culture and Values",
            "Employee Stories and Spotlights",
            "Work-Life Balance, Flexibility, and Well-being",
            "Diversity, Equity, and Inclusion (DEI)",
            "Professional Development and Growth Opportunities",
            "Mission, Vision, and Social Responsibility",
            "None",
        ]

    if posts is None:
        return []

    return [
        summarize_and_classify_post(post.get("text"), labels)
        for post in posts
    ]