Spaces:
Running
Running
File size: 3,202 Bytes
16353a0 ae41008 16353a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import pandas as pd
from groq import Groq
import instructor
from pydantic import BaseModel
import os
# Groq credential taken from the environment (None when GROQ_API_KEY is unset).
api_key = os.environ.get("GROQ_API_KEY")

# Single shared Groq client, patched by instructor in JSON mode so that
# chat-completion calls can take a ``response_model`` for structured output.
_groq_client = Groq(api_key=api_key)
client = instructor.from_groq(_groq_client, mode=instructor.Mode.JSON)
# Pydantic schema for summarization output; summarize_post passes it as
# ``response_model`` and reads the ``summary`` field from the result.
class SummaryOutput(BaseModel):
    summary: str  # the short summary text produced for one post
# Define pydantic schema for classification output
class ClassificationOutput(BaseModel):
    # classify_post passes this model as ``response_model`` and returns the
    # ``category`` field of the result unchanged.
    category: str  # the single category name chosen for a summary
# Summarize post text
def summarize_post(text, *, model="deepseek-r1-distill-llama-70b", max_chars=2000):
    """Summarize a LinkedIn post in 5 to 10 words using the shared client.

    Args:
        text: Post body. ``None``, NaN/NA values and blank or whitespace-only
            strings are treated as missing.
        model: Model identifier passed to the chat-completions call.
        max_chars: Number of leading characters of ``text`` placed in the
            prompt (truncation guards against token overflow). ``None``
            disables truncation.

    Returns:
        The summary string, or ``None`` when ``text`` is missing or the
        request fails (the error is printed, not raised).
    """
    # Identity check first: it is cheap and makes the None case explicit.
    if text is None or pd.isna(text):
        return None

    text = str(text)
    # Nothing to summarize: skip the API round-trip instead of asking the
    # model to summarize an empty post.
    if not text.strip():
        return None
    text = text[:max_chars]  # truncate to avoid token overflow

    prompt = (
        "\n"
        "Summarize the following LinkedIn post in 5 to 10 words.\n"
        "Only return the summary inside a JSON field called 'summary'.\n"
        "Post Text:\n"
        f'"""{text}"""\n'
    )

    try:
        response = client.chat.completions.create(
            model=model,
            response_model=SummaryOutput,
            messages=[
                {"role": "system", "content": "You are a precise summarizer. Only return a JSON object with a 'summary' string."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
        )
        return response.summary
    except Exception as e:
        # Deliberately best-effort: one failed post must not abort a batch.
        print(f"Summarization error: {e}")
        return None
# Classify post summary into structured categories
def classify_post(summary, labels, *, model="meta-llama/llama-4-maverick-17b-128e-instruct"):
    """Pick the single most relevant category for a post summary.

    Args:
        summary: Short summary text. ``None``, NaN/NA values and blank
            strings are treated as missing.
        labels: Iterable of candidate category names.
        model: Model identifier passed to the chat-completions call.

    Returns:
        The ``category`` string returned by the model (the prompt asks for
        the string 'None' when nothing applies), or Python ``None`` when the
        summary is missing, no labels are given, or the request fails (the
        error is printed, not raised).
    """
    if summary is None or pd.isna(summary):
        return None
    if not str(summary).strip():
        return None

    # Materialize once so generators work and emptiness can be checked.
    labels = list(labels) if labels is not None else []
    if not labels:
        return None

    # One label per line: several labels contain commas themselves, so a
    # comma-separated list would blur where one category ends and the next
    # begins.
    category_lines = "\n".join(f"- {label}" for label in labels)

    prompt = (
        "\n"
        f'Post Summary: "{summary}"\n'
        "Available Categories:\n"
        f"{category_lines}\n"
        "Task: Choose the single most relevant category from the list above that applies to this summary. Return only one category in a structured JSON format under the field 'category'.\n"
        "If no category applies, return 'None'.\n"
    )

    try:
        result = client.chat.completions.create(
            model=model,
            response_model=ClassificationOutput,
            messages=[
                {"role": "system", "content": "You are a strict classifier. Return only one matching category name under the field 'category'."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
            max_tokens=60,
        )
        return result.category
    except Exception as e:
        # Deliberately best-effort: one failed post must not abort a batch.
        print(f"Classification error: {e}")
        return None
def summarize_and_classify_post(text, labels):
    """Summarize one post and classify the resulting summary.

    Classification is attempted only when summarization produced a truthy
    summary; otherwise the category is ``None``.

    Returns:
        A dict with the keys "summary" and "category".
    """
    post_summary = summarize_post(text)

    if post_summary:
        post_category = classify_post(post_summary, labels)
    else:
        post_category = None

    return {"summary": post_summary, "category": post_category}
def batch_summarize_and_classify(posts, labels=None):
    """Summarize and classify every post in ``posts``, in order.

    Args:
        posts: Iterable of mapping-like objects; each post's body is read
            with ``post.get("text")``.
        labels: Optional list of category names to classify against. When
            omitted, the built-in employer-branding categories below are
            used.

    Returns:
        A list with one ``{"summary": ..., "category": ...}`` dict per post.
    """
    if labels is None:
        labels = [
            "Company Culture and Values",
            "Employee Stories and Spotlights",
            "Work-Life Balance, Flexibility, and Well-being",
            "Diversity, Equity, and Inclusion (DEI)",
            "Professional Development and Growth Opportunities",
            "Mission, Vision, and Social Responsibility",
            "None",
        ]

    return [
        summarize_and_classify_post(post.get("text"), labels)
        for post in posts
    ]
|