Spaces:
Running
Running
File size: 4,813 Bytes
16353a0 87232cf 16353a0 1ef06de 16353a0 4004355 16353a0 1ef06de 16353a0 1ef06de 16353a0 8fc987e 482c776 16353a0 1ef06de 8fc987e 482c776 1ef06de 16353a0 1ef06de 16353a0 1d55124 16353a0 ae41008 16353a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import pandas as pd
from groq import Groq, RateLimitError
import instructor
from pydantic import BaseModel
import os
# Read the Groq API key from the environment; os.getenv returns None when
# GROQ_API_KEY is not set.
api_key = os.getenv('GROQ_API_KEY')
# Create single patched Groq client with instructor for structured output
# (JSON mode). This module-level client is shared by summarize_post and
# classify_post.
client = instructor.from_groq(Groq(api_key=api_key), mode=instructor.Mode.JSON)
# Define pydantic schema for summarization output
class SummaryOutput(BaseModel):
    """Structured response requested from the summarizer models."""

    # Summary text; summarize_post returns this field to its caller.
    summary: str
# Define pydantic schema for classification output
class ClassificationOutput(BaseModel):
    """Structured response requested from the classifier model."""

    # Category name chosen by the model; classify_post returns this field.
    category: str
# Model that summarize_post tries first.
PRIMARY_SUMMARIZER_MODEL = "deepseek-r1-distill-llama-70b"
# Model that summarize_post retries with when the primary model is rate limited.
FALLBACK_SUMMARIZER_MODEL = "llama-3.3-70b-versatile"
# Summarize post text
def summarize_post(text):
    """Summarize a LinkedIn post in 5 to 10 words using the Groq client.

    The primary summarizer model is tried first. If it raises
    ``RateLimitError`` the fallback model is tried once. Any other error
    from the primary model, and any error from the fallback model, is
    printed and reported as ``None`` (best effort, never raises for API
    failures).

    Args:
        text: Post text. ``None``, NaN/NA values and empty or
            whitespace-only text are treated as "nothing to summarize".
            Other values are converted with ``str`` and truncated to 500
            characters.

    Returns:
        The summary string produced by the model, or ``None`` when there
        is nothing to summarize or both attempts failed.
    """
    if text is None or pd.isna(text):
        return None

    text = str(text)[:500]  # truncate to avoid token overflow
    if not text.strip():
        # Nothing meaningful to send; skip the API call instead of asking
        # the model to summarize an empty post.
        return None

    prompt = (
        "\n"
        "Summarize the following LinkedIn post in 5 to 10 words.\n"
        "Only return the summary inside a JSON field called 'summary'.\n"
        "Post Text:\n"
        f'"""{text}"""\n'
    )

    # Attempt with primary model
    print(f"Attempting summarization with primary model: {PRIMARY_SUMMARIZER_MODEL}")
    try:
        return _request_summary(PRIMARY_SUMMARIZER_MODEL, prompt)
    except RateLimitError:
        # Only rate limiting triggers the fallback; fall through below.
        print(f"Rate limit hit for primary summarizer model: {PRIMARY_SUMMARIZER_MODEL}. Trying fallback: {FALLBACK_SUMMARIZER_MODEL}")
    except Exception as e_primary:
        print(f"Error during summarization with primary model ({PRIMARY_SUMMARIZER_MODEL}): {e_primary}")
        # You could also try fallback here for non-rate-limit errors if desired
        return None

    # Attempt with fallback model
    try:
        summary = _request_summary(FALLBACK_SUMMARIZER_MODEL, prompt)
    except RateLimitError as rle_fallback:
        print(f"Rate limit hit for fallback summarizer model ({FALLBACK_SUMMARIZER_MODEL}): {rle_fallback}. Summarization failed.")
        return None
    except Exception as e_fallback:
        print(f"Error during summarization with fallback model ({FALLBACK_SUMMARIZER_MODEL}): {e_fallback}")
        return None

    print(f"Summarization successful with fallback model: {FALLBACK_SUMMARIZER_MODEL}")
    return summary


def _request_summary(model, prompt):
    """Send one summarization request to ``model`` and return the summary text.

    Exceptions raised by the client propagate to the caller, which decides
    whether to fall back or give up.
    """
    response = client.chat.completions.create(
        model=model,
        response_model=SummaryOutput,
        messages=[
            {"role": "system", "content": "You are a precise summarizer. Only return a JSON object with a 'summary' string."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.3,  # same temperature for primary and fallback
    )
    return response.summary
# Classify post summary into structured categories
def classify_post(summary, labels):
    """Choose the single most relevant category for a post summary.

    Args:
        summary: Summary text to classify. ``None``, NaN/NA values and
            empty or whitespace-only text return ``None`` without calling
            the API.
        labels: Iterable of candidate category names shown to the model.

    Returns:
        The category named by the model. When the answer matches one of
        ``labels`` ignoring case and surrounding whitespace, the caller's
        spelling of that label is returned; otherwise the model's answer
        is returned stripped but otherwise unchanged. ``None`` is returned
        when there is nothing to classify or the request fails (the error
        is printed).
    """
    if summary is None or pd.isna(summary):
        return None

    summary_text = str(summary).strip()
    if not summary_text:
        return None

    labels = list(labels)
    # One category per line: several labels contain commas themselves, so a
    # comma-separated list would blur where one category ends and the next
    # begins.
    category_lines = "\n".join(f"- {label}" for label in labels)
    prompt = (
        "\n"
        f'Post Summary: "{summary_text}"\n'
        "Available Categories (one per line):\n"
        f"{category_lines}\n"
        "Task: Choose the single most relevant category from the list above that applies to this summary. Return only one category in a structured JSON format under the field 'category'.\n"
        "If no category applies, return 'None'.\n"
    )

    try:
        result = client.chat.completions.create(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",
            response_model=ClassificationOutput,
            messages=[
                {"role": "system", "content": "You are a strict classifier. Return only one matching category name under the field 'category'."},
                {"role": "user", "content": prompt},
            ],
            temperature=0,
        )
        answer = result.category.strip()
        # Map the answer back to the caller's exact label spelling when it
        # differs only by case; unknown answers pass through untouched.
        canonical = {str(label).casefold(): label for label in labels}
        return canonical.get(answer.casefold(), answer)
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"Classification error: {e}")
        return None
def summarize_and_classify_post(text, labels):
    """Summarize one post, then classify the summary.

    Classification is skipped (category stays ``None``) when no usable
    summary was produced.

    Returns:
        A dict with the keys ``"summary"`` and ``"category"``.
    """
    post_summary = summarize_post(text)

    post_category = None
    if post_summary:
        post_category = classify_post(post_summary, labels)

    return {"summary": post_summary, "category": post_category}
# Categories offered to the classifier when the caller supplies no labels.
DEFAULT_CATEGORY_LABELS = (
    "Company Culture and Values",
    "Employee Stories and Spotlights",
    "Work-Life Balance, Flexibility, and Well-being",
    "Diversity, Equity, and Inclusion (DEI)",
    "Professional Development and Growth Opportunities",
    "Mission, Vision, and Social Responsibility",
    "None",
)


def batch_summarize_and_classify(posts, labels=None):
    """Summarize and classify every post in a batch.

    Args:
        posts: Iterable of mapping-like objects; each post's ``"text"``
            entry (read with ``.get``) is summarized and classified.
            ``None`` is treated as an empty batch.
        labels: Optional category names to classify against. Defaults to
            ``DEFAULT_CATEGORY_LABELS``.

    Returns:
        A list with one ``{"summary": ..., "category": ...}`` dict per
        post, in input order.
    """
    if posts is None:
        return []
    if labels is None:
        labels = list(DEFAULT_CATEGORY_LABELS)

    return [
        summarize_and_classify_post(post.get("text"), labels)
        for post in posts
    ]
|