Spaces:
Running
Running
| import pandas as pd | |
| from groq import Groq, RateLimitError | |
| import instructor | |
| from pydantic import BaseModel | |
| import os | |
# Groq credentials are read from the environment; the value is None when the
# GROQ_API_KEY variable is not set.
api_key = os.environ.get("GROQ_API_KEY")

# One shared Groq client, patched by instructor in JSON mode so that chat
# completions can be requested with a pydantic `response_model`.
client = instructor.from_groq(
    Groq(api_key=api_key),
    mode=instructor.Mode.JSON,
)
class SummaryOutput(BaseModel):
    """Structured response schema for summarization requests."""

    # Summary text produced by the model.
    summary: str
# Pydantic schema for the structured output of classification requests.
class ClassificationOutput(BaseModel):
    """Structured response schema carrying a single category name."""

    # Category name chosen by the model.
    category: str
# Model tried first when summarizing a post.
PRIMARY_SUMMARIZER_MODEL = "deepseek-r1-distill-llama-70b"
# Model tried when the primary model reports a rate limit.
FALLBACK_SUMMARIZER_MODEL = "llama-3.3-70b-versatile"
# Summarize post text
def _request_summary(model, prompt):
    """Ask *model* for a structured summary and return its ``summary`` field.

    Any exception raised by the client call propagates to the caller, which
    decides whether to fall back or give up.
    """
    response = client.chat.completions.create(
        model=model,
        response_model=SummaryOutput,
        messages=[
            {"role": "system", "content": "You are a precise summarizer. Only return a JSON object with a 'summary' string."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.3,
    )
    return response.summary


def summarize_post(text):
    """Summarize a post in a few words using the Groq-backed client.

    The primary model is tried first. The fallback model is tried only when
    the primary attempt raises ``RateLimitError``; any other primary error is
    logged and ends the attempt.

    Args:
        text: Post text. Non-string values are converted with ``str``. Only
            the first 500 characters are sent to the model.

    Returns:
        The summary string, or ``None`` when ``text`` is missing (``None`` /
        NaN), blank, or when summarization fails.
    """
    if text is None or pd.isna(text):
        return None

    text = str(text)[:500]  # truncate to avoid token overflow
    if not text.strip():
        # Nothing to summarize, so do not spend an API call on it.
        return None

    prompt = f"""
Summarize the following LinkedIn post in 5 to 10 words.
Only return the summary inside a JSON field called 'summary'.
Post Text:
\"\"\"{text}\"\"\"
"""

    # Attempt with primary model
    try:
        print(f"Attempting summarization with primary model: {PRIMARY_SUMMARIZER_MODEL}")
        return _request_summary(PRIMARY_SUMMARIZER_MODEL, prompt)
    except RateLimitError:
        print(f"Rate limit hit for primary summarizer model: {PRIMARY_SUMMARIZER_MODEL}. Trying fallback: {FALLBACK_SUMMARIZER_MODEL}")
    except Exception as e_primary:
        print(f"Error during summarization with primary model ({PRIMARY_SUMMARIZER_MODEL}): {e_primary}")
        # You could also try fallback here for non-rate-limit errors if desired
        return None

    # Attempt with fallback model (reached only after a primary rate limit)
    try:
        summary = _request_summary(FALLBACK_SUMMARIZER_MODEL, prompt)
    except RateLimitError as rle_fallback:
        print(f"Rate limit hit for fallback summarizer model ({FALLBACK_SUMMARIZER_MODEL}): {rle_fallback}. Summarization failed.")
        return None
    except Exception as e_fallback:
        print(f"Error during summarization with fallback model ({FALLBACK_SUMMARIZER_MODEL}): {e_fallback}")
        return None

    print(f"Summarization successful with fallback model: {FALLBACK_SUMMARIZER_MODEL}")
    return summary
# Classify post summary into structured categories
def classify_post(summary, labels):
    """Pick the single best-matching category for a post summary.

    Args:
        summary: Summary text to classify.
        labels: Iterable of category names offered to the model.

    Returns:
        The chosen category. When the model's answer matches one of
        ``labels`` ignoring case and surrounding whitespace, that label is
        returned exactly as given in ``labels``; any other answer is returned
        unchanged. Returns ``None`` when ``summary`` is missing (``None`` /
        NaN), blank, or when the request fails.
    """
    if summary is None or pd.isna(summary):
        return None
    if isinstance(summary, str) and not summary.strip():
        # Nothing to classify, so do not spend an API call on it.
        return None

    # Materialize once: labels are used for the prompt and for matching below.
    labels = list(labels)
    prompt = f"""
Post Summary: "{summary}"
Available Categories:
{', '.join(labels)}
Task: Choose the single most relevant category from the list above that applies to this summary. Return only one category in a structured JSON format under the field 'category'.
If no category applies, return 'None'.
"""
    try:
        result = client.chat.completions.create(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",
            response_model=ClassificationOutput,
            messages=[
                {"role": "system", "content": "You are a strict classifier. Return only one matching category name under the field 'category'."},
                {"role": "user", "content": prompt},
            ],
            temperature=0,
        )
    except Exception as e:
        print(f"Classification error: {e}")
        return None

    category = result.category
    # Map near-exact answers (case / padding differences) onto the caller's
    # own label spelling so downstream comparisons against `labels` succeed.
    canonical = {label.casefold(): label for label in labels}
    return canonical.get(category.strip().casefold(), category)
def summarize_and_classify_post(text, labels):
    """Summarize one post and classify the resulting summary.

    Classification runs only when summarization produced a truthy summary;
    otherwise the category stays ``None``.

    Returns:
        A dict with the keys ``"summary"`` and ``"category"``.
    """
    outcome = {"summary": summarize_post(text), "category": None}
    if outcome["summary"]:
        outcome["category"] = classify_post(outcome["summary"], labels)
    return outcome
# Categories offered to the classifier when the caller supplies none.
_DEFAULT_LABELS = (
    "Company Culture and Values",
    "Employee Stories and Spotlights",
    "Work-Life Balance, Flexibility, and Well-being",
    "Diversity, Equity, and Inclusion (DEI)",
    "Professional Development and Growth Opportunities",
    "Mission, Vision, and Social Responsibility",
    "None",
)


def batch_summarize_and_classify(posts, labels=None):
    """Summarize and classify every post in *posts*, in order.

    Args:
        posts: Iterable of posts. Each post must provide ``.get("text")``
            (e.g. a dict); ``None`` is treated as an empty batch.
        labels: Optional category names to classify against. Defaults to the
            built-in category list.

    Returns:
        A list with one ``{"summary": ..., "category": ...}`` dict per post.
    """
    if posts is None:
        return []
    if labels is None:
        labels = list(_DEFAULT_LABELS)
    return [
        summarize_and_classify_post(post.get("text"), labels)
        for post in posts
    ]