"""Summarize LinkedIn posts and classify the summaries using Groq-hosted models.

Structured (pydantic-validated) responses are obtained through an
instructor-patched Groq client created at import time.
"""

import os

import instructor
import pandas as pd
from groq import Groq, RateLimitError
from pydantic import BaseModel

api_key = os.getenv('GROQ_API_KEY')

# Create single patched Groq client with instructor for structured output
client = instructor.from_groq(Groq(api_key=api_key), mode=instructor.Mode.JSON)


class SummaryOutput(BaseModel):
    """Schema for the summarizer response: one short summary string."""

    summary: str


class ClassificationOutput(BaseModel):
    """Schema for the classifier response: one category name."""

    category: str


PRIMARY_SUMMARIZER_MODEL = "deepseek-r1-distill-llama-70b"
FALLBACK_SUMMARIZER_MODEL = "llama-3.3-70b-versatile"
CLASSIFIER_MODEL = "meta-llama/llama-4-maverick-17b-128e-instruct"

# Post text is truncated to this many characters to avoid token overflow.
MAX_POST_CHARS = 500

# Categories used by batch_summarize_and_classify when the caller gives none.
DEFAULT_LABELS = (
    "Company Culture and Values",
    "Employee Stories and Spotlights",
    "Work-Life Balance, Flexibility, and Well-being",
    "Diversity, Equity, and Inclusion (DEI)",
    "Professional Development and Growth Opportunities",
    "Mission, Vision, and Social Responsibility",
    "None",
)


def _is_rate_limit_error(exc):
    """Return True if *exc*, or any exception in its ``__cause__`` chain, is a RateLimitError.

    NOTE(review): some instructor versions appear to re-raise provider errors
    wrapped in their own retry exception; walking the explicit cause chain
    keeps the fallback working in that case - confirm against the installed
    version. Only ``__cause__`` is followed (not ``__context__``) so that an
    unrelated error raised while handling a rate limit is not misclassified.
    """
    seen = set()
    while exc is not None and id(exc) not in seen:
        if isinstance(exc, RateLimitError):
            return True
        seen.add(id(exc))
        exc = exc.__cause__
    return False


def _request_summary(model, prompt):
    """Send the summarization prompt to *model* and return the summary string."""
    response = client.chat.completions.create(
        model=model,
        response_model=SummaryOutput,
        messages=[
            {"role": "system", "content": "You are a precise summarizer. Only return a JSON object with a 'summary' string."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.3,  # same temperature for primary and fallback
    )
    return response.summary


def summarize_post(text):
    """Summarize a post in 5 to 10 words.

    The primary model is tried first; the fallback model is tried only when
    the primary attempt fails because of a rate limit.

    Args:
        text: Post text. ``None``, NaN/NA and blank text are treated as missing.

    Returns:
        The summary string, or ``None`` if the text is missing or
        summarization failed (failures are printed, not raised).
    """
    if text is None or pd.isna(text):
        return None

    text = str(text)[:MAX_POST_CHARS]
    if not text.strip():
        # Nothing to summarize: skip the API call instead of asking the
        # model to invent a summary for empty text.
        return None

    prompt = f"""
    Summarize the following LinkedIn post in 5 to 10 words.
    Only return the summary inside a JSON field called 'summary'.

    Post Text:
    \"\"\"{text}\"\"\"
    """

    print(f"Attempting summarization with primary model: {PRIMARY_SUMMARIZER_MODEL}")
    try:
        return _request_summary(PRIMARY_SUMMARIZER_MODEL, prompt)
    except Exception as e_primary:
        if not _is_rate_limit_error(e_primary):
            print(f"Error during summarization with primary model ({PRIMARY_SUMMARIZER_MODEL}): {e_primary}")
            return None

    # Only reached when the primary model was rate limited.
    print(f"Rate limit hit for primary summarizer model: {PRIMARY_SUMMARIZER_MODEL}. Trying fallback: {FALLBACK_SUMMARIZER_MODEL}")
    try:
        summary = _request_summary(FALLBACK_SUMMARIZER_MODEL, prompt)
    except Exception as e_fallback:
        if _is_rate_limit_error(e_fallback):
            print(f"Rate limit hit for fallback summarizer model ({FALLBACK_SUMMARIZER_MODEL}): {e_fallback}. Summarization failed.")
        else:
            print(f"Error during summarization with fallback model ({FALLBACK_SUMMARIZER_MODEL}): {e_fallback}")
        return None

    print(f"Summarization successful with fallback model: {FALLBACK_SUMMARIZER_MODEL}")
    return summary


def classify_post(summary, labels):
    """Ask the classifier model to pick one category for a post summary.

    Args:
        summary: Summary text. ``None`` and NaN/NA are treated as missing.
        labels: Iterable of category names offered to the model.

    Returns:
        The category string returned by the model (not validated against
        *labels*), or ``None`` if the summary is missing or the request
        failed (failures are printed, not raised).
    """
    if summary is None or pd.isna(summary):
        return None

    prompt = f"""
    Post Summary: "{summary}"

    Available Categories: {', '.join(labels)}

    Task: Choose the single most relevant category from the list above that applies to this summary.
    Return only one category in a structured JSON format under the field 'category'. If no category applies, return 'None'.
    """

    try:
        result = client.chat.completions.create(
            model=CLASSIFIER_MODEL,
            response_model=ClassificationOutput,
            messages=[
                {"role": "system", "content": "You are a strict classifier. Return only one matching category name under the field 'category'."},
                {"role": "user", "content": prompt},
            ],
            temperature=0,
        )
        return result.category
    except Exception as e:
        print(f"Classification error: {e}")
        return None


def summarize_and_classify_post(text, labels):
    """Summarize *text*, then classify the summary against *labels*.

    Returns:
        A dict with keys ``"summary"`` and ``"category"``. ``"category"`` is
        ``None`` when no (non-empty) summary was produced.
    """
    summary = summarize_post(text)
    category = classify_post(summary, labels) if summary else None
    return {
        "summary": summary,
        "category": category,
    }


def batch_summarize_and_classify(posts, labels=None):
    """Summarize and classify every post in *posts*, in order.

    Args:
        posts: Iterable of mappings; each post's text is read with
            ``post.get("text")``.
        labels: Optional category names. Defaults to ``DEFAULT_LABELS``.

    Returns:
        A list with one ``{"summary": ..., "category": ...}`` dict per post.
    """
    if labels is None:
        labels = list(DEFAULT_LABELS)
    return [summarize_and_classify_post(post.get("text"), labels) for post in posts]