Spaces:

GuglielmoTor
/

LinkedinMonitor

Sleeping

App Files Files Community

LinkedinMonitor / posts_categorization.py

GuglielmoTor

Update posts_categorization.py

ae41008 verified 8 months ago

raw

history blame

3.2 kB

	import pandas as pd
	from groq import Groq
	import instructor
	from pydantic import BaseModel
	import os

	# Groq API key taken from the environment; ``None`` when the variable is unset.
	api_key = os.environ.get("GROQ_API_KEY")

	# One shared Groq client, patched by instructor in JSON mode so that chat
	# completions can be requested with a ``response_model``.
	client = instructor.from_groq(
	    Groq(api_key=api_key),
	    mode=instructor.Mode.JSON,
	)

	# Structured-output schema for the summarization request: it is passed as
	# ``response_model`` so the reply is parsed into one ``summary`` string.
	# NOTE(review): documented with ``#`` comments on purpose -- a class docstring
	# would presumably be emitted as the schema description; confirm before
	# converting this into a docstring.
	class SummaryOutput(BaseModel):
	    summary: str  # short summary text produced by the model

	# Structured-output schema for the classification request: it is passed as
	# ``response_model`` so the reply is parsed into one ``category`` string.
	# NOTE(review): documented with ``#`` comments on purpose -- a class docstring
	# would presumably be emitted as the schema description; confirm before
	# converting this into a docstring.
	class ClassificationOutput(BaseModel):
	    category: str  # category name chosen by the model

	# Summarize post text
	def summarize_post(text):
	    """Summarize a LinkedIn post in a few words using the shared Groq client.

	    Args:
	        text: Post body. Non-string values are converted with ``str()``.
	            Only the first 2000 characters are sent to the model.

	    Returns:
	        The ``summary`` string from the model's structured response, or
	        ``None`` when ``text`` is missing (``None``/NaN), blank, or the
	        request fails for any reason (the error is printed).
	    """
	    # ``None`` is checked first so pandas is not consulted needlessly, and
	    # strings skip ``pd.isna`` because a string can never be NaN.
	    if text is None or (not isinstance(text, str) and pd.isna(text)):
	        return None

	    text = str(text)[:2000]  # truncate to avoid token overflow

	    # Nothing to summarize: skip the API call rather than asking the model
	    # to describe an empty post.
	    if not text.strip():
	        return None

	    prompt = f"""
	Summarize the following LinkedIn post in 5 to 10 words.
	Only return the summary inside a JSON field called 'summary'.

	Post Text:
	\"\"\"{text}\"\"\"
	"""

	    try:
	        response = client.chat.completions.create(
	            model="deepseek-r1-distill-llama-70b",
	            response_model=SummaryOutput,
	            messages=[
	                {"role": "system", "content": "You are a precise summarizer. Only return a JSON object with a 'summary' string."},
	                {"role": "user", "content": prompt}
	            ],
	            temperature=0.3
	        )
	        return response.summary
	    except Exception as e:
	        # Best effort: one failed post must not abort a whole batch.
	        print(f"Summarization error: {e}")
	        return None


	# Classify post summary into structured categories
	def classify_post(summary, labels):
	    """Pick the single best-fitting category for a post summary.

	    Args:
	        summary: Short summary text of a post.
	        labels: Iterable of category names the model may choose from.
	            Items are converted with ``str()``.

	    Returns:
	        The chosen category, or ``None`` when ``summary`` is missing
	        (``None``/NaN) or blank, or when the request fails (the error is
	        printed). If the model's answer equals one of ``labels`` ignoring
	        case and surrounding whitespace, that label's exact spelling is
	        returned; otherwise the model's answer is returned unchanged.
	    """
	    if summary is None or (not isinstance(summary, str) and pd.isna(summary)):
	        return None
	    # A blank summary carries nothing to classify; skip the API call.
	    if not str(summary).strip():
	        return None

	    # Materialise once so a one-shot iterable can be used both for the
	    # prompt and for normalising the answer below.
	    labels = [str(label) for label in labels]

	    # One label per line: several labels contain commas themselves, so a
	    # comma-separated list would blur where one category ends and the
	    # next begins.
	    label_lines = "\n\t".join(f"- {label}" for label in labels)

	    prompt = f"""
	Post Summary: "{summary}"

	Available Categories (one per line):
	{label_lines}

	Task: Choose the single most relevant category from the list above that applies to this summary. Return only one category in a structured JSON format under the field 'category'.
	If no category applies, return 'None'.
	"""
	    try:
	        result = client.chat.completions.create(
	            model="meta-llama/llama-4-maverick-17b-128e-instruct",
	            response_model=ClassificationOutput,
	            messages=[
	                {"role": "system", "content": "You are a strict classifier. Return only one matching category name under the field 'category'."},
	                {"role": "user", "content": prompt}
	            ],
	            temperature=0.3,
	            max_tokens=60
	        )
	    except Exception as e:
	        # Best effort: one failed post must not abort a whole batch.
	        print(f"Classification error: {e}")
	        return None

	    # Map answers that differ only by case/whitespace back onto the
	    # caller's spelling; the first label wins if two normalise alike.
	    canonical = {}
	    for label in labels:
	        canonical.setdefault(label.strip().casefold(), label)
	    answer = result.category
	    return canonical.get(answer.strip().casefold(), answer)

	def summarize_and_classify_post(text, labels):
	    """Summarize one post and classify the resulting summary.

	    Args:
	        text: Post body, forwarded to ``summarize_post``.
	        labels: Category names, forwarded to ``classify_post``.

	    Returns:
	        A dict with keys ``"summary"`` and ``"category"``. The category is
	        ``None`` whenever the summary is falsy (classification is skipped).
	    """
	    post_summary = summarize_post(text)

	    # Classification is only attempted when a usable summary exists.
	    if post_summary:
	        post_category = classify_post(post_summary, labels)
	    else:
	        post_category = None

	    return {"summary": post_summary, "category": post_category}

	def batch_summarize_and_classify(posts, labels=None):
	    """Summarize and classify every post in ``posts``, in order.

	    Args:
	        posts: Iterable of mapping-like objects; each post's body is read
	            with ``post.get("text")``.
	        labels: Optional iterable of category names. When omitted, the
	            built-in category list below is used.

	    Returns:
	        A list with one ``{"summary": ..., "category": ...}`` dict per post,
	        in input order.
	    """
	    if labels is None:
	        labels = [
	            "Company Culture and Values",
	            "Employee Stories and Spotlights",
	            "Work-Life Balance, Flexibility, and Well-being",
	            "Diversity, Equity, and Inclusion (DEI)",
	            "Professional Development and Growth Opportunities",
	            "Mission, Vision, and Social Responsibility",
	            "None",
	        ]
	    else:
	        # Materialise once so a one-shot iterable survives being reused
	        # for every post.
	        labels = list(labels)

	    return [
	        summarize_and_classify_post(post.get("text"), labels)
	        for post in posts
	    ]