Upload model.py with huggingface_hub

e26d346 verified about 2 months ago

4.52 kB

	"""
	Sentiment Analysis Model Pipeline for Hugging Face Hub
	"""

	import pickle
	import re
	import numpy as np
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.pipeline import Pipeline
	from typing import Dict, List, Union

	class SentimentClassifier:
	    """Sentiment classifier wrapping an estimator that exposes ``predict_proba``.

	    Attributes are documented inline in ``__init__``.
	    """

	    def __init__(self):
	        # Estimator used by ``predict``; stays None until one is assigned.
	        self.model = None
	        # Default label names, used when the estimator does not publish
	        # string labels of its own through ``classes_``.
	        self.classes = ['negative', 'neutral', 'positive']

	    def preprocess_text(self, text: str) -> str:
	        """Lower-case ``text``, drop non-letters and collapse whitespace.

	        Every character that is not an ASCII letter or whitespace is
	        removed, runs of whitespace become a single space, and leading and
	        trailing whitespace is stripped.
	        """
	        text = text.lower()
	        text = re.sub(r'[^a-zA-Z\s]', '', text)
	        return re.sub(r'\s+', ' ', text).strip()

	    def _labels(self) -> List[str]:
	        """Return the label names matching the columns of ``predict_proba``.

	        When the estimator exposes a non-empty ``classes_`` made only of
	        strings, those names are used so labels cannot drift out of step
	        with the probability columns. Otherwise (no ``classes_``, or
	        non-string class ids) the default ``self.classes`` list is used.
	        """
	        model_classes = getattr(self.model, "classes_", None)
	        if model_classes is not None and len(model_classes) > 0:
	            if all(isinstance(label, str) for label in model_classes):
	                return [str(label) for label in model_classes]
	        return self.classes

	    def predict(self, text: str) -> Dict:
	        """Predict the sentiment of a single text.

	        Returns a dict with the keys ``text`` (the original input),
	        ``prediction`` (the most probable label), ``confidence`` (its
	        probability as a float) and ``probabilities`` (label -> float).

	        When no model is set, a placeholder result is returned instead of
	        raising: ``prediction`` is ``"error"`` and ``confidence`` is 0.0.
	        """
	        if self.model is None:
	            return {
	                "text": text,
	                "prediction": "error",
	                "confidence": 0.0,
	                "probabilities": {"positive": 0.33, "negative": 0.33, "neutral": 0.34}
	            }

	        processed_text = self.preprocess_text(text)
	        # predict_proba takes a batch; we send one row and read it back.
	        probabilities = self.model.predict_proba([processed_text])[0]
	        labels = self._labels()
	        prediction_idx = int(np.argmax(probabilities))

	        return {
	            "text": text,
	            "prediction": labels[prediction_idx],
	            "confidence": float(probabilities[prediction_idx]),
	            "probabilities": {
	                class_name: float(prob)
	                for class_name, prob in zip(labels, probabilities)
	            }
	        }

	# Pipeline function for Hugging Face
	def pipeline(task: str, model=None, **kwargs):
	    """Build the pipeline object for a Hugging Face style task name.

	    Only ``"text-classification"`` is handled; it yields a
	    ``SentimentAnalysisPipeline`` constructed from ``model``. Any other
	    task raises ``ValueError``. Extra keyword arguments are accepted and
	    not used.
	    """
	    # Guard clause: reject unsupported tasks before building anything.
	    if task != "text-classification":
	        raise ValueError(f"Task {task} not supported")
	    return SentimentAnalysisPipeline(model)

	class SentimentAnalysisPipeline:
	    """Callable sentiment pipeline built around a ``SentimentClassifier``.

	    Calling the instance with a string returns one prediction dict; calling
	    it with an iterable of strings returns a list of prediction dicts.
	    """

	    def __init__(self, model=None):
	        """Create the classifier and attach a model to it.

	        Args:
	            model: Optional estimator. When it exposes ``predict_proba`` it
	                is used directly. Any other value (including ``None``) is
	                not used, and the model is loaded from ``model.pkl`` or
	                rebuilt from the bundled sample data.
	        """
	        self.classifier = SentimentClassifier()
	        if hasattr(model, "predict_proba"):
	            # A ready-made estimator was supplied: no loading required.
	            self.classifier.model = model
	        else:
	            self._load_model()

	    def _load_model(self):
	        """Load ``model.pkl`` from the working directory, else build a fallback.

	        Loading is best-effort: any ordinary failure (joblib missing, file
	        missing, unreadable pickle) is logged and the fallback model is
	        trained instead.
	        """
	        try:
	            import joblib
	            # NOTE(security): joblib.load unpickles the file, which can run
	            # arbitrary code. Only ship a model.pkl from a trusted source.
	            self.classifier.model = joblib.load("model.pkl")
	        except Exception as exc:
	            # ``except Exception`` (not a bare ``except``) so that
	            # KeyboardInterrupt and SystemExit still propagate.
	            import logging
	            logging.getLogger(__name__).warning(
	                "Could not load model.pkl (%s); using fallback model", exc
	            )
	            self._create_fallback_model()

	    def _create_fallback_model(self):
	        """Train a small TF-IDF + multinomial naive Bayes model on sample data."""
	        from sklearn.feature_extraction.text import TfidfVectorizer
	        from sklearn.naive_bayes import MultinomialNB
	        from sklearn.pipeline import Pipeline

	        # Sample training data
	        texts = [
	            "I love this product!", "This is terrible.", "It's okay, nothing special.",
	            "Amazing quality!", "Worst experience ever.", "Pretty good overall.",
	            "Absolutely fantastic!", "Completely disappointed.", "Average product.",
	            "Excellent service!", "Terrible customer support.", "Decent enough.",
	            "Outstanding quality!", "Completely useless.", "It's fine, I guess.",
	            "Best purchase ever!", "Waste of money.", "Nothing special.",
	            "Highly recommended!", "Would not buy again.", "Average at best."
	        ]
	        labels = ["positive", "negative", "neutral", "positive", "negative", "neutral",
	                  "positive", "negative", "neutral", "positive", "negative", "neutral",
	                  "positive", "negative", "neutral", "positive", "negative", "neutral",
	                  "positive", "negative", "neutral"]

	        fallback = Pipeline([
	            ('tfidf', TfidfVectorizer(max_features=1000, stop_words='english', ngram_range=(1, 2))),
	            ('classifier', MultinomialNB())
	        ])
	        # Train on the same cleaned form that the classifier feeds to the
	        # model at prediction time, so both sides see identical tokens.
	        cleaned_texts = [self.classifier.preprocess_text(sample) for sample in texts]
	        fallback.fit(cleaned_texts, labels)
	        # Publish the model only once it has been fitted.
	        self.classifier.model = fallback

	    def __call__(self, inputs: Union[str, List[str]], **kwargs):
	        """Classify one text or several.

	        Args:
	            inputs: A single string, or an iterable of strings.
	            **kwargs: Accepted and not used.

	        Returns:
	            One prediction dict for a string input, otherwise a list with
	            one prediction dict per item.
	        """
	        if isinstance(inputs, str):
	            return self.classifier.predict(inputs)
	        return [self.classifier.predict(text) for text in inputs]

	# For compatibility with transformers
	def sentiment_analysis_pipeline(model=None, **kwargs):
	    """Return a new ``SentimentAnalysisPipeline`` built from ``model``.

	    Extra keyword arguments are accepted and not used.
	    """
	    sentiment_pipeline = SentimentAnalysisPipeline(model)
	    return sentiment_pipeline