|
|
"""
|
|
|
Sentiment Analysis Model Pipeline for Hugging Face Hub
|
|
|
"""
|
|
|
|
|
|
import logging
import pickle
import re
from typing import Dict, List, Union

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
|
|
|
|
|
|
class SentimentClassifier:
    """Sentiment classification model.

    Wraps any object exposing ``predict_proba`` and turns its output into a
    result dictionary.

    Instance fields:
        model: the attached predictor, or ``None`` until one is assigned.
        classes: default label names, used when the attached model does not
            expose string labels through a ``classes_`` attribute.
    """

    # Compiled once; both patterns are applied on every prediction.
    _NON_LETTERS = re.compile(r'[^a-zA-Z\s]')
    _WHITESPACE_RUN = re.compile(r'\s+')

    def __init__(self):
        self.model = None
        self.classes = ['negative', 'neutral', 'positive']

    def preprocess_text(self, text: str) -> str:
        """Clean and preprocess text.

        Lower-cases the input, removes every character that is neither an
        ASCII letter nor whitespace, collapses whitespace runs to a single
        space and strips both ends.
        """
        lowered = text.lower()
        letters_only = self._NON_LETTERS.sub('', lowered)
        return self._WHITESPACE_RUN.sub(' ', letters_only).strip()

    def _class_labels(self) -> List[str]:
        """Return the label names matching the columns of ``predict_proba``.

        The columns follow the model's own ``classes_`` order, so those
        labels are used whenever the model exposes them as strings. Models
        without ``classes_`` (or with non-string classes such as integer
        ids) keep the default ``self.classes`` mapping.
        """
        model_classes = getattr(self.model, "classes_", None)
        if (
            model_classes is not None
            and len(model_classes) > 0
            and all(isinstance(label, str) for label in model_classes)
        ):
            return [str(label) for label in model_classes]
        return self.classes

    def predict(self, text: str) -> Dict:
        """Predict sentiment of a single text.

        Args:
            text: raw input text; it is echoed back unmodified in the result.

        Returns:
            A dict with the keys ``"text"``, ``"prediction"``,
            ``"confidence"`` and ``"probabilities"`` (label -> probability).
            When no model is attached, ``"prediction"`` is ``"error"``,
            ``"confidence"`` is ``0.0`` and the probabilities are a fixed
            placeholder distribution.
        """
        if self.model is None:
            return {
                "text": text,
                "prediction": "error",
                "confidence": 0.0,
                "probabilities": {"positive": 0.33, "negative": 0.33, "neutral": 0.34}
            }

        processed_text = self.preprocess_text(text)
        probabilities = self.model.predict_proba([processed_text])[0]
        labels = self._class_labels()
        best_idx = int(np.argmax(probabilities))

        return {
            "text": text,
            "prediction": labels[best_idx],
            "confidence": float(probabilities[best_idx]),
            "probabilities": {
                class_name: float(prob)
                for class_name, prob in zip(labels, probabilities)
            }
        }
|
|
|
|
|
|
|
|
|
def pipeline(task: str, model=None, **kwargs):
    """Pipeline function for Hugging Face Hub.

    Args:
        task: task name; only ``"text-classification"`` is supported.
        model: forwarded as-is to ``SentimentAnalysisPipeline``.
        **kwargs: accepted and ignored.

    Returns:
        A new ``SentimentAnalysisPipeline`` instance.

    Raises:
        ValueError: if ``task`` is anything other than
            ``"text-classification"``.
    """
    # Reject unsupported tasks first so nothing is constructed for them.
    if task != "text-classification":
        raise ValueError(f"Task {task} not supported")
    return SentimentAnalysisPipeline(model)
|
|
|
|
|
|
class SentimentAnalysisPipeline:
    """Pipeline for sentiment analysis.

    Owns a ``SentimentClassifier`` and attaches a model to it at
    construction time: the model stored in ``model.pkl`` when it can be
    loaded, otherwise a small built-in fallback model.
    """

    def __init__(self, model=None):
        """Create the classifier and attach a model to it.

        Args:
            model: accepted for call compatibility; this class does not
                read it.
        """
        self.classifier = SentimentClassifier()
        self._load_model()

    def _load_model(self):
        """Load the trained model, falling back to the built-in one.

        Any ordinary failure (``joblib`` not installed, ``model.pkl``
        missing or unreadable) is logged as a warning and triggers the
        fallback. ``KeyboardInterrupt`` and ``SystemExit`` are not
        intercepted.
        """
        try:
            # Imported lazily so a missing joblib also ends in the fallback.
            import joblib

            # NOTE: joblib.load unpickles the file and can execute arbitrary
            # code; "model.pkl" must come from a trusted source.
            self.classifier.model = joblib.load("model.pkl")
        except Exception as exc:
            logging.getLogger(__name__).warning(
                "Could not load 'model.pkl' (%s); using the fallback model.",
                exc,
            )
            self._create_fallback_model()

    def _create_fallback_model(self):
        """Create a fallback model.

        Fits a TF-IDF + multinomial naive Bayes pipeline on a tiny
        hard-coded set of example sentences and attaches it to the
        classifier.
        """
        # Each row holds one positive, one negative and one neutral example,
        # in that order.
        texts = [
            "I love this product!", "This is terrible.", "It's okay, nothing special.",
            "Amazing quality!", "Worst experience ever.", "Pretty good overall.",
            "Absolutely fantastic!", "Completely disappointed.", "Average product.",
            "Excellent service!", "Terrible customer support.", "Decent enough.",
            "Outstanding quality!", "Completely useless.", "It's fine, I guess.",
            "Best purchase ever!", "Waste of money.", "Nothing special.",
            "Highly recommended!", "Would not buy again.", "Average at best.",
        ]
        # The label pattern repeats once per row of three sentences.
        labels = ["positive", "negative", "neutral"] * (len(texts) // 3)

        fallback = Pipeline([
            ('tfidf', TfidfVectorizer(max_features=1000, stop_words='english', ngram_range=(1, 2))),
            ('classifier', MultinomialNB()),
        ])
        fallback.fit(texts, labels)
        self.classifier.model = fallback

    def __call__(self, inputs: Union[str, List[str]], **kwargs):
        """Process inputs.

        Args:
            inputs: a single text or an iterable of texts.
            **kwargs: accepted and ignored.

        Returns:
            One result dict for a single string, otherwise a list with one
            result dict per input text.
        """
        if isinstance(inputs, str):
            return self.classifier.predict(inputs)
        return [self.classifier.predict(text) for text in inputs]
|
|
|
|
|
|
|
|
|
def sentiment_analysis_pipeline(model=None, **kwargs):
    """Create sentiment analysis pipeline.

    Args:
        model: forwarded as-is to ``SentimentAnalysisPipeline``.
        **kwargs: accepted and ignored.

    Returns:
        A new ``SentimentAnalysisPipeline`` instance.
    """
    return SentimentAnalysisPipeline(model)
|
|
|
|