"""
Sentiment Analysis Model Pipeline for Hugging Face Hub
"""

import logging
import pickle
import re
from typing import Dict, List, Union

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

logger = logging.getLogger(__name__)


class SentimentClassifier:
    """Sentiment classification model.

    Holds an estimator in ``self.model`` (``None`` until one is assigned)
    and turns its ``predict_proba`` output into a plain result dictionary.
    """

    def __init__(self):
        self.model = None
        # Default label order, used only when the estimator does not expose
        # its own ``classes_`` attribute.
        self.classes = ['negative', 'neutral', 'positive']

    def preprocess_text(self, text: str) -> str:
        """Clean and preprocess text.

        Lower-cases the text, removes every character that is not an ASCII
        letter or whitespace, collapses whitespace runs to a single space
        and strips leading/trailing whitespace.
        """
        text = text.lower()
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text

    def _class_labels(self) -> List[str]:
        """Return the labels matching the columns of ``predict_proba``.

        The estimator's own ``classes_`` is preferred so that a model
        trained with a different label set or order is not mislabelled;
        ``self.classes`` is the fallback when that attribute is missing.
        """
        model_labels = getattr(self.model, "classes_", None)
        if model_labels is None:
            return list(self.classes)
        # ``classes_`` typically holds numpy scalars; convert to plain str.
        return [str(label) for label in model_labels]

    def predict(self, text: str) -> Dict:
        """Predict sentiment of a single text.

        Args:
            text: Raw input text. It is preprocessed before being passed
                to the model, but returned unchanged in the result.

        Returns:
            A dict with the keys ``text``, ``prediction``, ``confidence``
            and ``probabilities`` (label -> probability). When no model is
            loaded, ``prediction`` is ``"error"``, ``confidence`` is ``0.0``
            and ``probabilities`` holds fixed placeholder values.
        """
        if self.model is None:
            return {
                "text": text,
                "prediction": "error",
                "confidence": 0.0,
                "probabilities": {"positive": 0.33, "negative": 0.33, "neutral": 0.34}
            }

        processed_text = self.preprocess_text(text)
        probabilities = self.model.predict_proba([processed_text])[0]
        labels = self._class_labels()

        # Cast to a built-in int so it can index a plain Python list.
        best_idx = int(np.argmax(probabilities))

        return {
            "text": text,
            "prediction": labels[best_idx],
            "confidence": float(probabilities[best_idx]),
            "probabilities": {
                label: float(prob)
                for label, prob in zip(labels, probabilities)
            }
        }


# Pipeline function for Hugging Face
def pipeline(task: str, model=None, **kwargs):
    """Pipeline function for Hugging Face Hub.

    Args:
        task: Task name; only ``"text-classification"`` is supported.
        model: Forwarded to ``SentimentAnalysisPipeline``.
        **kwargs: Accepted and ignored.

    Returns:
        A ``SentimentAnalysisPipeline`` instance.

    Raises:
        ValueError: If ``task`` is not ``"text-classification"``.
    """
    if task == "text-classification":
        return SentimentAnalysisPipeline(model)
    else:
        raise ValueError(f"Task {task} not supported")


class SentimentAnalysisPipeline:
    """Pipeline for sentiment analysis.

    On construction it tries to load ``model.pkl`` from the current working
    directory and, if that fails, trains a small built-in fallback model.
    """

    def __init__(self, model=None):
        # NOTE: ``model`` is accepted for call compatibility but is not
        # used; the model is always loaded from "model.pkl" or replaced by
        # the fallback model.
        self.classifier = SentimentClassifier()

        # Load the trained model
        self._load_model()

    def _load_model(self):
        """Load the trained model, falling back to a tiny built-in one."""
        try:
            # Imported here so that a missing joblib installation is
            # handled by the fallback path as well.
            import joblib

            # NOTE(security): joblib.load unpickles arbitrary objects and
            # can execute code; "model.pkl" must come from a trusted source.
            self.classifier.model = joblib.load("model.pkl")
        except Exception as exc:
            # Best-effort load: any failure (missing file, missing joblib,
            # corrupt pickle) leads to the fallback model. Unlike a bare
            # ``except:``, KeyboardInterrupt/SystemExit still propagate.
            logger.warning(
                "Could not load 'model.pkl' (%s); using the built-in "
                "fallback model.",
                exc,
            )
            self._create_fallback_model()

    def _create_fallback_model(self):
        """Create a fallback model.

        Fits a TF-IDF + multinomial naive Bayes pipeline on a handful of
        hand-written samples and stores it on ``self.classifier.model``.
        """
        # Sample training data as (text, label) pairs so that every text
        # stays next to its label.
        samples = [
            ("I love this product!", "positive"),
            ("This is terrible.", "negative"),
            ("It's okay, nothing special.", "neutral"),
            ("Amazing quality!", "positive"),
            ("Worst experience ever.", "negative"),
            ("Pretty good overall.", "neutral"),
            ("Absolutely fantastic!", "positive"),
            ("Completely disappointed.", "negative"),
            ("Average product.", "neutral"),
            ("Excellent service!", "positive"),
            ("Terrible customer support.", "negative"),
            ("Decent enough.", "neutral"),
            ("Outstanding quality!", "positive"),
            ("Completely useless.", "negative"),
            ("It's fine, I guess.", "neutral"),
            ("Best purchase ever!", "positive"),
            ("Waste of money.", "negative"),
            ("Nothing special.", "neutral"),
            ("Highly recommended!", "positive"),
            ("Would not buy again.", "negative"),
            ("Average at best.", "neutral"),
        ]

        # Train on the same preprocessing that predict() applies at
        # inference time, so both sides see identically cleaned text.
        texts = [self.classifier.preprocess_text(text) for text, _ in samples]
        labels = [label for _, label in samples]

        self.classifier.model = Pipeline([
            ('tfidf', TfidfVectorizer(max_features=1000, stop_words='english', ngram_range=(1, 2))),
            ('classifier', MultinomialNB())
        ])

        self.classifier.model.fit(texts, labels)

    def __call__(self, inputs: Union[str, List[str]], **kwargs):
        """Process inputs.

        Args:
            inputs: A single text or an iterable of texts.
            **kwargs: Accepted and ignored.

        Returns:
            One result dict for a ``str`` input, otherwise a list with one
            result dict per element of ``inputs``.
        """
        if isinstance(inputs, str):
            return self.classifier.predict(inputs)
        else:
            return [self.classifier.predict(text) for text in inputs]


# For compatibility with transformers
def sentiment_analysis_pipeline(model=None, **kwargs):
    """Create sentiment analysis pipeline.

    ``model`` is forwarded to ``SentimentAnalysisPipeline``; ``**kwargs``
    are accepted and ignored.
    """
    return SentimentAnalysisPipeline(model)