# GunjanSingh's picture
# Upload model.py with huggingface_hub
# e26d346 verified
"""
Sentiment Analysis Model Pipeline for Hugging Face Hub
"""
import pickle
import re
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from typing import Dict, List, Union
class SentimentClassifier:
    """Sentiment classifier wrapping a probabilistic text model.

    The wrapped ``model`` must expose ``predict_proba`` accepting a list of
    strings. Until a model is assigned, ``predict`` returns a placeholder
    result whose ``prediction`` is ``"error"``.
    """

    def __init__(self):
        # Estimator used by ``predict``; ``None`` until one is assigned.
        self.model = None
        # Default label order, used when the model does not expose ``classes_``.
        self.classes = ['negative', 'neutral', 'positive']

    def preprocess_text(self, text: str) -> str:
        """Normalize ``text`` for the model.

        Lower-cases the input, removes every character that is neither an
        ASCII letter nor whitespace, collapses whitespace runs to a single
        space and strips leading/trailing whitespace.
        """
        text = text.lower()
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text

    def _label_order(self) -> List[str]:
        """Return the labels in the same order as the ``predict_proba`` columns.

        A fitted estimator's ``classes_`` is preferred when present, so the
        mapping does not depend on the hard-coded default happening to match
        the model's column order. ``self.classes`` is the fallback.
        """
        model_labels = getattr(self.model, "classes_", None)
        if model_labels is None:
            return list(self.classes)
        return [str(label) for label in model_labels]

    def predict(self, text: str) -> Dict:
        """Predict the sentiment of a single text.

        Args:
            text: Raw input text; it is preprocessed before scoring.

        Returns:
            A dict with the original ``text``, the predicted label under
            ``prediction``, its probability under ``confidence`` and a
            label-to-probability mapping under ``probabilities``. When no
            model is set, a fixed placeholder with ``prediction == "error"``
            is returned instead of raising.
        """
        if self.model is None:
            return {
                "text": text,
                "prediction": "error",
                "confidence": 0.0,
                "probabilities": {"positive": 0.33, "negative": 0.33, "neutral": 0.34}
            }

        processed_text = self.preprocess_text(text)
        probabilities = self.model.predict_proba([processed_text])[0]
        labels = self._label_order()
        # int() so the index works with any sequence type, not just lists.
        prediction_idx = int(np.argmax(probabilities))

        return {
            "text": text,
            "prediction": labels[prediction_idx],
            "confidence": float(probabilities[prediction_idx]),
            "probabilities": {
                label: float(prob)
                for label, prob in zip(labels, probabilities)
            }
        }
# Pipeline function for Hugging Face
def pipeline(task: str, model=None, **kwargs):
    """Build the pipeline object for ``task``.

    Only ``"text-classification"`` is supported; any other task name raises
    ``ValueError``. ``model`` is forwarded to the pipeline constructor and
    extra keyword arguments are accepted but ignored.
    """
    # Reject unsupported tasks up front so the success path stays flat.
    if task != "text-classification":
        raise ValueError(f"Task {task} not supported")
    return SentimentAnalysisPipeline(model)
class SentimentAnalysisPipeline:
    """Callable pipeline that classifies the sentiment of one or many texts."""

    def __init__(self, model=None):
        """Create the classifier and load its model.

        Args:
            model: Accepted for interface compatibility; it is not used.
                The model is always loaded from ``model.pkl`` or, failing
                that, rebuilt from the built-in sample data.
        """
        self.classifier = SentimentClassifier()
        # Load the trained model
        self._load_model()

    def _load_model(self):
        """Load the trained model from ``model.pkl``, else build the fallback.

        The path is relative to the current working directory. Any ordinary
        failure (missing ``joblib``, missing or unreadable file) is logged as
        a warning and the small built-in model is trained instead.
        """
        try:
            import joblib
            # SECURITY: joblib.load unpickles the file, which can execute
            # arbitrary code. Only load a model.pkl from a trusted source.
            self.classifier.model = joblib.load("model.pkl")
        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
            # and SystemExit still propagate. The fallback is kept
            # best-effort, but no longer silent.
            import logging
            logging.getLogger(__name__).warning(
                "Could not load 'model.pkl' (%s: %s); using the built-in fallback model.",
                type(exc).__name__,
                exc,
            )
            self._create_fallback_model()

    def _create_fallback_model(self):
        """Train a TF-IDF + multinomial naive Bayes model on 21 sample sentences.

        The result is stored on ``self.classifier.model``. The training set
        is tiny and is only a stand-in for a real trained model.
        """
        from sklearn.feature_extraction.text import TfidfVectorizer
        from sklearn.naive_bayes import MultinomialNB
        from sklearn.pipeline import Pipeline

        # Sample training data: seven examples per class, in repeating
        # positive / negative / neutral order.
        texts = [
            "I love this product!", "This is terrible.", "It's okay, nothing special.",
            "Amazing quality!", "Worst experience ever.", "Pretty good overall.",
            "Absolutely fantastic!", "Completely disappointed.", "Average product.",
            "Excellent service!", "Terrible customer support.", "Decent enough.",
            "Outstanding quality!", "Completely useless.", "It's fine, I guess.",
            "Best purchase ever!", "Waste of money.", "Nothing special.",
            "Highly recommended!", "Would not buy again.", "Average at best."
        ]
        labels = ["positive", "negative", "neutral", "positive", "negative", "neutral",
                  "positive", "negative", "neutral", "positive", "negative", "neutral",
                  "positive", "negative", "neutral", "positive", "negative", "neutral",
                  "positive", "negative", "neutral"]

        self.classifier.model = Pipeline([
            ('tfidf', TfidfVectorizer(max_features=1000, stop_words='english', ngram_range=(1, 2))),
            ('classifier', MultinomialNB())
        ])
        self.classifier.model.fit(texts, labels)

    def __call__(self, inputs: Union[str, List[str]], **kwargs):
        """Classify ``inputs``.

        Args:
            inputs: A single string, or an iterable of strings.
            **kwargs: Accepted and ignored.

        Returns:
            One prediction dict for a string input, otherwise a list with
            one prediction dict per element, in input order.
        """
        if isinstance(inputs, str):
            return self.classifier.predict(inputs)
        return [self.classifier.predict(text) for text in inputs]
# For compatibility with transformers
def sentiment_analysis_pipeline(model=None, **kwargs):
    """Return a new ``SentimentAnalysisPipeline`` built from ``model``.

    Extra keyword arguments are accepted but ignored.
    """
    sentiment_pipeline = SentimentAnalysisPipeline(model)
    return sentiment_pipeline