GunjanSingh committed on
Commit
e26d346
·
verified ·
1 Parent(s): 9364687

Upload model.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. model.py +116 -0
model.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentiment Analysis Model Pipeline for Hugging Face Hub
3
+ """
4
+
5
+ import pickle
6
+ import re
7
+ import numpy as np
8
+ from sklearn.feature_extraction.text import TfidfVectorizer
9
+ from sklearn.naive_bayes import MultinomialNB
10
+ from sklearn.pipeline import Pipeline
11
+ from typing import Dict, List, Union
12
+
13
class SentimentClassifier:
    """Sentiment classification model.

    Wraps an estimator stored in ``self.model`` that must expose
    ``predict_proba`` (called with a one-element list of text) and turns its
    output into a plain dictionary of labels and probabilities.
    """

    def __init__(self):
        # Estimator used for inference; stays ``None`` until a caller assigns one.
        self.model = None
        # Default label order, used when the model does not report usable
        # string labels of its own.
        self.classes = ['negative', 'neutral', 'positive']

    def preprocess_text(self, text: str) -> str:
        """Clean and preprocess text.

        Lowercases the text, drops every character that is not an ASCII
        letter or whitespace, collapses whitespace runs to a single space
        and strips the ends.
        """
        text = text.lower()
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text

    def _class_labels(self, n_outputs: int) -> List[str]:
        """Return the label for each probability column.

        Prefers the model's own ``classes_`` when it holds exactly
        ``n_outputs`` string labels, because the probability columns follow
        that order. Otherwise falls back to ``self.classes`` so models with
        non-string (e.g. integer) labels keep the previous positional mapping.
        """
        labels = getattr(self.model, "classes_", None)
        if (
            labels is not None
            and len(labels) == n_outputs
            and all(isinstance(label, str) for label in labels)
        ):
            return [str(label) for label in labels]
        return self.classes

    def predict(self, text: str) -> Dict:
        """Predict sentiment of a single text.

        Args:
            text: Raw input text; it is preprocessed before inference and
                echoed back unchanged in the result.

        Returns:
            A dict with keys ``text``, ``prediction``, ``confidence`` and
            ``probabilities`` (label -> probability). When no model is set,
            ``prediction`` is ``"error"`` with fixed placeholder values.
        """
        if self.model is None:
            return {
                "text": text,
                "prediction": "error",
                "confidence": 0.0,
                "probabilities": {"positive": 0.33, "negative": 0.33, "neutral": 0.34}
            }

        processed_text = self.preprocess_text(text)
        probabilities = self.model.predict_proba([processed_text])[0]
        # Resolve labels from the model so a differently ordered ``classes_``
        # cannot mislabel the probability columns.
        labels = self._class_labels(len(probabilities))
        prediction_idx = int(np.argmax(probabilities))

        return {
            "text": text,
            "prediction": labels[prediction_idx],
            "confidence": float(probabilities[prediction_idx]),
            "probabilities": {
                class_name: float(prob)
                for class_name, prob in zip(labels, probabilities)
            }
        }
52
+
53
+ # Pipeline function for Hugging Face
54
def pipeline(task: str, model=None, **kwargs):
    """Build the pipeline object for a requested task.

    Only ``"text-classification"`` is supported; any other task name raises
    ``ValueError``. ``model`` is forwarded to ``SentimentAnalysisPipeline``
    and extra keyword arguments are accepted but not used.
    """
    # Guard clause: reject unsupported tasks before constructing anything.
    if task != "text-classification":
        raise ValueError(f"Task {task} not supported")
    return SentimentAnalysisPipeline(model)
60
+
61
class SentimentAnalysisPipeline:
    """Pipeline for sentiment analysis.

    Owns a ``SentimentClassifier`` and makes sure it has a model: either the
    estimator handed to the constructor, the one stored in ``model.pkl``, or
    a small fallback model trained on built-in sample sentences.
    """

    def __init__(self, model=None):
        """Create the pipeline.

        Args:
            model: Optional estimator exposing ``predict_proba``; when given
                it is used directly. Any other value (including ``None``)
                triggers the default loading from ``model.pkl``.
        """
        self.classifier = SentimentClassifier()
        if model is not None and hasattr(model, "predict_proba"):
            # A ready-to-use estimator was supplied; do not override it.
            self.classifier.model = model
        else:
            # Load the trained model
            self._load_model()

    def _load_model(self):
        """Load the trained model, falling back to a built-in one on failure."""
        import logging

        try:
            # Try to load from model files
            import joblib
            # NOTE(security): joblib.load unpickles the file and can execute
            # arbitrary code; only use a model.pkl from a trusted source.
            self.classifier.model = joblib.load("model.pkl")
        except Exception as exc:
            # Best-effort by design: a missing joblib or an unreadable file
            # must not break construction. ``Exception`` (not a bare except)
            # lets KeyboardInterrupt/SystemExit propagate.
            logging.getLogger(__name__).warning(
                "Could not load model.pkl (%s); using fallback model", exc
            )
            # Fallback: create a simple model
            self._create_fallback_model()

    def _create_fallback_model(self):
        """Create a fallback model.

        Fits a TF-IDF + multinomial naive Bayes pipeline on a fixed set of
        21 labelled sample sentences and stores it on the classifier.
        """
        from sklearn.feature_extraction.text import TfidfVectorizer
        from sklearn.naive_bayes import MultinomialNB
        from sklearn.pipeline import Pipeline

        # Sample training data
        texts = [
            "I love this product!", "This is terrible.", "It's okay, nothing special.",
            "Amazing quality!", "Worst experience ever.", "Pretty good overall.",
            "Absolutely fantastic!", "Completely disappointed.", "Average product.",
            "Excellent service!", "Terrible customer support.", "Decent enough.",
            "Outstanding quality!", "Completely useless.", "It's fine, I guess.",
            "Best purchase ever!", "Waste of money.", "Nothing special.",
            "Highly recommended!", "Would not buy again.", "Average at best."
        ]
        labels = ["positive", "negative", "neutral", "positive", "negative", "neutral",
                  "positive", "negative", "neutral", "positive", "negative", "neutral",
                  "positive", "negative", "neutral", "positive", "negative", "neutral",
                  "positive", "negative", "neutral"]

        self.classifier.model = Pipeline([
            ('tfidf', TfidfVectorizer(max_features=1000, stop_words='english', ngram_range=(1, 2))),
            ('classifier', MultinomialNB())
        ])
        self.classifier.model.fit(texts, labels)

    def __call__(self, inputs: Union[str, List[str]], **kwargs):
        """Process inputs.

        Args:
            inputs: A single text, or an iterable of texts.

        Returns:
            One prediction dict for a single string, otherwise a list with
            one prediction dict per input text.
        """
        if isinstance(inputs, str):
            return self.classifier.predict(inputs)
        return [self.classifier.predict(text) for text in inputs]
112
+
113
+ # For compatibility with transformers
114
def sentiment_analysis_pipeline(model=None, **kwargs):
    """Return a new ``SentimentAnalysisPipeline`` built from ``model``.

    Extra keyword arguments are accepted but not used.
    """
    sentiment_pipeline = SentimentAnalysisPipeline(model)
    return sentiment_pipeline