Spaces:
Sleeping
Sleeping
File size: 3,306 Bytes
dbaa71b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import logging
from typing import Any, List, Optional
from pydantic import PrivateAttr
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from obsei.analyzer.base_analyzer import (
BaseAnalyzer,
BaseAnalyzerConfig,
)
from obsei.payload import TextPayload
from obsei.analyzer.classification_analyzer import (
ClassificationAnalyzerConfig,
ZeroShotClassificationAnalyzer,
)
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class VaderSentimentAnalyzer(BaseAnalyzer):
    """Sentiment analyzer that scores text with VADER.

    For every incoming payload the VADER ``compound`` score is turned into a
    ``positive``/``negative`` pair, which is stored under the
    ``"classifier_data"`` key of the resulting payload's ``segmented_data``.
    """

    # VADER model instance; assigned in ``__init__`` once the base class has
    # been initialised.
    _model: SentimentIntensityAnalyzer = PrivateAttr()
    TYPE: str = "Sentiment"

    def __init__(self, **data: Any):
        """Initialise the base analyzer with ``data`` and create the VADER model."""
        super().__init__(**data)
        self._model = SentimentIntensityAnalyzer()

    def _get_sentiment_score_from_vader(self, text: str) -> float:
        """Return the ``compound`` entry of VADER's polarity scores for ``text``."""
        polarity = self._model.polarity_scores(text)
        return float(polarity["compound"])

    def _classify_payload(self, payload: TextPayload) -> TextPayload:
        """Create a new payload carrying the sentiment scores for ``payload``.

        A negative compound score ``c`` yields ``negative = -c`` and
        ``positive = 1 - negative``; any other score yields ``positive = c``
        and ``negative = 1 - positive``.

        Args:
            payload: Payload whose ``processed_text`` is scored.

        Returns:
            A new ``TextPayload`` with the same ``processed_text``, ``meta``
            and ``source_name``, and with ``segmented_data`` holding the
            scores plus whatever ``payload.segmented_data`` already contained.
        """
        compound = self._get_sentiment_score_from_vader(payload.processed_text)

        # The dominant label is inserted first in each branch.
        if compound < 0.0:
            negative = -compound
            scores = {"negative": negative, "positive": 1.0 - negative}
        else:
            scores = {"positive": compound, "negative": 1.0 - compound}

        segmented_data = {"classifier_data": scores}
        previous = payload.segmented_data
        if previous:
            # Existing entries are unpacked last, so they take precedence on
            # a key clash.
            segmented_data = {**segmented_data, **previous}

        return TextPayload(
            processed_text=payload.processed_text,
            meta=payload.meta,
            segmented_data=segmented_data,
            source_name=payload.source_name,
        )

    def analyze_input(
        self,
        source_response_list: List[TextPayload],
        analyzer_config: Optional[BaseAnalyzerConfig] = None,
        **kwargs: Any,
    ) -> List[TextPayload]:
        """Score every payload in ``source_response_list``.

        Args:
            source_response_list: Payloads to analyse; they are walked in
                batches of ``self.batch_size`` produced by ``self.batchify``.
            analyzer_config: Accepted as part of the signature; not read by
                this implementation.
            **kwargs: Accepted as part of the signature; not read by this
                implementation.

        Returns:
            One new ``TextPayload`` per input payload, in iteration order.
        """
        return [
            self._classify_payload(payload)
            for batch in self.batchify(source_response_list, self.batch_size)
            for payload in batch
        ]
class TransformersSentimentAnalyzerConfig(ClassificationAnalyzerConfig):
    """Configuration accepted by ``TransformersSentimentAnalyzer.analyze_input``.

    Extends ``ClassificationAnalyzerConfig`` and declares sentiment-specific
    defaults for the attributes below.
    """

    # Type tag for this configuration.
    TYPE: str = "Sentiment"
    # Default candidate labels: the two sentiment classes.
    labels: List[str] = ["positive", "negative"]
    # NOTE(review): presumably read by the parent classification analyzer to
    # choose between single-label and multi-label scoring — confirm there.
    multi_class_classification: bool = False
class TransformersSentimentAnalyzer(ZeroShotClassificationAnalyzer):
    """Sentiment analyzer built on top of ``ZeroShotClassificationAnalyzer``.

    Only ``analyze_input`` is overridden: it forwards to the parent
    implementation with ``add_positive_negative_labels=True``.
    """

    def analyze_input(  # type: ignore[override]
        self,
        source_response_list: List[TextPayload],
        analyzer_config: Optional[TransformersSentimentAnalyzerConfig] = None,
        **kwargs: Any,
    ) -> List[TextPayload]:
        """Run the parent analysis with positive/negative labelling switched on.

        Args:
            source_response_list: Payloads to analyse; passed through as-is.
            analyzer_config: Sentiment configuration; passed through as-is
                (including ``None``).
            **kwargs: Extra keyword arguments forwarded to the parent. Passing
                ``add_positive_negative_labels`` here clashes with the value
                set by this method and raises ``TypeError``.

        Returns:
            Whatever the parent ``analyze_input`` returns.
        """
        labelled_payloads = super().analyze_input(
            source_response_list=source_response_list,
            analyzer_config=analyzer_config,
            add_positive_negative_labels=True,
            **kwargs,
        )
        return labelled_payloads
|