File size: 3,306 Bytes
dbaa71b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import logging
from typing import Any, List, Optional

from pydantic import PrivateAttr
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from obsei.analyzer.base_analyzer import (
    BaseAnalyzer,
    BaseAnalyzerConfig,
)
from obsei.payload import TextPayload
from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)

logger = logging.getLogger(__name__)


class VaderSentimentAnalyzer(BaseAnalyzer):
    _model: SentimentIntensityAnalyzer = PrivateAttr()
    TYPE: str = "Sentiment"

    def __init__(self, **data: Any):
        super().__init__(**data)
        self._model = SentimentIntensityAnalyzer()

    def _get_sentiment_score_from_vader(self, text: str) -> float:
        scores = self._model.polarity_scores(text)
        return float(scores["compound"])

    def analyze_input(
        self,
        source_response_list: List[TextPayload],
        analyzer_config: Optional[BaseAnalyzerConfig] = None,
        **kwargs: Any,
    ) -> List[TextPayload]:
        analyzer_output: List[TextPayload] = []

        for batch_responses in self.batchify(source_response_list, self.batch_size):
            for source_response in batch_responses:
                classification_map = {}
                sentiment_value = self._get_sentiment_score_from_vader(
                    source_response.processed_text
                )
                if sentiment_value < 0.0:
                    classification_map["negative"] = -sentiment_value
                    classification_map["positive"] = (
                        1.0 - classification_map["negative"]
                    )
                else:
                    classification_map["positive"] = sentiment_value
                    classification_map["negative"] = (
                        1.0 - classification_map["positive"]
                    )

                segmented_data = {"classifier_data": classification_map}
                if source_response.segmented_data:
                    segmented_data = {
                        **segmented_data,
                        **source_response.segmented_data,
                    }

                analyzer_output.append(
                    TextPayload(
                        processed_text=source_response.processed_text,
                        meta=source_response.meta,
                        segmented_data=segmented_data,
                        source_name=source_response.source_name,
                    )
                )

        return analyzer_output


class TransformersSentimentAnalyzerConfig(ClassificationAnalyzerConfig):
    TYPE: str = "Sentiment"
    labels: List[str] = ["positive", "negative"]
    multi_class_classification: bool = False


class TransformersSentimentAnalyzer(ZeroShotClassificationAnalyzer):
    def analyze_input(  # type: ignore[override]
        self,
        source_response_list: List[TextPayload],
        analyzer_config: Optional[TransformersSentimentAnalyzerConfig] = None,
        **kwargs: Any,
    ) -> List[TextPayload]:
        return super().analyze_input(
            source_response_list=source_response_list,
            analyzer_config=analyzer_config,
            add_positive_negative_labels=True,
            **kwargs,
        )