Spaces:

MatteoFasulo
/

SubjectivityDetection

Running

App Files Files Community

MatteoFasulo committed 24 days ago

Commit

33a1f42

verified ·

1 Parent(s): c2ae4ec

Update with new layout

Browse files

Files changed (1) hide show

app.py +92 -126

app.py CHANGED Viewed

@@ -5,133 +5,99 @@ from transformers.models.deberta.modeling_deberta import ContextPooler
 from transformers import pipeline
 import torch.nn as nn
-# Model identifiers handed to from_pretrained, plus a decision threshold
 BASE_MODEL = "microsoft/mdeberta-v3-base"
 SENT_SUBJ_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
 SUBJ_ONLY_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
-THRESHOLD = 0.65  # not referenced by any other removed line in this hunk
-# DeBERTa-v2 encoder + context pooler + linear head; the head can also take three sentiment scores
-class CustomModel(PreTrainedModel):
-    config_class = DebertaV2Config
-    def __init__(self, config, sentiment_dim=0, num_labels=2, *args, **kwargs):  # sentiment_dim widens the head input
-        super().__init__(config, *args, **kwargs)
-        self.deberta = DebertaV2Model(config)
-        self.pooler = ContextPooler(config)
-        output_dim = self.pooler.output_dim
-        self.dropout = nn.Dropout(0.1)
-        self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)  # input width = pooled size + sentiment_dim
-    def forward(self, input_ids, attention_mask=None, token_type_ids=None,  # token_type_ids is accepted but not forwarded
-                positive=None, neutral=None, negative=None):
-        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
-        pooled = self.pooler(outputs[0])
-        if positive is not None and neutral is not None and negative is not None:  # all three are required to use sentiment
-            sent_feats = torch.stack((positive, neutral, negative), dim=1)
-            combined = torch.cat((pooled, sent_feats), dim=1)  # append the sentiment features to the pooled vector
-        else:
-            combined = pooled
-        logits = self.classifier(self.dropout(combined))
-        return logits  # raw logits; the caller applies softmax
-# Build the shared tokenizer and both classifiers; returns (tokenizer, model1, model2)
-def load_models():
-    # Tokenizer shared by both classifiers (loaded from BASE_MODEL)
-    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
-    # Sentiment+Subjectivity model
-    cfg1 = DebertaV2Config.from_pretrained(
-        SENT_SUBJ_MODEL,
-        num_labels=2,
-        id2label={0: 'OBJ', 1: 'SUBJ'},
-        label2id={'OBJ': 0, 'SUBJ': 1},
-        output_attentions=False,
-        output_hidden_states=False
-    )
-    model1 = CustomModel(config=cfg1, sentiment_dim=3)
-    model1 = model1.from_pretrained(SENT_SUBJ_MODEL)  # NOTE(review): neither cfg1 nor sentiment_dim=3 is passed here; presumably the reloaded model uses the defaults - confirm
-    # Subjectivity-only model
-    cfg2 = DebertaV2Config.from_pretrained(
-        SUBJ_ONLY_MODEL,
-        num_labels=2,
-        id2label={0: 'OBJ', 1: 'SUBJ'},
-        label2id={'OBJ': 0, 'SUBJ': 1},
-        output_attentions=False,
-        output_hidden_states=False
-    )
-    model2 = CustomModel(config=cfg2, sentiment_dim=0)
-    model2 = model2.from_pretrained(SUBJ_ONLY_MODEL)  # the instance built on the previous line is replaced by this result
-    return tokenizer, model1, model2
-# Sentiment pipeline, created at import time; top_k=None is passed to the pipeline
-sentiment_pipe = pipeline(
-    "sentiment-analysis",
-    model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
-    tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
-    top_k=None
-)
-def get_sentiment_scores(text: str):
-    results = sentiment_pipe(text)[0]
-    return {lbl: score for lbl, score in [(list(d.keys())[0], list(d.values())[0]) for d in results]}  # maps each dict's first key to its first value; NOTE(review): for {'label', 'score'} dicts that is 'label' -> label name, not label -> score - confirm
-# Prediction function: returns a multi-line text report for one input string
-# The tokenizer and both models are loaded on the first call and kept in module globals
-tokenizer, model_sent_subj, model_subj_only = None, None, None
-def predict_subjectivity(text):
-    global tokenizer, model_sent_subj, model_subj_only
-    if tokenizer is None:  # first call: nothing loaded yet
-        tokenizer, model_sent_subj, model_subj_only = load_models()
-    # Tokenize input (truncated to at most 256 tokens)
-    inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')
-    # Sentiment + subjectivity model inference
-    sent_scores = get_sentiment_scores(text)
-    pos, neu, neg = sent_scores['positive'], sent_scores['neutral'], sent_scores['negative']
-    logits1 = model_sent_subj(
-        input_ids=inputs['input_ids'],
-        attention_mask=inputs.get('attention_mask'),
-        positive=torch.tensor([pos]),
-        neutral=torch.tensor([neu]),
-        negative=torch.tensor([neg])
-    )
-    probs1 = torch.softmax(logits1, dim=1)[0]  # class probabilities of the first (only) row
-    # Subjectivity-only model inference
-    logits2 = model_subj_only(
-        input_ids=inputs['input_ids'],
-        attention_mask=inputs.get('attention_mask')
-    )
-    probs2 = torch.softmax(logits2, dim=1)[0]
-    # Formatting: one report line per entry, joined with newlines
-    output = []
-    output.append("Sentiment Scores (sent-subj model):")
-    output.append(f"- Positive: {pos:.2%}")
-    output.append(f"- Neutral: {neu:.2%}")
-    output.append(f"- Negative: {neg:.2%}\n")
-    output.append(f"Subjectivity (with sentiment) - OBJ: {probs1[0]:.2%}, SUBJ: {probs1[1]:.2%}")
-    output.append(f"Subjectivity (text only)   - OBJ: {probs2[0]:.2%}, SUBJ: {probs2[1]:.2%}")
-    return "\n".join(output)
-# Build Gradio interface: one text input, one text output, backed by predict_subjectivity
-demo = gr.Interface(
-    fn=predict_subjectivity,
-    inputs=gr.Textbox(
-        label='Input sentence',
-        placeholder='Enter a sentence from a news article',
-        info='Paste a sentence from a news article to determine subjectivity'
-    ),
-    outputs=gr.Textbox(
-        label='Results',
-        info='Sentiment & dual-model subjectivity probabilities'
-    ),
-    title='Dual-Model Subjectivity Detection',
-    description='Outputs sentiment scores and class probabilities from two subjectivity models.'
-)
-demo.launch()  # runs at import time with default launch options

 from transformers import pipeline
 import torch.nn as nn
+# -- Model identifiers (each is passed to a from_pretrained call further down)
 BASE_MODEL = "microsoft/mdeberta-v3-base"
 SENT_SUBJ_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
 SUBJ_ONLY_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
+# -- Custom model builder
+from functools import partial
+def build_custom_model(sentiment_dim=0):
+    """Create a DeBERTa-v2 classifier class with optional sentiment inputs.
+
+    The head width is captured in the closure, so the generated class's
+    constructor needs only ``config``.
+
+    Args:
+        sentiment_dim: Number of sentiment features concatenated to the
+            pooled encoder output. Must be 0 (text only) or 3, because
+            ``forward`` always stacks exactly ``positive``, ``neutral``
+            and ``negative``.
+
+    Returns:
+        A ``PreTrainedModel`` subclass named ``CustomModel``.
+
+    Raises:
+        ValueError: If ``sentiment_dim`` is neither 0 nor 3.
+    """
+    sentiment_keys = ('positive', 'neutral', 'negative')
+    # Fail at build time instead of with a shape error inside the classifier.
+    if sentiment_dim not in (0, len(sentiment_keys)):
+        raise ValueError(
+            f"sentiment_dim must be 0 or {len(sentiment_keys)}, got {sentiment_dim!r}"
+        )
+
+    class CustomModel(PreTrainedModel):
+        config_class = DebertaV2Config
+
+        def __init__(self, config, *args, **kwargs):
+            super().__init__(config, *args, **kwargs)
+            self.deberta = DebertaV2Model(config)
+            self.pooler = ContextPooler(config)
+            self.dropout = nn.Dropout(0.1)
+            hidden_dim = self.pooler.output_dim + sentiment_dim
+            self.classifier = nn.Linear(hidden_dim, config.num_labels)
+
+        def forward(self, input_ids, attention_mask=None, **sent_kwargs):
+            """Return raw class logits; sentiment tensors arrive as keyword arguments."""
+            x = self.deberta(input_ids=input_ids, attention_mask=attention_mask)[0]
+            pooled = self.pooler(x)
+            if sentiment_dim:
+                # Stack the three scores along dim=1 and append them to the pooled vector.
+                sent_feats = torch.stack([sent_kwargs[key] for key in sentiment_keys], dim=1)
+                pooled = torch.cat((pooled, sent_feats), dim=1)
+            return self.classifier(self.dropout(pooled))
+
+    return CustomModel
+# -- Load the shared tokenizer, both classifiers and the sentiment pipeline
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+# Classifier that takes the text plus three sentiment scores
+cfg1 = DebertaV2Config.from_pretrained(
+    SENT_SUBJ_MODEL,
+    num_labels=2,
+    id2label={0: 'OBJ', 1: 'SUBJ'},
+    label2id={'OBJ': 0, 'SUBJ': 1},
+)
+Model1Cls = build_custom_model(sentiment_dim=3)
+# NOTE(review): ignore_mismatched_sizes=True presumably hides a head-shape mismatch
+# instead of raising - confirm the checkpoint's classifier weights are really loaded.
+model1 = Model1Cls.from_pretrained(
+    SENT_SUBJ_MODEL,
+    config=cfg1,
+    ignore_mismatched_sizes=True,
+)
+# Classifier that takes the text only
+cfg2 = DebertaV2Config.from_pretrained(
+    SUBJ_ONLY_MODEL,
+    num_labels=2,
+    id2label={0: 'OBJ', 1: 'SUBJ'},
+    label2id={'OBJ': 0, 'SUBJ': 1},
+)
+Model2Cls = build_custom_model(sentiment_dim=0)
+model2 = Model2Cls.from_pretrained(SUBJ_ONLY_MODEL, config=cfg2)
+# Sentiment pipeline; top_k=None is passed to the pipeline
+sentiment_pipe = pipeline(
+    "sentiment-analysis",
+    model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
+    tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
+    top_k=None,
+)
+def get_sentiment_scores(text):
+    """Return the sentiment pipeline's score for each label of *text*.
+
+    Args:
+        text: The sentence to score.
+
+    Returns:
+        A dict mapping each lower-cased label name to its score, so callers
+        can look up ``'positive'``, ``'neutral'`` and ``'negative'``.
+    """
+    # Each pipeline result is a {'label': ..., 'score': ...} dict. Pairing the
+    # first key with the first value (the previous approach) produced
+    # {'label': <label name>}, so the 'positive' lookup raised KeyError.
+    out = sentiment_pipe(text)[0]
+    # Lower-case the label so the lookup does not depend on the model's label casing.
+    return {d['label'].lower(): d['score'] for d in out}
+# -- Prediction logic
+def _score_text(text):
+    """Run the sentiment pipeline and both classifiers on *text*.
+
+    Returns:
+        A dict of floats keyed by metric name, in display order.
+    """
+    inputs = tokenizer(text, truncation=True, padding=True, max_length=256, return_tensors='pt')
+    scores = get_sentiment_scores(text)
+    pos, neu, neg = scores['positive'], scores['neutral'], scores['negative']
+    # Inference only, so skip autograd bookkeeping.
+    with torch.no_grad():
+        logits1 = model1(
+            input_ids=inputs.input_ids,
+            attention_mask=inputs.attention_mask,
+            positive=torch.tensor([pos]),
+            neutral=torch.tensor([neu]),
+            negative=torch.tensor([neg]),
+        )
+        logits2 = model2(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask)
+    p1 = torch.softmax(logits1, dim=1)[0]
+    p2 = torch.softmax(logits2, dim=1)[0]
+    return {
+        'Positive': float(pos), 'Neutral': float(neu), 'Negative': float(neg),
+        'Sent-Subj OBJ': p1[0].item(), 'Sent-Subj SUBJ': p1[1].item(),
+        'TextOnly OBJ': p2[0].item(), 'TextOnly SUBJ': p2[1].item(),
+    }
+
+
+def analyze(text):
+    """Return every metric for *text* as a percentage string.
+
+    Args:
+        text: The sentence to analyze.
+
+    Returns:
+        A dict mapping metric name to a string such as ``'12.34%'``.
+    """
+    return {name: f"{value:.2%}" for name, value in _score_text(text).items()}
+
+
+def _analyze_for_dashboard(text):
+    """Adapt the metrics for *text* to the dashboard's two output components.
+
+    The click handler feeds two components, so it must return two values;
+    the single dict returned by ``analyze`` cannot fill both.
+
+    Returns:
+        A ``(chart, table)`` pair: a DataFrame with ``category`` and ``value``
+        columns for the bar plot, and ``[metric, percentage]`` rows for the table.
+    """
+    import pandas as pd  # local import: needed only by this adapter
+
+    raw = _score_text(text)
+    chart_data = pd.DataFrame({'category': list(raw), 'value': list(raw.values())})
+    table_rows = [[name, f"{value:.2%}"] for name, value in raw.items()]
+    return chart_data, table_rows
+
+
+# -- Build Gradio Dashboard with Blocks
+# gr.themes has no `Dark` class, so the previous gr.themes.Dark() call failed at
+# startup. Use a built-in theme; dark mode can presumably still be chosen per
+# visitor with the `?__theme=dark` URL parameter - confirm for the Gradio version in use.
+dark_theme = gr.themes.Default()
+with gr.Blocks(theme=dark_theme, css="""
+#result_table td { padding: 8px; font-size: 1rem; }
+#header { text-align: center; font-size: 2rem; font-weight: bold; margin-bottom: 10px; }
+""") as demo:
+    gr.Markdown("<div id='header'>🚀 Advanced Subjectivity & Sentiment Dashboard 🚀</div>")
+    with gr.Row():
+        txt = gr.Textbox(label="Enter text to analyze", placeholder="Paste news sentence here...", lines=2)
+        btn = gr.Button("Analyze 🔍", variant="primary")
+    with gr.Tabs():
+        with gr.TabItem("Overview 📊"):
+            chart = gr.BarPlot(x="category", y="value", label="Results", elem_id="result_chart")
+        with gr.TabItem("Raw Scores 📋"):
+            table = gr.Dataframe(headers=["Metric", "Value"], datatype=["str","str"], interactive=False, elem_id="result_table")
+        with gr.TabItem("About ℹ️"):
+            gr.Markdown("This dashboard uses two DeBERTa-based models (with and without sentiment integration) to detect subjectivity, alongside sentiment scores from an XLM-RoBERTa model.")
+            # The THRESHOLD constant no longer exists in this file, so do not advertise one.
+            gr.Markdown("Scores are reported as raw class probabilities; no decision threshold is applied. Feel free to fork and customize! 🚀")
+    # Link inputs to outputs: one return value per output component
+    btn.click(fn=_analyze_for_dashboard, inputs=txt, outputs=[chart, table])
+    # The confetti hook was removed: gr.Button has no `js_on_event` method, so the
+    # call raised AttributeError while the app was being built.
+# -- Launch at import time: queue enabled, server_name 0.0.0.0, share=True
+demo.queue().launch(server_name="0.0.0.0", share=True)  # NOTE(review): share=True is presumably unnecessary when hosted on Spaces - confirm