import gradio as gr import torch from transformers import DebertaV2Model, DebertaV2Config, AutoTokenizer, PreTrainedModel from transformers.models.deberta.modeling_deberta import ContextPooler from transformers import pipeline import torch.nn as nn # -- Model definitions BASE_MODEL = "microsoft/mdeberta-v3-base" SENT_SUBJ_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic" SUBJ_ONLY_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic" # -- Custom model builder from functools import partial def build_custom_model(sentiment_dim=0): class CustomModel(PreTrainedModel): config_class = DebertaV2Config def __init__(self, config, *args, **kwargs): super().__init__(config, *args, **kwargs) self.deberta = DebertaV2Model(config) self.pooler = ContextPooler(config) self.dropout = nn.Dropout(0.1) hidden_dim = self.pooler.output_dim + sentiment_dim self.classifier = nn.Linear(hidden_dim, config.num_labels) def forward(self, input_ids, attention_mask=None, **sent_kwargs): x = self.deberta(input_ids=input_ids, attention_mask=attention_mask)[0] pooled = self.pooler(x) if sentiment_dim: sent_feats = torch.stack((sent_kwargs['positive'], sent_kwargs['neutral'], sent_kwargs['negative']), dim=1) pooled = torch.cat((pooled, sent_feats), dim=1) return self.classifier(self.dropout(pooled)) return CustomModel # -- Load models and tokenizer tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL) # sentiment+subjectivity cfg1 = DebertaV2Config.from_pretrained(SENT_SUBJ_MODEL, num_labels=2, id2label={0:'OBJ',1:'SUBJ'}, label2id={'OBJ':0,'SUBJ':1}) Model1Cls = build_custom_model(sentiment_dim=3) model1 = Model1Cls.from_pretrained(SENT_SUBJ_MODEL, config=cfg1, ignore_mismatched_sizes=True) # subjectivity-only cfg2 = DebertaV2Config.from_pretrained(SUBJ_ONLY_MODEL, num_labels=2, id2label={0:'OBJ',1:'SUBJ'}, label2id={'OBJ':0,'SUBJ':1}) Model2Cls = build_custom_model(sentiment_dim=0) model2 = Model2Cls.from_pretrained(SUBJ_ONLY_MODEL, config=cfg2) # sentiment pipeline sentiment_pipe = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment", tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment", top_k=None) def get_sentiment_scores(text): out = sentiment_pipe(text)[0] return {list(d.keys())[0]: list(d.values())[0] for d in out} # -- Prediction logic def analyze(text): # Tokenize inputs = tokenizer(text, truncation=True, padding=True, max_length=256, return_tensors='pt') # Sentiment scores = get_sentiment_scores(text) pos, neu, neg = scores['positive'], scores['neutral'], scores['negative'] # Model1 logits1 = model1(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask, positive=torch.tensor([pos]), neutral=torch.tensor([neu]), negative=torch.tensor([neg])) p1 = torch.softmax(logits1, dim=1)[0] # Model2 logits2 = model2(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask) p2 = torch.softmax(logits2, dim=1)[0] # Build results return { 'Positive': f"{pos:.2%}", 'Neutral': f"{neu:.2%}", 'Negative': f"{neg:.2%}", 'Sent-Subj OBJ': f"{p1[0]:.2%}", 'Sent-Subj SUBJ': f"{p1[1]:.2%}", 'TextOnly OBJ': f"{p2[0]:.2%}", 'TextOnly SUBJ': f"{p2[1]:.2%}" } # -- Build Gradio Dashboard with Blocks dark_theme = gr.themes.Dark() with gr.Blocks(theme=dark_theme, css=""" #result_table td { padding: 8px; font-size: 1rem; } #header { text-align: center; font-size: 2rem; font-weight: bold; margin-bottom: 10px; } """) as demo: gr.Markdown("