import gradio as gr
import torch
import torch.nn as nn
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DebertaV2Config,
    DebertaV2Model,
    PreTrainedModel,
    pipeline,
)
from transformers.models.deberta_v2.modeling_deberta_v2 import ContextPooler

# Model identifiers
model_card = "microsoft/mdeberta-v3-base"
subjectivity_only_model = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
sentiment_model = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"

# Example inputs for the Gradio interface (placeholders; cached to run on the fly)
examples = [
    ['Example1'],
    ['Example2'],
    ['Example3'],
]


# Custom model that combines DeBERTa's pooled representation with three
# sentiment scores (positive/neutral/negative) for subjectivity detection
class CustomModel(PreTrainedModel):
    config_class = DebertaV2Config

    def __init__(self, config, sentiment_dim=3, num_labels=2, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self.deberta = DebertaV2Model(config)
        self.pooler = ContextPooler(config)
        output_dim = self.pooler.output_dim
        self.dropout = nn.Dropout(0.1)
        # The classifier sees the pooled embedding concatenated with the
        # sentiment features, hence the widened input dimension
        self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)

    def forward(self, input_ids, positive, neutral, negative,
                token_type_ids=None, attention_mask=None, labels=None):
        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        encoder_layer = outputs[0]
        pooled_output = self.pooler(encoder_layer)

        # Stack the three (batch_size,) sentiment tensors into (batch_size, 3)
        sentiment_features = torch.stack((positive, neutral, negative), dim=1)

        # Combine the pooled embedding with the sentiment features
        combined_features = torch.cat((pooled_output, sentiment_features), dim=1)

        # Classification head
        logits = self.classifier(self.dropout(combined_features))
        return {'logits': logits}


# Load the pre-trained tokenizer
def load_tokenizer(model_name: str):
    return AutoTokenizer.from_pretrained(model_name)


# Load a pre-trained model: the sentiment-augmented checkpoint needs the
# custom class above; the text-only checkpoint is a plain sequence classifier
def load_model(model_name: str):
    if 'sentiment' in model_name:
        config = DebertaV2Config.from_pretrained(
            model_name,
            num_labels=2,
            id2label={0: 'OBJ', 1: 'SUBJ'},
            label2id={'OBJ': 0, 'SUBJ': 1},
            output_attentions=False,
            output_hidden_states=False,
        )
        # from_pretrained is a classmethod: call it on the class with the custom
        # config instead of instantiating first and discarding that instance
        model = CustomModel.from_pretrained(model_name, config=config, sentiment_dim=3, num_labels=2)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=2,
            id2label={0: 'OBJ', 1: 'SUBJ'},
            label2id={'OBJ': 0, 'SUBJ': 1},
            output_attentions=False,
            output_hidden_states=False,
        )
    return model


# Score the text with a pre-trained multilingual sentiment model.
# Note: this rebuilds the pipeline on every call; consider caching it at
# module level if latency matters.
def get_sentiment_values(text: str):
    pipe = pipeline(
        "sentiment-analysis",
        model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
        tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
        top_k=None,
    )
    sentiments = pipe(text)[0]
    return {s['label']: s['score'] for s in sentiments}
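
# Illustrative sanity check (not part of the original file): the cardiffnlp
# checkpoint emits lowercase 'positive'/'neutral'/'negative' labels, so the
# helper above returns a dict shaped like the following (scores are made up):
#
#   get_sentiment_values("What a great match!")
#   # -> {'positive': 0.93, 'neutral': 0.05, 'negative': 0.02}
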
# Run both subjectivity models on the input text and return all scores
def analyze(text):
    # Sentiment scores for the input text
    sentiment_values = get_sentiment_values(text)

    # Load the tokenizer and both models (reloaded on every call; cache them
    # at module level for a faster demo). Distinct local names avoid shadowing
    # the global model identifiers, which would raise UnboundLocalError.
    tokenizer = load_tokenizer(model_card)
    sent_subj_model = load_model(sentiment_model)
    text_only_model = load_model(subjectivity_only_model)

    # Tokenize
    inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')

    # Unpack the sentiment values
    positive = sentiment_values['positive']
    neutral = sentiment_values['neutral']
    negative = sentiment_values['negative']

    # Sentiment scores as (batch_size,) float tensors for the custom model
    positive_t = torch.tensor([positive], dtype=torch.float32)
    neutral_t = torch.tensor([neutral], dtype=torch.float32)
    negative_t = torch.tensor([negative], dtype=torch.float32)

    with torch.no_grad():
        # Sentiment-augmented model: pass the sentiment tensors explicitly so
        # that `inputs` stays valid for the plain sequence classifier below
        outputs1 = sent_subj_model(**inputs, positive=positive_t, neutral=neutral_t, negative=negative_t)
        p1 = torch.nn.functional.softmax(outputs1['logits'], dim=1)[0]

        # Text-only model: accepts only the tokenizer outputs
        outputs2 = text_only_model(**inputs)
        p2 = torch.nn.functional.softmax(outputs2.logits, dim=1)[0]

    # Format the output
    return {
        'Positive': f"{positive:.2%}",
        'Neutral': f"{neutral:.2%}",
        'Negative': f"{negative:.2%}",
        'Sent-Subj OBJ': f"{p1[0].item():.2%}",
        'Sent-Subj SUBJ': f"{p1[1].item():.2%}",
        'TextOnly OBJ': f"{p2[0].item():.2%}",
        'TextOnly SUBJ': f"{p2[1].item():.2%}",
    }


# Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), css="""
    #result_table td { padding: 8px; font-size: 1rem; }
    #header { text-align: center; font-size: 2rem; font-weight: bold; margin-bottom: 10px; }
""") as demo:
    # The original file is truncated inside this Markdown call; the header
    # text below is an assumed stand-in, not the author's original string
    gr.Markdown("<div id='header'>Subjectivity & Sentiment Analysis</div>")
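
    # Assumed completion (the original file ends above): minimal wiring of
    # `analyze` into the Blocks layout. Component choices and labels are a
    # sketch, not the original author's UI.
    txt = gr.Textbox(label="Text to analyze", lines=3,
                     placeholder="Enter a sentence or short paragraph...")
    analyze_btn = gr.Button("Analyze")
    output = gr.JSON(label="Sentiment and subjectivity scores")
    analyze_btn.click(fn=analyze, inputs=txt, outputs=output)
    gr.Examples(examples=examples, inputs=txt)

if __name__ == "__main__":
    demo.launch()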