|
import gradio as gr |
|
import torch |
|
from transformers import AutoTokenizer, AlbertForSequenceClassification |
|
import numpy as np |
|
import os |
|
import gdown |
|
|
|
|
|
# Google Drive folder IDs for each fine-tuned task model.
# NOTE: replace the placeholder values with the real folder IDs before deploying.
model_drive_ids = {
    "sentiment": "your_sentiment_folder_id",
    "emotion": "your_emotion_folder_id",
    "hate_speech": "your_hate_speech_folder_id",
    "sarcasm": "your_sarcasm_folder_id"
}

# Local cache directory for the downloaded model checkpoints.
save_dir = "./saved_models"
os.makedirs(save_dir, exist_ok=True)

# Download each task's model folder from Google Drive unless already cached.
for task, folder_id in model_drive_ids.items():
    output_dir = os.path.join(save_dir, task)
    if not os.path.exists(output_dir):
        # BUG FIX: the original appended folder_id after a hard-coded shared
        # folder URL's "?usp=sharing" query string, producing a malformed link
        # and silently ignoring the per-task folder_id. gdown.download_folder
        # expects a plain folder URL of the form .../drive/folders/<id>.
        gdown.download_folder(
            f"https://drive.google.com/drive/folders/{folder_id}",
            output=output_dir,
            quiet=False
        )
|
|
|
|
|
# The four analysis tasks, in the order they are reported to the user.
tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]

# Local checkpoint directory for each task (populated by the download step).
model_paths = {}
for task_name in tasks:
    model_paths[task_name] = f"{save_dir}/{task_name}"

# Human-readable class labels, index-aligned with each model's output logits.
label_mappings = {
    "sentiment": ["negative", "neutral", "positive"],
    "emotion": ["happy", "sad", "angry", "fear"],
    "hate_speech": ["no", "yes"],
    "sarcasm": ["no", "yes"]
}
|
|
|
|
|
# One shared IndicBERT tokenizer is used for all four task models.
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")

# Load one sequence-classification model per task from its local checkpoint,
# failing fast with a clear error if a checkpoint directory is missing.
models = {}
for task_name in tasks:
    checkpoint_dir = model_paths[task_name]
    if not os.path.exists(checkpoint_dir):
        raise FileNotFoundError(f"Model directory {checkpoint_dir} not found.")
    models[task_name] = AlbertForSequenceClassification.from_pretrained(checkpoint_dir)
|
|
|
|
|
def predict_task(text, task, model, tokenizer, max_length=128):
    """Classify *text* with the model for a single *task*.

    Tokenizes the input (truncated to *max_length* tokens), runs the model
    without gradient tracking, and converts the logits to probabilities.

    Returns a dict mapping each class label of *task* to a formatted
    percentage string, e.g. {"positive": "87.45%"}.
    """
    encoded = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    )

    # Inference only — no gradients needed.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax over the class dimension, then drop the batch dim (batch of 1).
    probs = torch.softmax(logits, dim=1).squeeze().cpu().numpy()

    task_labels = label_mappings[task]
    return {lbl: f"{p * 100:.2f}%" for lbl, p in zip(task_labels, probs)}
|
|
|
|
|
def predict_all_tasks(text):
    """Run every configured classifier on *text* and format a report.

    Parameters
    ----------
    text : str or None
        Raw user input from the Gradio textbox. ``None`` (a cleared
        textbox) and blank strings are treated as empty input.

    Returns
    -------
    str
        A human-readable report with one section per task, or a prompt
        asking for input when *text* is empty.
    """
    # Robustness fix: Gradio may pass None for a cleared textbox; the
    # original called .strip() on it and raised AttributeError.
    if not text or not text.strip():
        return "Please enter some text."

    results = {task: predict_task(text, task, models[task], tokenizer)
               for task in tasks}

    # Build the report with join() instead of quadratic string +=.
    # The produced text is byte-identical to the original implementation.
    lines = []
    for task, probs in results.items():
        lines.append(f"\n{task.capitalize()} Prediction:")
        for label, prob in probs.items():
            lines.append(f"  {label}: {prob}")
    return "\n".join(lines) + "\n"
|
|
|
|
|
# Gradio UI: a single multi-line textbox in, a plain-text report out.
text_input = gr.Textbox(lines=2, placeholder="Enter Telugu text here...")

iface = gr.Interface(
    fn=predict_all_tasks,
    inputs=text_input,
    outputs="text",
    title="Telugu Text Analysis",
    description="Enter Telugu text to predict sentiment, emotion, hate speech, and sarcasm."
)
|
|
|
# Launch the web app only when executed as a script (not on import).
if __name__ == "__main__":
    # Bind to all network interfaces on 7860, Gradio's conventional port.
    iface.launch(server_name="0.0.0.0", server_port=7860)