import gradio as gr
import torch
from transformers import AlbertForSequenceClassification, AlbertTokenizer
import os
import gdown
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define Google Drive file IDs for each model's config and safetensors
model_file_ids = {
    "sentiment": {
        "config": "11jwMJmQMGkiVZWBRQ5BLFyot1520FYIQ",
        "model": "115N5yiu9lfw4uJE5YxHNoHauHeYSSusu"
    },
    "emotion": {
        "config": "1dSxK10jbZyRpMDCm6MCRf9Jy0weOzLP9",
        "model": "1Y3rTtPfo4zu28OhsRybdJF6czZN46I0Y"
    },
    "hate_speech": {
        "config": "1QTejES8BZQs3qnxom9ymiZkLRUAZ91NP",
        "model": "1ol2xO4XbdHwP_HHCYsnX8iVutA6javy_"
    },
    "sarcasm": {
        "config": "1ypl0j1Yp_-0szR4-P1-0CMyDYBwUn5Wz",
        "model": "1pbByLvTIHO_sT9HMeypvXbsdHsLVzTdk"
    }
}

# Define local directory to store downloaded models
save_dir = "./saved_models"
os.makedirs(save_dir, exist_ok=True)

# Download each model's config and weights if not already cached locally
for task, files in model_file_ids.items():
    output_dir = os.path.join(save_dir, task)
    os.makedirs(output_dir, exist_ok=True)
    config_path = os.path.join(output_dir, "config.json")
    model_path = os.path.join(output_dir, "model.safetensors")
    if not os.path.exists(config_path):
        logger.info(f"Downloading {task} config.json from Google Drive...")
        gdown.download(f"https://drive.google.com/uc?id={files['config']}", config_path, quiet=False)
    else:
        logger.info(f"Config for {task} already exists, skipping download.")
    if not os.path.exists(model_path):
        logger.info(f"Downloading {task} model.safetensors from Google Drive...")
        gdown.download(f"https://drive.google.com/uc?id={files['model']}", model_path, quiet=False)
    else:
        logger.info(f"Model for {task} already exists, skipping download.")

# Define model paths
tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]
model_paths = {task: f"{save_dir}/{task}" for task in tasks}

# Define label mappings
label_mappings = {
    "sentiment": ["negative", "neutral", "positive"],
    "emotion": ["happy", "sad", "angry", "fear"],
    "hate_speech": ["no", "yes"],
    "sarcasm": ["no", "yes"]
}

# Load tokenizer
logger.info("Loading tokenizer...")
try:
    # Explicitly use the slow SentencePiece-based AlbertTokenizer
    tokenizer = AlbertTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=False)
except Exception as e:
    logger.error(f"Failed to load tokenizer: {str(e)}")
    raise

# Load all models
models = {}
for task in tasks:
    logger.info(f"Loading model for {task}...")
    if not os.path.exists(model_paths[task]):
        raise FileNotFoundError(f"Model directory {model_paths[task]} not found.")
    try:
        models[task] = AlbertForSequenceClassification.from_pretrained(model_paths[task])
    except Exception as e:
        logger.error(f"Failed to load model for {task}: {str(e)}")
        raise

# Function to predict for a single task
def predict_task(text, task, model, tokenizer, max_length=128):
    inputs = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    )
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=1).squeeze().cpu().numpy()
    labels = label_mappings[task]
    return {label: f"{prob*100:.2f}%" for label, prob in zip(labels, probabilities)}

# Gradio interface function
def predict_all_tasks(text):
    if not text.strip():
        return "Please enter some text."
    results = {}
    for task in tasks:
        results[task] = predict_task(text, task, models[task], tokenizer)
    output = ""
    for task, probs in results.items():
        output += f"\n{task.capitalize()} Prediction:\n"
        for label, prob in probs.items():
            output += f"  {label}: {prob}\n"
    return output

# Create Gradio interface
iface = gr.Interface(
    fn=predict_all_tasks,
    inputs=gr.Textbox(lines=2, placeholder="Enter Telugu text here..."),
    outputs="text",
    title="Telugu Text Analysis",
    description="Enter Telugu text to predict sentiment, emotion, hate speech, and sarcasm."
)

if __name__ == "__main__":
    logger.info("Launching Gradio interface...")
    iface.launch(server_name="0.0.0.0", server_port=7860)
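
# Optional quick sanity check without launching the web UI (a minimal sketch;
# the module name "app" and the sample Telugu sentence are placeholders, not
# part of the original project):
#
#     >>> from app import predict_all_tasks   # assumes this file is saved as app.py
#     >>> print(predict_all_tasks("ఈ సినిమా చాలా బాగుంది"))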