Spaces:

koushikkumarkadari
/

hate-speech-detection

Sleeping

App Files Files Community

hate-speech-detection / app.py

koushikkumarkadari

Update app.py

e5e8bcf verified about 1 month ago

raw

history blame contribute delete

4.47 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AlbertForSequenceClassification, AlbertTokenizer
	import numpy as np
	import os
	import gdown
	import logging

	# Configure logging at INFO level and create the logger used by this module.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Google Drive file IDs, keyed by task name. For each task, "config" is the
	# ID of the file saved locally as config.json and "model" is the ID of the
	# file saved locally as model.safetensors.
	model_file_ids = dict(
	    sentiment=dict(
	        config="11jwMJmQMGkiVZWBRQ5BLFyot1520FYIQ",
	        model="115N5yiu9lfw4uJE5YxHNoHauHeYSSusu",
	    ),
	    emotion=dict(
	        config="1dSxK10jbZyRpMDCm6MCRf9Jy0weOzLP9",
	        model="1Y3rTtPfo4zu28OhsRybdJF6czZN46I0Y",
	    ),
	    hate_speech=dict(
	        config="1QTejES8BZQs3qnxom9ymiZkLRUAZ91NP",
	        model="1ol2xO4XbdHwP_HHCYsnX8iVutA6javy_",
	    ),
	    sarcasm=dict(
	        config="1ypl0j1Yp_-0szR4-P1-0CMyDYBwUn5Wz",
	        model="1pbByLvTIHO_sT9HMeypvXbsdHsLVzTdk",
	    ),
	)

	# Root directory for downloaded model files; created up front if missing.
	save_dir = "./saved_models"
	os.makedirs(save_dir, exist_ok=True)

	# Download each task's config and weights from Google Drive unless a local
	# copy already exists. A download that does not produce the target file
	# stops start-up immediately with a clear error, instead of surfacing later
	# as an opaque model-loading failure.
	for task, files in model_file_ids.items():
	    output_dir = os.path.join(save_dir, task)
	    os.makedirs(output_dir, exist_ok=True)

	    config_path = os.path.join(output_dir, "config.json")
	    model_path = os.path.join(output_dir, "model.safetensors")

	    # Each entry: (key in `files`, destination path, file name used in the
	    # download log line, noun used in the skip log line).
	    downloads = (
	        ("config", config_path, "config.json", "Config"),
	        ("model", model_path, "model.safetensors", "Model"),
	    )
	    for file_key, target_path, file_name, noun in downloads:
	        if os.path.exists(target_path):
	            logger.info(f"{noun} for {task} already exists, skipping download.")
	            continue

	        logger.info(f"Downloading {task} {file_name} from Google Drive...")
	        downloaded = gdown.download(
	            f"https://drive.google.com/uc?id={files[file_key]}",
	            target_path,
	            quiet=False,
	        )
	        # NOTE(review): some gdown versions appear to report failure by
	        # returning None rather than raising - confirm against the installed
	        # version. Checking the file on disk covers both behaviors.
	        if downloaded is None or not os.path.exists(target_path):
	            raise RuntimeError(
	                f"Failed to download {task} {file_name} from Google Drive."
	            )

	# Task names, in the order they are loaded and reported.
	tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]

	# Directory holding the downloaded files for each task.
	model_paths = dict((name, f"{save_dir}/{name}") for name in tasks)

	# Label names per task; position i names the i-th probability produced
	# for that task.
	label_mappings = dict(
	    sentiment=["negative", "neutral", "positive"],
	    emotion=["happy", "sad", "angry", "fear"],
	    hate_speech=["no", "yes"],
	    sarcasm=["no", "yes"],
	)

	# Load the single tokenizer used for every task. A failure is logged and
	# then re-raised so start-up stops.
	logger.info("Loading tokenizer...")
	try:
	    # AlbertTokenizer is requested explicitly rather than via an Auto class.
	    tokenizer = AlbertTokenizer.from_pretrained(
	        "ai4bharat/indic-bert",
	        use_fast=False,
	    )
	except Exception as load_error:
	    logger.error("Failed to load tokenizer: " + str(load_error))
	    raise

	# Load one sequence-classification model per task from its local directory.
	models = {}
	for task in tasks:
	    logger.info(f"Loading model for {task}...")
	    task_dir = model_paths[task]
	    if not os.path.exists(task_dir):
	        raise FileNotFoundError(f"Model directory {task_dir} not found.")

	    # The download step creates the directory before fetching anything, so
	    # the directory existing proves nothing. Verify the two files that step
	    # writes are actually there.
	    missing_files = [
	        file_name
	        for file_name in ("config.json", "model.safetensors")
	        if not os.path.isfile(os.path.join(task_dir, file_name))
	    ]
	    if missing_files:
	        raise FileNotFoundError(
	            f"Model directory {task_dir} is missing: {', '.join(missing_files)}"
	        )

	    try:
	        models[task] = AlbertForSequenceClassification.from_pretrained(task_dir)
	    except Exception as e:
	        # Log which task failed, then let the original error propagate.
	        logger.error(f"Failed to load model for {task}: {str(e)}")
	        raise

	# Function to predict for a single task
	def predict_task(text, task, model, tokenizer, max_length=128):
	    """Return per-label probabilities for ``text`` on one task.

	    Args:
	        text: Input text to classify.
	        task: Key into ``label_mappings`` selecting the label names.
	        model: Callable invoked as ``model(**inputs)``; its result must
	            expose ``.logits``.
	        tokenizer: Callable that turns ``text`` into the keyword inputs
	            for ``model``.
	        max_length: Maximum length passed to the tokenizer for truncation.

	    Returns:
	        A dict mapping each label name to its probability, formatted as a
	        percentage string with two decimals (for example ``"12.34%"``).

	    Raises:
	        KeyError: If ``task`` is not a key of ``label_mappings``.
	    """
	    inputs = tokenizer(
	        text,
	        padding=True,
	        truncation=True,
	        max_length=max_length,
	        return_tensors="pt",
	    )

	    # Inference only: skip gradient tracking.
	    with torch.no_grad():
	        logits = model(**inputs).logits

	    # Take row 0 instead of a bare squeeze(): squeeze() drops every size-1
	    # axis, so a single-class head would become a 0-d array that cannot be
	    # paired with labels.
	    probabilities = torch.softmax(logits, dim=1)[0].cpu().numpy()

	    labels = label_mappings[task]
	    if len(labels) != len(probabilities):
	        # zip() below stops at the shorter sequence, so make the mismatch
	        # visible instead of silently dropping entries.
	        logger.warning(
	            "Task %s: %d labels configured but model produced %d scores.",
	            task,
	            len(labels),
	            len(probabilities),
	        )
	    return {label: f"{prob*100:.2f}%" for label, prob in zip(labels, probabilities)}

	# Gradio interface function
	def predict_all_tasks(text):
	    """Run every task on ``text`` and return a plain-text report.

	    Args:
	        text: The text to analyse. ``None``, an empty string, or a
	            whitespace-only string yields a prompt message instead of
	            predictions.

	    Returns:
	        ``"Please enter some text."`` for missing/blank input; otherwise a
	        string with one section per task, each listing ``label: percent``
	        lines as produced by ``predict_task``.
	    """
	    # Treat None like blank input so a missing value returns the prompt
	    # rather than raising AttributeError on .strip().
	    if text is None or not text.strip():
	        return "Please enter some text."

	    results = {
	        task: predict_task(text, task, models[task], tokenizer)
	        for task in tasks
	    }

	    # Collect the pieces and join once instead of repeated string +=.
	    pieces = []
	    for task, probs in results.items():
	        pieces.append(f"\n{task.capitalize()} Prediction:\n")
	        pieces.extend(f" {label}: {prob}\n" for label, prob in probs.items())

	    return "".join(pieces)

	# Build the Gradio UI: a two-line text box as input and a plain-text
	# report from predict_all_tasks as output.
	iface = gr.Interface(
	    title="Telugu Text Analysis",
	    description="Enter Telugu text to predict sentiment, emotion, hate speech, and sarcasm.",
	    fn=predict_all_tasks,
	    inputs=gr.Textbox(placeholder="Enter Telugu text here...", lines=2),
	    outputs="text",
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    logger.info("Launching Gradio interface...")
	    iface.launch(server_port=7860, server_name="0.0.0.0")