Spaces:

koushikkumarkadari
/

hate-speech-detection

Sleeping

App Files Files Community

hate-speech-detection / app.py

koushikkumarkadari

Update app.py

96ff0d7 verified 2 months ago

raw

history blame

4.13 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AlbertForSequenceClassification
	import numpy as np
	import os
	import gdown
	import logging

	# Logging: INFO level via basicConfig, plus a logger named after this module
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Google Drive folder ID for each task's model. The values below are
	# placeholders and must be replaced with the actual folder IDs.
	model_drive_ids = dict(
	    sentiment="your_sentiment_folder_id",
	    emotion="your_emotion_folder_id",
	    hate_speech="your_hate_speech_folder_id",
	    sarcasm="your_sarcasm_folder_id",
	)

	# Local directory that holds the downloaded models; created if missing
	save_dir = "./saved_models"
	os.makedirs(save_dir, exist_ok=True)

	# Download each task's model folder from Google Drive, skipping any task
	# whose local directory already exists.
	for task, folder_id in model_drive_ids.items():
	    output_dir = os.path.join(save_dir, task)
	    if os.path.exists(output_dir):
	        continue
	    logger.info(f"Downloading {task} model from Google Drive...")
	    try:
	        # Build the folder URL from this task's own ID. The previous URL
	        # hard-coded one shared folder and appended the ID after the
	        # "?usp=sharing" query string, so the per-task ID never selected
	        # a folder.
	        # NOTE(review): that hard-coded folder was
	        # 1kEXKoJxxD5-0FO8WvtagzseSIC5q-rRY; if the models live inside it,
	        # put the matching sub-folder IDs into model_drive_ids.
	        gdown.download_folder(
	            f"https://drive.google.com/drive/folders/{folder_id}",
	            output=output_dir,
	            quiet=False,
	        )
	    except Exception as e:
	        # Log which task failed, then abort startup with the original error
	        logger.error(f"Failed to download {task} model: {str(e)}")
	        raise

	# Task names; predictions are computed and reported in this order
	tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]

	# Each task's model directory sits directly under save_dir
	model_paths = {name: "/".join((save_dir, name)) for name in tasks}

	# Label names per task; position i labels the i-th score a model returns.
	# NOTE(review): assumes this order matches each model's training labels — confirm.
	label_mappings = dict(
	    sentiment=["negative", "neutral", "positive"],
	    emotion=["happy", "sad", "angry", "fear"],
	    hate_speech=["no", "yes"],
	    sarcasm=["no", "yes"],
	)

	# One tokenizer is shared by every task. use_fast=False is passed to avoid
	# fast tokenizer issues.
	logger.info("Loading tokenizer...")
	try:
	    tokenizer = AutoTokenizer.from_pretrained(
	        "ai4bharat/indic-bert",
	        use_fast=False,
	    )
	except Exception as e:
	    # Record the failure, then let the original exception stop startup
	    logger.error(f"Failed to load tokenizer: {str(e)}")
	    raise

	# Load one sequence-classification model per task from its local directory
	models = {}
	for task in tasks:
	    model_path = model_paths[task]
	    # Fail early with a clear message when the directory is absent
	    if not os.path.exists(model_path):
	        raise FileNotFoundError(f"Model directory {model_path} not found.")
	    logger.info(f"Loading {task} model...")
	    try:
	        models[task] = AlbertForSequenceClassification.from_pretrained(
	            model_path
	        )
	    except Exception as e:
	        # Record which task failed, then re-raise to stop startup
	        logger.error(f"Failed to load {task} model: {str(e)}")
	        raise

	def predict_task(text, task, model, tokenizer, max_length=128):
	    """Classify ``text`` for one task and return per-label percentages.

	    Args:
	        text: Input string to classify.
	        task: Key into ``label_mappings`` selecting the label names.
	        model: Callable taking the tokenizer output as keyword arguments
	            and returning an object with a ``logits`` tensor.
	        tokenizer: Callable that encodes ``text`` into PyTorch tensors.
	        max_length: Maximum sequence length passed to the tokenizer;
	            longer input is truncated.

	    Returns:
	        A dict mapping each label of ``task`` to a string such as
	        ``"12.34%"``. If tokenization or inference fails, the error is
	        logged and every label maps to ``"Error"``.

	    Raises:
	        KeyError: If ``task`` is not a key of ``label_mappings``.
	    """
	    labels = label_mappings[task]
	    try:
	        inputs = tokenizer(
	            text,
	            padding=True,
	            truncation=True,
	            max_length=max_length,
	            return_tensors="pt",
	        )

	        # Inference only, so gradient tracking is disabled
	        with torch.no_grad():
	            logits = model(**inputs).logits

	        # Drop only the batch dimension. A bare squeeze() would also
	        # collapse the class dimension of a one-class model into a 0-d
	        # array that cannot be paired with labels.
	        probabilities = torch.softmax(logits, dim=1).squeeze(0).cpu().numpy()

	        # zip() stops at the shorter sequence, so make a mismatch visible
	        if len(probabilities) != len(labels):
	            logger.warning(
	                f"{task}: model returned {len(probabilities)} scores "
	                f"for {len(labels)} labels; unmatched entries are dropped"
	            )

	        return {
	            label: f"{prob*100:.2f}%"
	            for label, prob in zip(labels, probabilities)
	        }
	    except Exception as e:
	        # Best effort: keep the UI responsive, but record the traceback
	        logger.exception(f"Error predicting for {task}: {str(e)}")
	        return {label: "Error" for label in labels}

	def predict_all_tasks(text):
	    """Run every task's model on ``text`` and format the results as text.

	    Args:
	        text: The user-supplied string; ``None`` is treated like blank
	            input.

	    Returns:
	        ``"Please enter some text."`` when ``text`` is ``None``, empty or
	        whitespace-only. Otherwise a report with one
	        ``"<Task> Prediction:"`` section per entry in ``tasks``, each
	        followed by one ``" <label>: <value>"`` line per label.
	    """
	    # Treat a missing value like blank text instead of failing on .strip()
	    if text is None or not text.strip():
	        return "Please enter some text."

	    results = {
	        task: predict_task(text, task, models[task], tokenizer)
	        for task in tasks
	    }

	    # Collect the pieces and join once rather than growing a string
	    parts = []
	    for task, probs in results.items():
	        parts.append(f"\n{task.capitalize()} Prediction:\n")
	        parts.extend(f" {label}: {prob}\n" for label, prob in probs.items())

	    return "".join(parts)

	# Gradio UI: a two-line text box in, the plain-text prediction report out
	iface = gr.Interface(
	    fn=predict_all_tasks,
	    inputs=gr.Textbox(
	        placeholder="Enter Telugu text here...",
	        lines=2,
	    ),
	    outputs="text",
	    description="Enter Telugu text to predict sentiment, emotion, hate speech, and sarcasm.",
	    title="Telugu Text Analysis",
	)

	# Start the server only when this file is run as a script
	if __name__ == "__main__":
	    logger.info("Launching Gradio interface...")
	    # Listen on all network interfaces, port 7860
	    iface.launch(server_port=7860, server_name="0.0.0.0")