koushikkumarkadari committed on
Commit
d647001
·
verified ·
1 Parent(s): 96ff0d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -41
app.py CHANGED
@@ -10,12 +10,12 @@ import logging
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
- # Define Google Drive folder IDs for each model
14
  model_drive_ids = {
15
- "sentiment": "your_sentiment_folder_id", # Replace with actual folder ID
16
- "emotion": "your_emotion_folder_id", # Replace with actual folder ID
17
- "hate_speech": "your_hate_speech_folder_id", # Replace with actual folder ID
18
- "sarcasm": "your_sarcasm_folder_id" # Replace with actual folder ID
19
  }
20
 
21
  # Define local directory to store downloaded models
@@ -27,15 +27,14 @@ for task, folder_id in model_drive_ids.items():
27
  output_dir = os.path.join(save_dir, task)
28
  if not os.path.exists(output_dir):
29
  logger.info(f"Downloading {task} model from Google Drive...")
30
- try:
31
- gdown.download_folder(
32
- f"https://drive.google.com/drive/folders/1kEXKoJxxD5-0FO8WvtagzseSIC5q-rRY?usp=sharing/{folder_id}",
33
- output=output_dir,
34
- quiet=False
35
- )
36
- except Exception as e:
37
- logger.error(f"Failed to download {task} model: {str(e)}")
38
- raise
39
 
40
  # Define model paths
41
  tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]
@@ -49,9 +48,9 @@ label_mappings = {
49
  "sarcasm": ["no", "yes"]
50
  }
51
 
52
- # Load tokenizer with use_fast=False to avoid fast tokenizer issues
 
53
  try:
54
- logger.info("Loading tokenizer...")
55
  tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=False)
56
  except Exception as e:
57
  logger.error(f"Failed to load tokenizer: {str(e)}")
@@ -60,37 +59,32 @@ except Exception as e:
60
  # Load all models
61
  models = {}
62
  for task in tasks:
63
- model_path = model_paths[task]
64
- if not os.path.exists(model_path):
65
- raise FileNotFoundError(f"Model directory {model_path} not found.")
66
  try:
67
- logger.info(f"Loading {task} model...")
68
- models[task] = AlbertForSequenceClassification.from_pretrained(model_path)
69
  except Exception as e:
70
- logger.error(f"Failed to load {task} model: {str(e)}")
71
  raise
72
 
73
  # Function to predict for a single task
74
  def predict_task(text, task, model, tokenizer, max_length=128):
75
- try:
76
- inputs = tokenizer(
77
- text,
78
- padding=True,
79
- truncation=True,
80
- max_length=max_length,
81
- return_tensors="pt"
82
- )
83
-
84
- with torch.no_grad():
85
- outputs = model(**inputs)
86
- logits = outputs.logits
87
- probabilities = torch.softmax(logits, dim=1).squeeze().cpu().numpy()
88
-
89
- labels = label_mappings[task]
90
- return {label: f"{prob*100:.2f}%" for label, prob in zip(labels, probabilities)}
91
- except Exception as e:
92
- logger.error(f"Error predicting for {task}: {str(e)}")
93
- return {label: "Error" for label in label_mappings[task]}
94
 
95
  # Gradio interface function
96
  def predict_all_tasks(text):
 
10
# Configure module-level logging so download/load progress is visible in the app logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Google Drive folder IDs, one per fine-tuned task model (each id points at
# that task's own subfolder, not the shared parent folder).
model_drive_ids = {
    "sentiment": "1uHY8dme-adxXsq7KrqoHjT6jhCtHZ4xc",
    "emotion": "1pHCJ2eqd9hHlfqNrRagV0sEszYwwQY2a",
    "hate_speech": "1th6peD5GBtdSVdW9pPKAPRFn_I12RNiz",
    "sarcasm": "1gjvxD7WoJx0V7AqtWPNFU_c4NmeFTRO8",
}
20
 
21
  # Define local directory to store downloaded models
 
27
  output_dir = os.path.join(save_dir, task)
28
  if not os.path.exists(output_dir):
29
  logger.info(f"Downloading {task} model from Google Drive...")
30
+ gdown.download_folder(
31
+ f"https://drive.google.com/drive/folders/1kEXKoJxxD5-0FO8WvtagzseSIC5q-rRY?usp=sharing/{folder_id}",
32
+ output=output_dir,
33
+ quiet=False,
34
+ use_cookies=False
35
+ )
36
+ else:
37
+ logger.info(f"Model directory {output_dir} already exists, skipping download.")
 
38
 
39
  # Define model paths
40
  tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]
 
48
  "sarcasm": ["no", "yes"]
49
  }
50
 
51
# Load the shared IndicBERT tokenizer. use_fast=False forces the slow Python
# tokenizer, avoiding the fast-tokenizer/tiktoken dependency issues seen with
# this checkpoint.
logger.info("Loading tokenizer...")
try:
    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=False)
except Exception as e:
    logger.error(f"Failed to load tokenizer: {str(e)}")
    # Re-raise: swallowing the error here would leave `tokenizer` undefined and
    # only fail later with a confusing NameError.
    raise
 
59
# Load one fine-tuned classification model per task into `models`.
models = {}
for task in tasks:
    logger.info(f"Loading model for {task}...")
    path = model_paths[task]
    # Fail fast with a clear message if the download step left no directory.
    if not os.path.exists(path):
        raise FileNotFoundError(f"Model directory {path} not found.")
    try:
        models[task] = AlbertForSequenceClassification.from_pretrained(path)
    except Exception as e:
        logger.error(f"Failed to load model for {task}: {str(e)}")
        raise
70
 
71
  # Function to predict for a single task
72
def predict_task(text, task, model, tokenizer, max_length=128, labels=None):
    """Run one classification head on *text* and return per-label percentages.

    Args:
        text: Input string to classify.
        task: Key into the module-level ``label_mappings`` (e.g. "sentiment"),
            used only when *labels* is not supplied.
        model: Sequence-classification model; called as ``model(**inputs)`` and
            expected to expose ``.logits``.
        tokenizer: Callable producing PyTorch tensors (``return_tensors="pt"``).
        max_length: Truncation length for tokenization.
        labels: Optional explicit label names. Defaults to
            ``label_mappings[task]``, preserving the original behavior.

    Returns:
        Dict mapping each label to a formatted percentage string, e.g.
        ``{"no": "73.10%", "yes": "26.90%"}``.
    """
    inputs = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    )

    # Inference only: disable autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        # softmax over the class dimension; squeeze drops the batch dim of 1.
        probabilities = torch.softmax(logits, dim=1).squeeze().cpu().numpy()

    if labels is None:
        labels = label_mappings[task]
    return {label: f"{prob*100:.2f}%" for label, prob in zip(labels, probabilities)}
 
 
 
 
88
 
89
  # Gradio interface function
90
  def predict_all_tasks(text):