koushikkumarkadari committed on
Commit 96ff0d7 · verified · 1 Parent(s): ed7c2a4

Update app.py

Files changed (1)
  1. app.py +51 -25
app.py CHANGED
@@ -4,6 +4,11 @@ from transformers import AutoTokenizer, AlbertForSequenceClassification
 import numpy as np
 import os
 import gdown
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 # Define Google Drive folder IDs for each model
 model_drive_ids = {
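A side note on the logging setup in this hunk: basicConfig with only a level uses the default record format. A minimal sketch of the same setup with timestamps and logger names, which can make the Spaces container logs easier to scan (purely optional; the format string is an assumption, not part of this commit):

    import logging

    # Same INFO threshold, but each record carries a timestamp and logger name
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )
    logger = logging.getLogger(__name__)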
 
@@ -21,11 +26,16 @@ os.makedirs(save_dir, exist_ok=True)
 for task, folder_id in model_drive_ids.items():
     output_dir = os.path.join(save_dir, task)
     if not os.path.exists(output_dir):
-        gdown.download_folder(
-            f"https://drive.google.com/drive/folders/1kEXKoJxxD5-0FO8WvtagzseSIC5q-rRY?usp=sharing/{folder_id}",
-            output=output_dir,
-            quiet=False
-        )
+        logger.info(f"Downloading {task} model from Google Drive...")
+        try:
+            gdown.download_folder(
+                f"https://drive.google.com/drive/folders/1kEXKoJxxD5-0FO8WvtagzseSIC5q-rRY?usp=sharing/{folder_id}",
+                output=output_dir,
+                quiet=False
+            )
+        except Exception as e:
+            logger.error(f"Failed to download {task} model: {str(e)}")
+            raise
 
 # Define model paths
 tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]
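One thing this hunk keeps from the old code is the download URL: it appends {folder_id} after the ?usp=sharing query string of one fixed folder link, so the string is not a valid per-task URL and gdown will likely resolve every task to the same parent folder. A minimal sketch of the apparent intent, passing each ID through gdown.download_folder's id keyword instead (assuming each value in model_drive_ids is a bare Drive folder ID):

    # Download each task's model folder by its own Drive folder ID
    for task, folder_id in model_drive_ids.items():
        output_dir = os.path.join(save_dir, task)
        if not os.path.exists(output_dir):
            logger.info(f"Downloading {task} model from Google Drive...")
            gdown.download_folder(id=folder_id, output=output_dir, quiet=False)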
 
@@ -39,33 +49,48 @@ label_mappings = {
     "sarcasm": ["no", "yes"]
 }
 
-# Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")
+# Load tokenizer with use_fast=False to avoid fast tokenizer issues
+try:
+    logger.info("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=False)
+except Exception as e:
+    logger.error(f"Failed to load tokenizer: {str(e)}")
+    raise
 
 # Load all models
 models = {}
 for task in tasks:
-    if not os.path.exists(model_paths[task]):
-        raise FileNotFoundError(f"Model directory {model_paths[task]} not found.")
-    models[task] = AlbertForSequenceClassification.from_pretrained(model_paths[task])
+    model_path = model_paths[task]
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f"Model directory {model_path} not found.")
+    try:
+        logger.info(f"Loading {task} model...")
+        models[task] = AlbertForSequenceClassification.from_pretrained(model_path)
+    except Exception as e:
+        logger.error(f"Failed to load {task} model: {str(e)}")
+        raise
 
 # Function to predict for a single task
 def predict_task(text, task, model, tokenizer, max_length=128):
-    inputs = tokenizer(
-        text,
-        padding=True,
-        truncation=True,
-        max_length=max_length,
-        return_tensors="pt"
-    )
-
-    with torch.no_grad():
-        outputs = model(**inputs)
-        logits = outputs.logits
-        probabilities = torch.softmax(logits, dim=1).squeeze().cpu().numpy()
-
-    labels = label_mappings[task]
-    return {label: f"{prob*100:.2f}%" for label, prob in zip(labels, probabilities)}
+    try:
+        inputs = tokenizer(
+            text,
+            padding=True,
+            truncation=True,
+            max_length=max_length,
+            return_tensors="pt"
+        )
+
+        with torch.no_grad():
+            outputs = model(**inputs)
+            logits = outputs.logits
+            probabilities = torch.softmax(logits, dim=1).squeeze().cpu().numpy()
+
+        labels = label_mappings[task]
+        return {label: f"{prob*100:.2f}%" for label, prob in zip(labels, probabilities)}
+    except Exception as e:
+        logger.error(f"Error predicting for {task}: {str(e)}")
+        return {label: "Error" for label in label_mappings[task]}
 
 # Gradio interface function
 def predict_all_tasks(text):
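On the use_fast=False change above: ai4bharat/indic-bert is an ALBERT-style model with a SentencePiece vocabulary, and the slow tokenizer path requires the sentencepiece package at runtime, so it is worth listing in requirements.txt. A quick sanity check along these lines (the input string is a placeholder):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=False)
    enc = tok("sample input text")  # placeholder text
    print(tok.convert_ids_to_tokens(enc["input_ids"]))  # SentencePiece pieces, not an error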
 
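The model-loading hunk raises on a missing directory, but a checkpoint whose classification head disagrees with label_mappings would still load, and the zip in predict_task would then silently drop labels. A small startup check, sketched under the assumption that the loop above has populated models:

    # Fail fast if a checkpoint's head size disagrees with its label list
    for task, model in models.items():
        expected = len(label_mappings[task])
        if model.config.num_labels != expected:
            raise ValueError(
                f"{task}: checkpoint has {model.config.num_labels} labels, expected {expected}"
            )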
@@ -94,4 +119,5 @@ iface = gr.Interface(
 )
 
 if __name__ == "__main__":
+    logger.info("Launching Gradio interface...")
     iface.launch(server_name="0.0.0.0", server_port=7860)
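With these changes applied, a quick local smoke test of the prediction helper could look like the following; the input text is a placeholder, and each result dict is keyed by that task's entry in label_mappings:

    # Hypothetical smoke test, run after all models have downloaded and loaded
    text = "example input"
    for task in tasks:
        print(task, predict_task(text, task, models[task], tokenizer))
        # e.g. for sarcasm: {'no': '12.34%', 'yes': '87.66%'}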