koushikkumarkadari committed on
Commit
d647001
·
verified ·
1 Parent(s): 96ff0d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -41
app.py CHANGED
@@ -10,12 +10,12 @@ import logging
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
- # Define Google Drive folder IDs for each model
14
  model_drive_ids = {
15
- "sentiment": "your_sentiment_folder_id", # Replace with actual folder ID
16
- "emotion": "your_emotion_folder_id", # Replace with actual folder ID
17
- "hate_speech": "your_hate_speech_folder_id", # Replace with actual folder ID
18
- "sarcasm": "your_sarcasm_folder_id" # Replace with actual folder ID
19
  }
20
 
21
  # Define local directory to store downloaded models
@@ -27,15 +27,14 @@ for task, folder_id in model_drive_ids.items():
27
  output_dir = os.path.join(save_dir, task)
28
  if not os.path.exists(output_dir):
29
  logger.info(f"Downloading {task} model from Google Drive...")
30
- try:
31
- gdown.download_folder(
32
- f"https://drive.google.com/drive/folders/1kEXKoJxxD5-0FO8WvtagzseSIC5q-rRY?usp=sharing/{folder_id}",
33
- output=output_dir,
34
- quiet=False
35
- )
36
- except Exception as e:
37
- logger.error(f"Failed to download {task} model: {str(e)}")
38
- raise
39
 
40
  # Define model paths
41
  tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]
@@ -49,9 +48,9 @@ label_mappings = {
49
  "sarcasm": ["no", "yes"]
50
  }
51
 
52
- # Load tokenizer with use_fast=False to avoid fast tokenizer issues
 
53
  try:
54
- logger.info("Loading tokenizer...")
55
  tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=False)
56
  except Exception as e:
57
  logger.error(f"Failed to load tokenizer: {str(e)}")
@@ -60,37 +59,32 @@ except Exception as e:
60
  # Load all models
61
  models = {}
62
  for task in tasks:
63
- model_path = model_paths[task]
64
- if not os.path.exists(model_path):
65
- raise FileNotFoundError(f"Model directory {model_path} not found.")
66
  try:
67
- logger.info(f"Loading {task} model...")
68
- models[task] = AlbertForSequenceClassification.from_pretrained(model_path)
69
  except Exception as e:
70
- logger.error(f"Failed to load {task} model: {str(e)}")
71
  raise
72
 
73
  # Function to predict for a single task
74
  def predict_task(text, task, model, tokenizer, max_length=128):
75
- try:
76
- inputs = tokenizer(
77
- text,
78
- padding=True,
79
- truncation=True,
80
- max_length=max_length,
81
- return_tensors="pt"
82
- )
83
-
84
- with torch.no_grad():
85
- outputs = model(**inputs)
86
- logits = outputs.logits
87
- probabilities = torch.softmax(logits, dim=1).squeeze().cpu().numpy()
88
-
89
- labels = label_mappings[task]
90
- return {label: f"{prob*100:.2f}%" for label, prob in zip(labels, probabilities)}
91
- except Exception as e:
92
- logger.error(f"Error predicting for {task}: {str(e)}")
93
- return {label: "Error" for label in label_mappings[task]}
94
 
95
  # Gradio interface function
96
  def predict_all_tasks(text):
 
10
# Configure module-level logging so download/load progress is visible in the app logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Google Drive folder IDs, one per fine-tuned task model (each id points at
# that task's own subfolder, not the shared parent folder).
model_drive_ids = {
    "sentiment": "1uHY8dme-adxXsq7KrqoHjT6jhCtHZ4xc",
    "emotion": "1pHCJ2eqd9hHlfqNrRagV0sEszYwwQY2a",
    "hate_speech": "1th6peD5GBtdSVdW9pPKAPRFn_I12RNiz",
    "sarcasm": "1gjvxD7WoJx0V7AqtWPNFU_c4NmeFTRO8",
}
20
 
21
  # Define local directory to store downloaded models
 
27
  output_dir = os.path.join(save_dir, task)
28
  if not os.path.exists(output_dir):
29
  logger.info(f"Downloading {task} model from Google Drive...")
30
+ gdown.download_folder(
31
+ f"https://drive.google.com/drive/folders/1kEXKoJxxD5-0FO8WvtagzseSIC5q-rRY?usp=sharing/{folder_id}",
32
+ output=output_dir,
33
+ quiet=False,
34
+ use_cookies=False
35
+ )
36
+ else:
37
+ logger.info(f"Model directory {output_dir} already exists, skipping download.")
 
38
 
39
  # Define model paths
40
  tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]
 
48
  "sarcasm": ["no", "yes"]
49
  }
50
 
51
# Load the shared IndicBERT tokenizer. use_fast=False forces the slow Python
# tokenizer, avoiding the fast-tokenizer/tiktoken dependency issues seen with
# this checkpoint.
logger.info("Loading tokenizer...")
try:
    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=False)
except Exception as e:
    logger.error(f"Failed to load tokenizer: {str(e)}")
    # Re-raise: swallowing the error here would leave `tokenizer` undefined and
    # only fail later with a confusing NameError.
    raise
 
59
# Load one fine-tuned classification model per task into `models`.
models = {}
for task in tasks:
    logger.info(f"Loading model for {task}...")
    path = model_paths[task]
    # Fail fast with a clear message if the download step left no directory.
    if not os.path.exists(path):
        raise FileNotFoundError(f"Model directory {path} not found.")
    try:
        models[task] = AlbertForSequenceClassification.from_pretrained(path)
    except Exception as e:
        logger.error(f"Failed to load model for {task}: {str(e)}")
        raise
70
 
71
  # Function to predict for a single task
72
def predict_task(text, task, model, tokenizer, max_length=128, labels=None):
    """Run one classification head on *text* and return per-label percentages.

    Args:
        text: Input string to classify.
        task: Key into the module-level ``label_mappings`` (e.g. "sentiment"),
            used only when *labels* is not supplied.
        model: Sequence-classification model; called as ``model(**inputs)`` and
            expected to expose ``.logits``.
        tokenizer: Callable producing PyTorch tensors (``return_tensors="pt"``).
        max_length: Truncation length for tokenization.
        labels: Optional explicit label names. Defaults to
            ``label_mappings[task]``, preserving the original behavior.

    Returns:
        Dict mapping each label to a formatted percentage string, e.g.
        ``{"no": "73.10%", "yes": "26.90%"}``.
    """
    inputs = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    )

    # Inference only: disable autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        # softmax over the class dimension; squeeze drops the batch dim of 1.
        probabilities = torch.softmax(logits, dim=1).squeeze().cpu().numpy()

    if labels is None:
        labels = label_mappings[task]
    return {label: f"{prob*100:.2f}%" for label, prob in zip(labels, probabilities)}
 
 
 
 
88
 
89
  # Gradio interface function
90
  def predict_all_tasks(text):