koushikkumarkadari committed on
Commit
e5e8bcf
·
verified ·
1 Parent(s): d647001

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -20
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoTokenizer, AlbertForSequenceClassification
4
  import numpy as np
5
  import os
6
  import gdown
@@ -10,31 +10,49 @@ import logging
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
- # Define Google Drive folder IDs for each model (use specific subfolder IDs)
14
- model_drive_ids = {
15
- "sentiment": "1uHY8dme-adxXsq7KrqoHjT6jhCtHZ4xc",
16
- "emotion": "1pHCJ2eqd9hHlfqNrRagV0sEszYwwQY2a",
17
- "hate_speech": "1th6peD5GBtdSVdW9pPKAPRFn_I12RNiz",
18
- "sarcasm": "1gjvxD7WoJx0V7AqtWPNFU_c4NmeFTRO8"
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
 
21
  # Define local directory to store downloaded models
22
  save_dir = "./saved_models"
23
  os.makedirs(save_dir, exist_ok=True)
24
 
25
- # Download models from Google Drive
26
- for task, folder_id in model_drive_ids.items():
27
  output_dir = os.path.join(save_dir, task)
28
- if not os.path.exists(output_dir):
29
- logger.info(f"Downloading {task} model from Google Drive...")
30
- gdown.download_folder(
31
- f"https://drive.google.com/drive/folders/1kEXKoJxxD5-0FO8WvtagzseSIC5q-rRY?usp=sharing/{folder_id}",
32
- output=output_dir,
33
- quiet=False,
34
- use_cookies=False
35
- )
 
 
 
 
 
 
36
  else:
37
- logger.info(f"Model directory {output_dir} already exists, skipping download.")
38
 
39
  # Define model paths
40
  tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]
@@ -48,10 +66,11 @@ label_mappings = {
48
  "sarcasm": ["no", "yes"]
49
  }
50
 
51
- # Load tokenizer with use_fast=False to avoid tiktoken dependency
52
  logger.info("Loading tokenizer...")
53
  try:
54
- tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=False)
 
55
  except Exception as e:
56
  logger.error(f"Failed to load tokenizer: {str(e)}")
57
  raise
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoTokenizer, AlbertForSequenceClassification, AlbertTokenizer
4
  import numpy as np
5
  import os
6
  import gdown
 
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
+ # Define Google Drive file IDs for each model's config and safetensors
14
+ model_file_ids = {
15
+ "sentiment": {
16
+ "config": "11jwMJmQMGkiVZWBRQ5BLFyot1520FYIQ",
17
+ "model": "115N5yiu9lfw4uJE5YxHNoHauHeYSSusu"
18
+ },
19
+ "emotion": {
20
+ "config": "1dSxK10jbZyRpMDCm6MCRf9Jy0weOzLP9",
21
+ "model": "1Y3rTtPfo4zu28OhsRybdJF6czZN46I0Y"
22
+ },
23
+ "hate_speech": {
24
+ "config": "1QTejES8BZQs3qnxom9ymiZkLRUAZ91NP",
25
+ "model": "1ol2xO4XbdHwP_HHCYsnX8iVutA6javy_"
26
+ },
27
+ "sarcasm": {
28
+ "config": "1ypl0j1Yp_-0szR4-P1-0CMyDYBwUn5Wz",
29
+ "model": "1pbByLvTIHO_sT9HMeypvXbsdHsLVzTdk"
30
+ }
31
  }
32
 
33
  # Define local directory to store downloaded models
34
  save_dir = "./saved_models"
35
  os.makedirs(save_dir, exist_ok=True)
36
 
37
+ # Download individual model files
38
+ for task, files in model_file_ids.items():
39
  output_dir = os.path.join(save_dir, task)
40
+ os.makedirs(output_dir, exist_ok=True)
41
+
42
+ config_path = os.path.join(output_dir, "config.json")
43
+ model_path = os.path.join(output_dir, "model.safetensors")
44
+
45
+ if not os.path.exists(config_path):
46
+ logger.info(f"Downloading {task} config.json from Google Drive...")
47
+ gdown.download(f"https://drive.google.com/uc?id={files['config']}", config_path, quiet=False)
48
+ else:
49
+ logger.info(f"Config for {task} already exists, skipping download.")
50
+
51
+ if not os.path.exists(model_path):
52
+ logger.info(f"Downloading {task} model.safetensors from Google Drive...")
53
+ gdown.download(f"https://drive.google.com/uc?id={files['model']}", model_path, quiet=False)
54
  else:
55
+ logger.info(f"Model for {task} already exists, skipping download.")
56
 
57
  # Define model paths
58
  tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]
 
66
  "sarcasm": ["no", "yes"]
67
  }
68
 
69
+ # Load tokenizer
70
  logger.info("Loading tokenizer...")
71
  try:
72
+ # Explicitly use AlbertTokenizer with SentencePiece
73
+ tokenizer = AlbertTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=False)
74
  except Exception as e:
75
  logger.error(f"Failed to load tokenizer: {str(e)}")
76
  raise