Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
-
from transformers import AutoTokenizer, AlbertForSequenceClassification
|
4 |
import numpy as np
|
5 |
import os
|
6 |
import gdown
|
@@ -10,31 +10,49 @@ import logging
|
|
10 |
logging.basicConfig(level=logging.INFO)
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
13 |
-
# Define Google Drive
|
14 |
-
|
15 |
-
"sentiment":
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
}
|
20 |
|
21 |
# Define local directory to store downloaded models
|
22 |
save_dir = "./saved_models"
|
23 |
os.makedirs(save_dir, exist_ok=True)
|
24 |
|
25 |
-
# Download
|
26 |
-
for task,
|
27 |
output_dir = os.path.join(save_dir, task)
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
else:
|
37 |
-
logger.info(f"Model
|
38 |
|
39 |
# Define model paths
|
40 |
tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]
|
@@ -48,10 +66,11 @@ label_mappings = {
|
|
48 |
"sarcasm": ["no", "yes"]
|
49 |
}
|
50 |
|
51 |
-
# Load tokenizer
|
52 |
logger.info("Loading tokenizer...")
|
53 |
try:
|
54 |
-
|
|
|
55 |
except Exception as e:
|
56 |
logger.error(f"Failed to load tokenizer: {str(e)}")
|
57 |
raise
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
+
from transformers import AutoTokenizer, AlbertForSequenceClassification, AlbertTokenizer
|
4 |
import numpy as np
|
5 |
import os
|
6 |
import gdown
|
|
|
# Configure module-level logging so download/model-load progress is visible
# in the app's console output.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
12 |
|
13 |
# Define Google Drive file IDs for each model's config and safetensors.
# One fine-tuned checkpoint per classification task; each task maps to the
# Drive IDs of its `config.json` and `model.safetensors`.
# NOTE(review): IDs are opaque Drive handles — presumably publicly shared
# files; verify they are accessible without authentication.
model_file_ids = {
    "sentiment": {
        "config": "11jwMJmQMGkiVZWBRQ5BLFyot1520FYIQ",
        "model": "115N5yiu9lfw4uJE5YxHNoHauHeYSSusu"
    },
    "emotion": {
        "config": "1dSxK10jbZyRpMDCm6MCRf9Jy0weOzLP9",
        "model": "1Y3rTtPfo4zu28OhsRybdJF6czZN46I0Y"
    },
    "hate_speech": {
        "config": "1QTejES8BZQs3qnxom9ymiZkLRUAZ91NP",
        "model": "1ol2xO4XbdHwP_HHCYsnX8iVutA6javy_"
    },
    "sarcasm": {
        "config": "1ypl0j1Yp_-0szR4-P1-0CMyDYBwUn5Wz",
        "model": "1pbByLvTIHO_sT9HMeypvXbsdHsLVzTdk"
    }
}
|
32 |
|
# Define local directory to store downloaded models
# (created eagerly; exist_ok=True makes repeated runs a no-op).
save_dir = "./saved_models"
os.makedirs(save_dir, exist_ok=True)
|
36 |
|
# Download individual model files from Google Drive. Files already present
# on disk are skipped, so restarts reuse the local cache in save_dir.
for task, files in model_file_ids.items():
    output_dir = os.path.join(save_dir, task)
    os.makedirs(output_dir, exist_ok=True)

    config_path = os.path.join(output_dir, "config.json")
    model_path = os.path.join(output_dir, "model.safetensors")

    if not os.path.exists(config_path):
        logger.info(f"Downloading {task} config.json from Google Drive...")
        # gdown.download returns the output path on success and None on
        # failure; an unchecked failure would only surface later as a
        # confusing model-load error, so report it explicitly here.
        if gdown.download(f"https://drive.google.com/uc?id={files['config']}", config_path, quiet=False) is None:
            logger.error(f"Failed to download config.json for {task}")
    else:
        logger.info(f"Config for {task} already exists, skipping download.")

    if not os.path.exists(model_path):
        logger.info(f"Downloading {task} model.safetensors from Google Drive...")
        if gdown.download(f"https://drive.google.com/uc?id={files['model']}", model_path, quiet=False) is None:
            logger.error(f"Failed to download model.safetensors for {task}")
    else:
        logger.info(f"Model for {task} already exists, skipping download.")
|
56 |
|
# Define model paths
# Task keys; each must match a subdirectory of save_dir and the entries in
# model_file_ids (and, per the visible fragment, label_mappings).
tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]
|
|
|
66 |
"sarcasm": ["no", "yes"]
|
67 |
}
|
68 |
|
# Load tokenizer
logger.info("Loading tokenizer...")
try:
    # Explicitly use AlbertTokenizer with SentencePiece
    # NOTE(review): use_fast is an AutoTokenizer.from_pretrained option; on a
    # concrete class like AlbertTokenizer it is merely absorbed as an unused
    # init kwarg — confirm and consider dropping it.
    tokenizer = AlbertTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=False)
except Exception as e:
    logger.error(f"Failed to load tokenizer: {str(e)}")
    raise
|