koyu008 committed · verified
Commit fe3311f · 1 parent: 2b470ab

Update app.py

Files changed (1):
  1. app.py +32 -18
app.py CHANGED
@@ -1,4 +1,4 @@
-from fastapi import FastAPI, Request
+from fastapi import FastAPI
 from pydantic import BaseModel
 from langdetect import detect
 import torch
@@ -7,22 +7,27 @@ from transformers import DistilBertModel, AutoModel, AutoTokenizer, DistilBertTo
 from huggingface_hub import snapshot_download
 import os
 
+# App and device
 app = FastAPI()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# Use local cache folder for downloaded models
-os.environ["TRANSFORMERS_CACHE"] = "/app/.hf_cache"
-os.makedirs("/app/.hf_cache", exist_ok=True)
+# Create safe local cache directory
+hf_cache_dir = "./hf_cache"
+os.makedirs(hf_cache_dir, exist_ok=True)
+os.environ["TRANSFORMERS_CACHE"] = hf_cache_dir
 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Download model repositories to local path
+english_path = snapshot_download("koyu008/English_Toxic_Classifier", cache_dir=hf_cache_dir)
+hinglish_path = snapshot_download("koyu008/Hinglish_comment_classifier", cache_dir=hf_cache_dir)
 
-# -------------------------------
-# Model Classes
-# -------------------------------
+# ----------------------------
+# Model classes
+# ----------------------------
 
 class ToxicBERT(nn.Module):
     def __init__(self):
         super().__init__()
-        self.bert = DistilBertModel.from_pretrained(snapshot_download("koyu008/English_Toxic_Classifier"))
+        self.bert = DistilBertModel.from_pretrained(english_path)
         self.dropout = nn.Dropout(0.3)
         self.classifier = nn.Linear(self.bert.config.hidden_size, 6)
 
@@ -30,10 +35,11 @@ class ToxicBERT(nn.Module):
         output = self.bert(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state[:, 0]
         return self.classifier(self.dropout(output))
 
+
 class HinglishToxicClassifier(nn.Module):
     def __init__(self):
         super().__init__()
-        self.bert = AutoModel.from_pretrained(snapshot_download("koyu008/Hinglish_comment_classifier"))
+        self.bert = AutoModel.from_pretrained(hinglish_path)
         hidden_size = self.bert.config.hidden_size
         self.pool = lambda hidden: torch.cat([
             hidden.mean(dim=1),
@@ -52,9 +58,9 @@ class HinglishToxicClassifier(nn.Module):
         x = self.bottleneck(pooled)
         return self.classifier(x)
 
-# -------------------------------
-# Load Models and Tokenizers
-# -------------------------------
+# ----------------------------
+# Load Models & Tokenizers
+# ----------------------------
 
 english_model = ToxicBERT().to(device)
 english_model.load_state_dict(torch.load("bert_toxic_classifier.pt", map_location=device))
@@ -66,9 +72,9 @@ hinglish_model.load_state_dict(torch.load("best_hinglish_model.pt", map_location
 hinglish_model.eval()
 hinglish_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
 
-# -------------------------------
-# Request & Inference
-# -------------------------------
+# ----------------------------
+# API
+# ----------------------------
 
 class InputText(BaseModel):
     text: str
@@ -83,10 +89,18 @@ async def predict(input: InputText):
         with torch.no_grad():
             logits = english_model(**inputs)
         probs = torch.softmax(logits, dim=1).cpu().numpy().tolist()[0]
-        return {"language": "english", "classes": ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"], "probabilities": probs}
+        return {
+            "language": "english",
+            "classes": ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"],
+            "probabilities": probs
+        }
     else:
         inputs = hinglish_tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
         with torch.no_grad():
             logits = hinglish_model(**inputs)
         probs = torch.softmax(logits, dim=1).cpu().numpy().tolist()[0]
-        return {"language": "hinglish", "classes": ["toxic", "non-toxic"], "probabilities": probs}
+        return {
+            "language": "hinglish",
+            "classes": ["toxic", "non-toxic"],
+            "probabilities": probs
+        }
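
For reference, a minimal client-side sketch of how the updated endpoint could be exercised once the app is running. Only the {"text": ...} request body (from InputText) and the language/classes/probabilities response fields come from the diff above; the /predict route path, host, and port are assumptions, since the route decorator sits outside the changed hunks.

# Hypothetical client for the service defined in app.py; adjust URL to your deployment.
import requests

API_URL = "http://localhost:7860/predict"  # assumed route and port, not shown in the diff

def classify(text: str) -> dict:
    # InputText in app.py expects a single "text" field
    resp = requests.post(API_URL, json={"text": text}, timeout=30)
    resp.raise_for_status()
    return resp.json()

if __name__ == "__main__":
    result = classify("you are a wonderful person")
    # The response pairs each class name with its probability
    for label, prob in zip(result["classes"], result["probabilities"]):
        print(f"{label}: {prob:.3f}")
    print("detected language:", result["language"])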