Spaces:

M2ai
/

MGTD-Demo

Running

App Files Community

minemaster01 committed on Jun 16

Commit

1f36bf0

verified ·

1 Parent(s): 142cc53

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -22

app.py CHANGED Viewed

@@ -58,35 +58,63 @@ model.eval()
 # Inference function
 def get_word_probabilities(text):
-    text = " ".join(text.split(" ")[:2048])
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-    inputs = {k: v.to(device) for k, v in inputs.items()}
-    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-    with torch.no_grad():
-        tags, emission = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
-    probs = torch.softmax(emission, dim=-1)[0, :, 1].cpu().numpy()
     word_probs = []
     current_word = ""
     current_probs = []
-    for token, prob in zip(tokens, probs):
-        if token in ["<s>", "</s>"]:
-            continue
-        if token.startswith("▁"):
-            if current_word and current_probs:
-                word_probs.append(sum(current_probs) / len(current_probs))
-            current_word = token[1:] if token != "▁" else ""
-            current_probs = [prob]
-        else:
-            current_word += token
-            current_probs.append(prob)
-    if current_word and current_probs:
-        word_probs.append(sum(current_probs) / len(current_probs))
     return word_probs
 # def get_word_classifications(text):

 # Inference function
 def get_word_probabilities(text):
+    try:
+        text = " ".join(text.split(" ")[:2048])
+    except Exception as e:
+        print("Error during text preprocessing:", e)
+        return []
+    try:
+        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+    except Exception as e:
+        print("Error during tokenization or moving inputs to device:", e)
+        return []
+    try:
+        tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+    except Exception as e:
+        print("Error during token conversion:", e)
+        return []
+    try:
+        with torch.no_grad():
+            tags, emission = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
+    except Exception as e:
+        print("Error during model inference:", e)
+        return []
+    try:
+        probs = torch.softmax(emission, dim=-1)[0, :, 1].cpu().numpy()
+    except Exception as e:
+        print("Error during softmax or extracting class probabilities:", e)
+        return []
     word_probs = []
     current_word = ""
     current_probs = []
+    try:
+        for token, prob in zip(tokens, probs):
+            if token in ["<s>", "</s>"]:
+                continue
+            if token.startswith("▁"):
+                if current_word and current_probs:
+                    word_probs.append(sum(current_probs) / len(current_probs))
+                current_word = token[1:] if token != "▁" else ""
+                current_probs = [prob]
+            else:
+                current_word += token
+                current_probs.append(prob)
+        if current_word and current_probs:
+            word_probs.append(sum(current_probs) / len(current_probs))
+    except Exception as e:
+        print("Error during word aggregation:", e)
+        return []
     return word_probs
 # def get_word_classifications(text):