Spaces:

M2ai
/

MGTD-Demo

Running

App Files Community

minemaster01 committed on Jun 16

Commit

c9abe45

verified ·

1 Parent(s): 5a64fc3

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -57

app.py CHANGED Viewed

@@ -56,67 +56,59 @@ model = model.to(device)
 model.eval()
 # Inference function
 def get_word_probabilities(text):
     """Return one averaged class-1 probability per word of ``text``.

     The input is limited to its first 2048 space-separated pieces, tokenized,
     and run through the model; the emission scores are softmaxed and the
     index-1 probability of every sub-token is averaged per word. A token that
     starts with "▁" opens a new word; "<s>" and "</s>" are ignored.

     Each stage is guarded on its own: if it raises, the stage's message is
     printed and an empty list is returned instead of propagating the error.

     Returns:
         A list of Python floats, one per aggregated word, or ``[]`` on error.
     """
     try:
         clipped = " ".join(text.split(" ")[:2048])
     except Exception as e:
         print("Error during text preprocessing:", e)
         return []
     try:
         encoded = tokenizer(clipped, return_tensors="pt", truncation=True, padding=True)
         encoded = {key: value.to(device) for key, value in encoded.items()}
     except Exception as e:
         print("Error during tokenization or moving inputs to device:", e)
         return []
     try:
         pieces = tokenizer.convert_ids_to_tokens(encoded["input_ids"][0])
     except Exception as e:
         print("Error during token conversion:", e)
         return []
     try:
         with torch.no_grad():
             # The model yields a (tags, emission) pair; only emission is used.
             _, emission = model(
                 input_ids=encoded["input_ids"],
                 attention_mask=encoded["attention_mask"],
             )
     except Exception as e:
         print("Error during model inference:", e)
         return []
     try:
         # Softmax over the last axis, then index 1 for batch item 0.
         scores = torch.softmax(emission, dim=-1)
         probs = scores[0, :, 1].cpu().numpy()
     except Exception as e:
         print("Error during softmax or extracting class probabilities:", e)
         return []
     averages = []
     word_text = ""
     bucket = []
     try:
         for piece, p in zip(pieces, probs):
             if piece == "<s>" or piece == "</s>":
                 continue
             if not piece.startswith("▁"):
                 # Continuation piece: extend the word currently being built.
                 word_text += piece
                 bucket.append(p)
                 continue
             # A "▁"-prefixed piece closes the previous word (if it has text).
             if word_text and bucket:
                 averages.append(sum(bucket) / len(bucket))
             word_text = "" if piece == "▁" else piece[1:]
             bucket = [p]
         # Flush the final word, which no later "▁" piece will close.
         if word_text and bucket:
             averages.append(sum(bucket) / len(bucket))
     except Exception as e:
         print("Error during word aggregation:", e)
         return []
     # Convert numpy scalars to plain floats.
     return [float(value) for value in averages]
 # def get_word_classifications(text):
 #     text = " ".join(text.split(" ")[:2048])
@@ -186,7 +178,7 @@ def infer_and_log(text_input):
         "id": submission_id,
         "timestamp": timestamp,
         "input": text_input,
-        "output_tags": word_tags
     }
     os.makedirs("logs", exist_ok=True)
@@ -207,7 +199,7 @@ def infer_and_log(text_input):
         except Exception as e:
             print(f"Error uploading log: {e}")
-    return json.dumps(word_tags, indent=2)
 def clear_fields():

 model.eval()
 # Inference function
 def get_word_probabilities(text):
     """Score each word of ``text`` with the model's averaged class-1 probability.

     The input is limited to its first 2048 space-separated pieces, tokenized,
     and run through the model. The emission scores are softmaxed and the
     index-1 probability of every sub-token is averaged per word. A token that
     starts with "▁" opens a new word; "<s>" and "</s>" are ignored.

     Args:
         text: Input string to score.

     Returns:
         A one-element tuple ``(word_probs,)`` where ``word_probs`` is a list
         of Python floats, one per aggregated word.

     NOTE(review): a color is computed for every word but is not part of the
     return value. The original ``return word_probs,`` (bare trailing comma)
     looks like an unfinished ``return word_probs, word_colors`` -- confirm
     against the caller before changing the return shape.
     """
     # Keep only the first 2048 space-separated pieces of the input.
     text = " ".join(text.split(" ")[:2048])
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
     with torch.no_grad():
         # The model yields a (tags, emission) pair; only emission is used.
         _tags, emission = model(
             input_ids=inputs["input_ids"],
             attention_mask=inputs["attention_mask"],
         )
     # Softmax over the last axis, then index 1 for batch item 0.
     probs = torch.softmax(emission, dim=-1)[0, :, 1].cpu().numpy()
     word_probs = []
     word_colors = []

     def close_word(word, collected):
         # Record the finished word; a word with empty text (for example a
         # lone "▁" token) or with no collected probabilities is skipped.
         if not (word and collected):
             return
         mean_prob = sum(collected) / len(collected)
         word_probs.append(mean_prob)
         word_colors.append(_color_for_probability(mean_prob))

     current_word = ""
     current_probs = []
     for token, prob in zip(tokens, probs):
         if token in ("<s>", "</s>"):
             continue
         if token.startswith("▁"):
             close_word(current_word, current_probs)
             # Drop the leading marker; a bare "▁" leaves an empty word.
             current_word = token[1:]
             current_probs = [prob]
         else:
             current_word += token
             current_probs.append(prob)
     # Flush the final word, which no later "▁" token will close.
     close_word(current_word, current_probs)
     # Convert numpy scalars to plain floats.
     word_probs = [float(p) for p in word_probs]
     return (word_probs,)


 def _color_for_probability(prob):
     """Map a probability to its display color bucket.

     Below 0.25 is "green", below 0.5 "yellow", below 0.75 "orange",
     and anything else is "red".
     """
     if prob < 0.25:
         return "green"
     if prob < 0.5:
         return "yellow"
     if prob < 0.75:
         return "orange"
     return "red"
 # def get_word_classifications(text):
 #     text = " ".join(text.split(" ")[:2048])
         "id": submission_id,
         "timestamp": timestamp,
         "input": text_input,
+        "output_probs": word_probs
     }
     os.makedirs("logs", exist_ok=True)
         except Exception as e:
             print(f"Error uploading log: {e}")
+    return json.dumps(word_probs, indent=2)
 def clear_fields():