Spaces:

M2ai
/

MGTD-Demo

Running

App Files Community

minemaster01 committed on Jun 16

Commit

ec5bad4

verified ·

1 Parent(s): 5050833

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -54

app.py CHANGED Viewed

@@ -56,75 +56,75 @@ model = model.to(device)
 model.eval()
 # Inference function
-# def get_word_classifications(text):
-#     text = " ".join(text.split(" ")[:2048])
-#     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-#     inputs = {k: v.to(device) for k, v in inputs.items()}
-#     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-#     with torch.no_grad():
-#         tags, _ = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
-#     word_tags = []
-#     current_word = ""
-#     current_tag = ""
-#     for token, tag in zip(tokens, tags[0]):
-#         if token in ["<s>", "</s>"]:
-#             continue
-#         if token.startswith("▁"):
-#             if current_word:
-#                 word_tags.append(str(current_tag))
-#             current_word = token[1:] if token != "▁" else ""
-#             current_tag = tag
-#         else:
-#             current_word += token
-#     if current_word:
-#         word_tags.append(str(current_tag))
-#     return word_tags
 def get_word_classifications(text):
     text = " ".join(text.split(" ")[:2048])
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     inputs = {k: v.to(device) for k, v in inputs.items()}
     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
     with torch.no_grad():
-        tags, emissions = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
     word_tags = []
-    color_output = []
     current_word = ""
-    current_prob = 0.0
-    for token, prob in zip(tokens, tags[0]):
         if token in ["<s>", "</s>"]:
             continue
         if token.startswith("▁"):
             if current_word:
-                word_tags.append(round(current_prob, 3))
-                color = (
-                    "green" if current_prob < 0.25 else
-                    "yellow" if current_prob < 0.5 else
-                    "orange" if current_prob < 0.75 else
-                    "red"
-                )
-                color_output.append(f'<span style="color:{color}">{current_word}</span>')
             current_word = token[1:] if token != "▁" else ""
-            current_prob = prob
         else:
             current_word += token
-            current_prob = max(current_prob, prob)
     if current_word:
-        word_tags.append(round(current_prob, 3))
-        color = (
-            "green" if current_prob < 0.25 else
-            "yellow" if current_prob < 0.5 else
-            "orange" if current_prob < 0.75 else
-            "red"
-        )
-        color_output.append(f'<span style="color:{color}">{current_word}</span>')
-    output = " ".join(color_output)
-    return output, word_tags
 # HF logging setup
@@ -140,7 +140,7 @@ def setup_hf_dataset():
 # Main inference + logging function
 def infer_and_log(text_input):
-    output, word_tags = get_word_classifications(text_input)
     timestamp = datetime.datetime.now().isoformat()
     submission_id = str(uuid.uuid4())
@@ -169,7 +169,7 @@ def infer_and_log(text_input):
         except Exception as e:
             print(f"Error uploading log: {e}")
-    return output
 def clear_fields():
     return "", ""

 model.eval()
 # Inference function
 def get_word_classifications(text):
     text = " ".join(text.split(" ")[:2048])
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     inputs = {k: v.to(device) for k, v in inputs.items()}
     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
     with torch.no_grad():
+        tags, _ = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
     word_tags = []
     current_word = ""
+    current_tag = ""
+    for token, tag in zip(tokens, tags[0]):
         if token in ["<s>", "</s>"]:
             continue
         if token.startswith("▁"):
             if current_word:
+                word_tags.append(str(current_tag))
             current_word = token[1:] if token != "▁" else ""
+            current_tag = tag
         else:
             current_word += token
     if current_word:
+        word_tags.append(str(current_tag))
+    return word_tags
+# def get_word_classifications(text):
+#     text = " ".join(text.split(" ")[:2048])
+#     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+#     inputs = {k: v.to(device) for k, v in inputs.items()}
+#     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+#     with torch.no_grad():
+#         tags, emissions = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
+#     word_tags = []
+#     color_output = []
+#     current_word = ""
+#     current_prob = 0.0
+#     for token, prob in zip(tokens, tags[0]):
+#         if token in ["<s>", "</s>"]:
+#             continue
+#         if token.startswith("▁"):
+#             if current_word:
+#                 word_tags.append(round(current_prob, 3))
+#                 color = (
+#                     "green" if current_prob < 0.25 else
+#                     "yellow" if current_prob < 0.5 else
+#                     "orange" if current_prob < 0.75 else
+#                     "red"
+#                 )
+#                 color_output.append(f'<span style="color:{color}">{current_word}</span>')
+#             current_word = token[1:] if token != "▁" else ""
+#             current_prob = prob
+#         else:
+#             current_word += token
+#             current_prob = max(current_prob, prob)
+#     if current_word:
+#         word_tags.append(round(current_prob, 3))
+#         color = (
+#             "green" if current_prob < 0.25 else
+#             "yellow" if current_prob < 0.5 else
+#             "orange" if current_prob < 0.75 else
+#             "red"
+#         )
+#         color_output.append(f'<span style="color:{color}">{current_word}</span>')
+#     output = " ".join(color_output)
+#     return output, word_tags
 # HF logging setup
 # Main inference + logging function
 def infer_and_log(text_input):
+    word_tags = get_word_classifications(text_input)
     timestamp = datetime.datetime.now().isoformat()
     submission_id = str(uuid.uuid4())
         except Exception as e:
             print(f"Error uploading log: {e}")
+    return "".join(word_tags)
 def clear_fields():
     return "", ""