Spaces:

M2ai
/

MGTD-Demo

Running

App Files Community

minemaster01 committed on Jun 16

Commit

5050833

verified ·

1 Parent(s): 4c81942

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -11

app.py CHANGED Viewed

@@ -56,29 +56,76 @@ model = model.to(device)
 model.eval()
 # Inference function
 def get_word_classifications(text):
     text = " ".join(text.split(" ")[:2048])
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     inputs = {k: v.to(device) for k, v in inputs.items()}
     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
     with torch.no_grad():
-        tags, _ = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
     word_tags = []
     current_word = ""
-    current_tag = None
-    for token, tag in zip(tokens, tags[0]):
         if token in ["<s>", "</s>"]:
             continue
         if token.startswith("▁"):
             if current_word:
-                word_tags.append(str(current_tag))
             current_word = token[1:] if token != "▁" else ""
-            current_tag = tag
         else:
             current_word += token
     if current_word:
-        word_tags.append(str(current_tag))
-    return word_tags
 # HF logging setup
 def setup_hf_dataset():
@@ -93,7 +140,7 @@ def setup_hf_dataset():
 # Main inference + logging function
 def infer_and_log(text_input):
-    word_tags = get_word_classifications(text_input)
     timestamp = datetime.datetime.now().isoformat()
     submission_id = str(uuid.uuid4())
@@ -122,7 +169,7 @@ def infer_and_log(text_input):
         except Exception as e:
             print(f"Error uploading log: {e}")
-    return " ".join(word_tags)
 def clear_fields():
     return "", ""
@@ -136,12 +183,13 @@ with gr.Blocks() as app:
     with gr.Row():
         input_box = gr.Textbox(label="Input Text", lines=10)
-        output_box = gr.Textbox(label="Output Tags", lines=10, interactive=False)
     with gr.Row():
         submit_btn = gr.Button("Submit")
         clear_btn = gr.Button("Clear")
     submit_btn.click(fn=infer_and_log, inputs=input_box, outputs=output_box)
     clear_btn.click(fn=clear_fields, outputs=[input_box, output_box])

 model.eval()
 # Inference function
+# def get_word_classifications(text):
+#     text = " ".join(text.split(" ")[:2048])
+#     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+#     inputs = {k: v.to(device) for k, v in inputs.items()}
+#     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+#     with torch.no_grad():
+#         tags, _ = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
+#     word_tags = []
+#     current_word = ""
+#     current_tag = ""
+#     for token, tag in zip(tokens, tags[0]):
+#         if token in ["<s>", "</s>"]:
+#             continue
+#         if token.startswith("▁"):
+#             if current_word:
+#                 word_tags.append(str(current_tag))
+#             current_word = token[1:] if token != "▁" else ""
+#             current_tag = tag
+#         else:
+#             current_word += token
+#     if current_word:
+#         word_tags.append(str(current_tag))
+#     return word_tags
 def get_word_classifications(text):
     text = " ".join(text.split(" ")[:2048])
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     inputs = {k: v.to(device) for k, v in inputs.items()}
     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
     with torch.no_grad():
+        tags, emissions = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
     word_tags = []
+    color_output = []
     current_word = ""
+    current_prob = 0.0
+    for token, prob in zip(tokens, tags[0]):
         if token in ["<s>", "</s>"]:
             continue
         if token.startswith("▁"):
             if current_word:
+                word_tags.append(round(current_prob, 3))
+                color = (
+                    "green" if current_prob < 0.25 else
+                    "yellow" if current_prob < 0.5 else
+                    "orange" if current_prob < 0.75 else
+                    "red"
+                )
+                color_output.append(f'<span style="color:{color}">{current_word}</span>')
             current_word = token[1:] if token != "▁" else ""
+            current_prob = prob
         else:
             current_word += token
+            current_prob = max(current_prob, prob)
     if current_word:
+        word_tags.append(round(current_prob, 3))
+        color = (
+            "green" if current_prob < 0.25 else
+            "yellow" if current_prob < 0.5 else
+            "orange" if current_prob < 0.75 else
+            "red"
+        )
+        color_output.append(f'<span style="color:{color}">{current_word}</span>')
+    output = " ".join(color_output)
+    return output, word_tags
 # HF logging setup
 def setup_hf_dataset():
 # Main inference + logging function
 def infer_and_log(text_input):
+    output, word_tags = get_word_classifications(text_input)
     timestamp = datetime.datetime.now().isoformat()
     submission_id = str(uuid.uuid4())
         except Exception as e:
             print(f"Error uploading log: {e}")
+    return output
 def clear_fields():
     return "", ""
     with gr.Row():
         input_box = gr.Textbox(label="Input Text", lines=10)
+        output_box = gr.HTML(label="Output Tags")
     with gr.Row():
         submit_btn = gr.Button("Submit")
         clear_btn = gr.Button("Clear")
     submit_btn.click(fn=infer_and_log, inputs=input_box, outputs=output_box)
     clear_btn.click(fn=clear_fields, outputs=[input_box, output_box])