Spaces:

raicrits
/

NER_mcp

Sleeping

App Files Files Community

root committed on Jun 3

Commit

c6e5246

1 Parent(s): 2d805fd

test mcp server gradio

Browse files

Files changed (3) hide show

.gitignore +1 -0
app.py +88 -0
requirements.txt +3 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .ipynb_checkpoints/

app.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForTokenClassification
+from transformers import pipeline
+import json
+# Address and port used when the app is launched as a script;
+# "0.0.0.0" listens on all network interfaces.
+server_port = 8890
+server_name = "0.0.0.0"
+# Load the pretrained NER checkpoint once at import time so every request reuses it.
+_MODEL_ID = "Babelscape/wikineural-multilingual-ner"
+tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
+model = AutoModelForTokenClassification.from_pretrained(_MODEL_ID)
+# "simple" aggregation makes the pipeline return whole entities carrying an 'entity_group' label.
+nlp = pipeline("ner", tokenizer=tokenizer, model=model, aggregation_strategy="simple")
+def group_cat(entities):
+    """
+    Buckets entity records by the value of their 'entity_group' field.
+    Args:
+        entities: Iterable of dict-like records supporting ``.get``.
+    Returns:
+        dict: Maps each group label to the list of records carrying it, in input order. Records without an 'entity_group' key are collected under ``None``.
+    """
+    by_group = {}
+    for entity in entities:
+        # setdefault creates the bucket on first sight of a label, then reuses it.
+        by_group.setdefault(entity.get('entity_group'), []).append(entity)
+    return by_group
+def ner(text: str) -> str:
+    """
+    Searches the input text for named entities and returns them organized by category.
+    Texts longer than the model limit are split into overlapping windows, each window is analyzed separately, and the results are merged.
+    Args:
+        text (str): The input text to analyze.
+    Returns:
+        str: A JSON string representing a dictionary where each key is a named entity category (e.g., 'PER', 'ORG', 'LOC', etc.), and the corresponding value is a dictionary mapping each entity found in the text under that category to its confidence score (the highest score when the entity occurs more than once). An empty or blank text yields an empty JSON object.
+    """
+    # A missing or blank input cannot contain entities; skip the model entirely.
+    if not text or not text.strip():
+        return json.dumps({}, indent=2, separators=(',', ': '), ensure_ascii=False)
+    max_len = tokenizer.model_max_length
+    stride = 50
+    # Tokenize with overflow so that long texts become overlapping windows.
+    inputs = tokenizer(
+        text,
+        return_overflowing_tokens=True,
+        stride=stride,
+        max_length=max_len,
+        truncation=True,
+        return_offsets_mapping=True,
+        padding=False
+    )
+    all_entities = []
+    seen = set()  # Deduplicates on (word, start, end), offsets relative to the full text
+    for offsets in inputs["offset_mapping"]:
+        # Special tokens carry an empty (0, 0) span; keep only real tokens.
+        spans = [(start, end) for start, end in offsets if end > start]
+        if not spans:
+            continue
+        chunk_start = spans[0][0]
+        chunk_end = spans[-1][1]
+        # Slice the original text rather than decoding token ids, so the model
+        # sees the text exactly as written.
+        chunk_entities = nlp(text[chunk_start:chunk_end])
+        for ent in chunk_entities:
+            # Shift chunk-relative offsets to positions in the full text, so the
+            # same mention found in two overlapping windows gets the same key.
+            start = ent["start"] + chunk_start
+            end = ent["end"] + chunk_start
+            key = (ent["word"], start, end)
+            if key not in seen:
+                seen.add(key)
+                all_entities.append({**ent, "start": start, "end": end})
+    cleaned = {}
+    for category, items in group_cat(all_entities).items():
+        scores = cleaned.setdefault(category, {})
+        for ent in items:
+            score = float(ent["score"])
+            # Keep the best confidence for a repeated entity so the result does
+            # not depend on which occurrence happened to come last.
+            if score > scores.get(ent["word"], float("-inf")):
+                scores[ent["word"]] = score
+    return json.dumps(cleaned, indent=2, separators=(',', ': '), ensure_ascii=False)
+# Wrap the NER function in a plain text-in / text-out Gradio interface
+demo = gr.Interface(
+    title="NER",
+    description="Detect named entity within the text in input using the model Babelscape/wikineural - This interface works as MCP server as well.",
+    fn=ner,
+    inputs=["text"],
+    outputs="text",
+)
+# When run as a script, serve the web interface and expose the same function through the MCP server
+if __name__ == "__main__":
+    demo.launch(mcp_server=True, server_port=server_port, server_name=server_name)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio[mcp]==5.31.0  # the "mcp" extra is required by launch(mcp_server=True)
+transformers==4.50.3
+torch  # backend needed to load the token-classification model; json is stdlib and must not be listed