root committed on
Commit
c6e5246
·
1 Parent(s): 2d805fd

test mcp server gradio

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +88 -0
  3. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .ipynb_checkpoints/
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
3
+ from transformers import pipeline
4
+ import json
5
+
6
# Network binding handed to `demo.launch`: "0.0.0.0" listens on every
# interface, on port 8890.
server_name = "0.0.0.0"
server_port = 8890

# Hugging Face Hub checkpoint shared by the tokenizer and the model.
_MODEL_ID = "Babelscape/wikineural-multilingual-ner"

# Both objects are loaded once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(_MODEL_ID)

# Token-classification pipeline; with aggregation_strategy="simple" the
# results carry an 'entity_group' label, which `group_cat` relies on.
nlp = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
14
+
15
def group_cat(entities):
    """Bucket entity records by their ``'entity_group'`` label.

    Args:
        entities: Iterable of dict-like records. The label is read with
            ``.get('entity_group')``, so a record without that key is
            filed under ``None``.

    Returns:
        dict: Maps each label to the list of records carrying it, in the
        order they were encountered.
    """
    buckets = {}
    for record in entities:
        # setdefault creates the bucket on first sight of a label.
        buckets.setdefault(record.get('entity_group'), []).append(record)
    return buckets
24
+
25
+
26
+
27
+
28
def _chunk_char_spans(offset_mappings):
    """Yield one ``(start, end)`` character span of the source text per tokenizer chunk.

    Chunks that contain no real tokens are skipped.
    """
    for offsets in offset_mappings:
        # Keep only tokens that cover at least one character; the special
        # tokens wrapped around each chunk are reported with an empty span.
        covered = [(begin, end) for begin, end in offsets if end > begin]
        if covered:
            yield covered[0][0], covered[-1][1]


def ner(text: str) -> str:
    """
    Searches the input text for named entities and returns them organized by category.

    Long inputs are split into overlapping token windows; each window is
    analyzed separately and the results are merged.

    Args:
        text (str): The input text to analyze.

    Returns:
        str: A JSON string representing a dictionary where each key is a named entity category (e.g., 'PER', 'ORG', 'LOC', etc.), and the corresponding value is a dictionary mapping each entity found in the text under that category to its confidence score. Empty or whitespace-only input yields "{}".
    """
    # Nothing to analyze: answer directly instead of running the model.
    if not text or not text.strip():
        return json.dumps({})

    max_len = tokenizer.model_max_length
    stride = 50

    # Tokenize with overflow so long texts are split into overlapping
    # windows. Only the character offsets are used afterwards: they say
    # which slice of the original text each window covers.
    encoded = tokenizer(
        text,
        return_overflowing_tokens=True,
        stride=stride,
        max_length=max_len,
        truncation=True,
        return_offsets_mapping=True,
        padding=False,
    )

    all_entities = []
    seen = set()  # (word, start, end) with offsets relative to the full text

    for chunk_start, chunk_end in _chunk_char_spans(encoded["offset_mapping"]):
        # Run the pipeline on the original characters of the window rather
        # than on decoded token ids, which do not reproduce the source text.
        for ent in nlp(text[chunk_start:chunk_end]):
            # Shift window-relative offsets to full-text offsets so an entity
            # seen again in the overlap of two windows gets the same key.
            start = ent["start"] + chunk_start
            end = ent["end"] + chunk_start
            key = (ent["word"], start, end)
            if key in seen:
                continue
            seen.add(key)
            all_entities.append({**ent, "start": start, "end": end})

    # Reduce every category to {entity text: score}; float() turns the
    # pipeline's score into a plain float so json can serialize it.
    cleaned = {
        category: {ent["word"]: float(ent["score"]) for ent in items}
        for category, items in group_cat(all_entities).items()
    }

    return json.dumps(cleaned, indent=2, separators=(',', ': '), ensure_ascii=False)
76
+
77
# Wrap `ner` in a plain text-in / text-out Gradio interface.
demo = gr.Interface(
    ner,
    inputs=["text"],
    outputs="text",
    title="NER",
    description="Detect named entity within the text in input using the model Babelscape/wikineural - This interface works as MCP server as well.",
)

# Only when run as a script: start the Gradio web interface together with
# the MCP server (enabled by mcp_server=True).
if __name__ == "__main__":
    launch_options = {
        "server_name": server_name,
        "server_port": server_port,
        "mcp_server": True,
    }
    demo.launch(**launch_options)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio[mcp]==5.31.0
2
+ transformers==4.50.3
3
+ # json is part of the Python standard library, so it is not listed as a pip requirement