from typing import Any, Dict, List, Optional

from transformers import pipeline
from transformers import Tool


class NamedEntityRecognitionTool(Tool):
    """Tool that runs a ``"ner"`` pipeline over a text and returns word-level entities.

    The pipeline reports one entry per token, so a single word may arrive as
    several WordPiece pieces (continuations are prefixed with ``##``).  This
    tool stitches those pieces back into whole words before returning them.
    """

    name = "ner_tool"
    description = "Identifies and labels various entities in a given text."
    inputs = ["text"]
    outputs = ["text"]

    # Marker this tool treats as "continuation of the previous token".
    _SUBWORD_PREFIX = "##"

    # Lazily created pipeline; stays None until the first non-empty call so the
    # model is built once per instance instead of on every invocation.
    _ner_analyzer = None

    def __call__(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
        """Tag the entities found in *text*.

        Args:
            text: The text to analyse.

        Returns:
            ``{"entities": [...]}`` where each item is a dict with the keys
            ``"word"`` (token text with sub-word pieces joined), ``"label"``
            (the entity tag of the word's first piece) and ``"entity_text"``
            (the matching slice of *text* when character offsets are
            available, otherwise the same value as ``"word"``).
        """
        # Empty input cannot contain entities; skip building/running the model.
        entities = self._get_analyzer()(text) if text else []
        word_entities = self._merge_subword_entities(text, entities)

        print(f"Word-level Entities: {word_entities}")

        return {"entities": word_entities}

    def _get_analyzer(self):
        """Return the NER pipeline, creating it on first use."""
        if self._ner_analyzer is None:
            self._ner_analyzer = pipeline("ner")
        return self._ner_analyzer

    @classmethod
    def _merge_subword_entities(
        cls, text: str, entities: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Join ``##`` continuation pieces onto the entity that precedes them.

        Each item of *entities* is read through ``.get`` for the keys
        ``"entity"``, ``"word"``, ``"start"``, ``"end"`` and ``"index"``; any
        of them may be missing, and the positional ones may be ``None``.
        """
        prefix = cls._SUBWORD_PREFIX
        merged: List[Dict[str, Any]] = []
        span_start: Optional[int] = None  # char offset where merged[-1] begins
        prev_end: Optional[int] = None    # char offset where the last token ended
        prev_index: Optional[int] = None  # token position of the last token

        for entity in entities:
            label = entity.get("entity", "UNKNOWN")
            word = entity.get("word", "")
            start = entity.get("start")
            end = entity.get("end")
            index = entity.get("index")

            is_subword = word.startswith(prefix)
            bare_word = word[len(prefix):] if is_subword else word

            # Offsets can be absent or None; slicing with None would return the
            # whole text, so fall back to the token text in that case.
            has_span = start is not None and end is not None
            surface = text[start:end].strip() if has_span else bare_word

            # A continuation is only merged when it directly follows the
            # previous kept token; otherwise its first piece was not tagged and
            # gluing it onto an unrelated entity would be wrong.
            if index is not None and prev_index is not None:
                adjacent = index == prev_index + 1
            elif has_span and prev_end is not None:
                adjacent = start == prev_end
            else:
                adjacent = True  # no positional info: trust the token order

            if is_subword and merged and adjacent:
                target = merged[-1]
                target["word"] += bare_word
                if has_span and span_start is not None:
                    target["entity_text"] = text[span_start:end].strip()
                else:
                    target["entity_text"] += surface
                # The label of the first piece is kept for the whole word.
            else:
                merged.append({"word": bare_word, "label": label, "entity_text": surface})
                span_start = start if has_span else None

            prev_end = end if has_span else None
            prev_index = index

        return merged