ayushsinha committed on
Commit
5f41e26
·
verified ·
1 Parent(s): 3d51e45

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +83 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import DistilBertForTokenClassification, DistilBertTokenizerFast
3
+ import torch
4
+
5
# Load the token-classification checkpoint and its matching fast tokenizer
# once at import time so every prediction request reuses the same objects.
model_name = "AventIQ-AI/distilbert-base-uncased_token_classification"
model = DistilBertForTokenClassification.from_pretrained(model_name)
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
9
+
10
# Icon shown next to each entity type in the formatted report. Keys are the
# entity types obtained by stripping the "B-"/"I-" prefix from a model label;
# types missing from this map fall back to a generic icon in format_output.
ICON_MAP = {
    "Corporation": "🏢",
    "Person": "👤",
    "Product": "📱",
    "Location": "📍",
    "Creative-Work": "🎭",
    "Group": "👥",
}
19
+
20
def _merge_bio_spans(labels, offsets, word_ids):
    """Merge per-token BIO labels into ``(start, end, type)`` character spans.

    Args:
        labels: One label string per token (``"O"``, ``"B-<type>"``, ``"I-<type>"``).
        offsets: One ``(start, end)`` character offset pair per token.
        word_ids: One word index per token, ``None`` for special tokens.

    Returns:
        A list of ``(start, end, entity_type)`` tuples in order of appearance.
    """
    spans = []
    current = None  # mutable [start, end, type] of the entity being built
    last_word = None

    for label, (start, end), word_id in zip(labels, offsets, word_ids):
        if word_id is None:
            # Special tokens belong to no word and never carry entity text.
            continue

        if word_id == last_word:
            # Continuation piece of the same word: it follows whatever was
            # decided for the word's first piece (extend, or stay outside).
            if current is not None:
                current[1] = end
            continue
        last_word = word_id

        prefix, _, entity_type = label.partition("-")

        # "I-" of the same type simply grows the open entity.
        if prefix == "I" and current is not None and current[2] == entity_type:
            current[1] = end
            continue

        # Every other label ends the open entity, if any.
        if current is not None:
            spans.append(tuple(current))
            current = None

        # "B-" starts an entity; an "I-" with nothing compatible to continue
        # is treated as a start too, so the entity is not silently dropped.
        if prefix in ("B", "I") and entity_type:
            current = [start, end, entity_type]

    if current is not None:
        spans.append(tuple(current))
    return spans


def predict_entities(text):
    """Predict named entities in *text* and return the formatted report.

    The text is tokenized, the module-level ``model`` predicts one label per
    token, BIO-tagged tokens are merged into entities, and the result is
    rendered by ``format_output``.

    Entity text is sliced out of the original string using the tokenizer's
    character offsets, so the user's casing and punctuation are kept rather
    than being rebuilt from word-piece strings.

    Args:
        text: Raw input string. ``None`` or blank input skips inference.

    Returns:
        The string produced by ``format_output(text, entities)``.
    """
    if text is None or not text.strip():
        # Nothing to tag: skip the model call (the tokenizer rejects None).
        return format_output(text or "", [])

    encoding = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        return_offsets_mapping=True,
    )
    # Offsets are bookkeeping for span recovery, not a model input, so they
    # must be removed before the encoding is unpacked into the model call.
    offsets = encoding.pop("offset_mapping")[0].tolist()
    word_ids = encoding.word_ids(0)

    with torch.no_grad():
        logits = model(**encoding).logits
    label_ids = logits.argmax(dim=-1)[0].tolist()
    labels = [model.config.id2label[label_id] for label_id in label_ids]

    entities = [
        {"text": text[start:end], "type": entity_type}
        for start, end, entity_type in _merge_bio_spans(labels, offsets, word_ids)
    ]
    return format_output(text, entities)
60
+
61
def format_output(text, entities):
    """Format the input text and detected entities for the Gradio UI.

    Args:
        text: The analysed text; echoed back verbatim on the first line.
        entities: Sequence of dicts, each with a ``"text"`` and a ``"type"`` key.

    Returns:
        A newline-terminated, Markdown-style string: an input line, a blank
        line, a heading, then either one bullet per entity or a single
        "no entities" notice when *entities* is empty.
    """
    lines = [f"📥 **Input**: {text}", "", "🔍 **Detected Entities**:"]

    if entities:
        for entity in entities:
            # Entity types without a dedicated icon get a generic marker.
            icon = ICON_MAP.get(entity["type"], "🔹")
            lines.append(f"- {icon} **{entity['text']}** → `{entity['type']}`")
    else:
        lines.append("\u2139\ufe0f No named entities detected. Try another sentence!")

    return "\n".join(lines) + "\n"
73
+
74
# Build the Gradio UI: a single text box in, the formatted NER report out.
# NOTE(review): format_output emits Markdown syntax (**bold**, backticks),
# which a gr.Textbox shows verbatim; consider gr.Markdown if it should render.
demo = gr.Interface(
    fn=predict_entities,
    inputs=gr.Textbox(placeholder="Enter text here...", label="Input Text"),
    outputs=gr.Textbox(label="NER Output"),
    title="📝 Named Entity Recognition (NER)",
    description="🔍 Enter a sentence and the model will detect entities like **Person, Location, Product, etc.**",
    theme="default",
    allow_flagging="never",
)

# Start the web server as soon as the script is run.
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ gradio
4
+ sentencepiece