Shenuki committed on
Commit
a70a295
·
verified ·
1 Parent(s): e08081f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -102
app.py CHANGED
@@ -1,106 +1,78 @@
 
 
 
1
  import gradio as gr
2
- from transformers import pipeline
3
 
4
- # Model names (keeping it programmatic)
5
# Checkpoints offered in the model selector.
model_names = [
    "dslim/bert-base-NER",
    "dslim/bert-base-NER-uncased",
    "dslim/bert-large-NER",
    "dslim/distilbert-NER",
]

# Default sentence shown in the input box and embedded in the usage snippets.
example_sent = "Nim Chimpsky was a chimpanzee at Columbia University named after Noam Chomsky."

# Per-model metadata: the model page URL and a copy-pasteable usage snippet.
model_info = {
    name: {
        "link": f"https://huggingface.co/{name}",
        "usage": f"""from transformers import pipeline
ner = pipeline("ner", model="{name}", grouped_entities=True)
result = ner("{example_sent}")
print(result)""",
    }
    for name in model_names
}

# One NER pipeline per model, built at import time so analyze_text only has
# to look the pipeline up by name.
models = {
    name: pipeline("ner", model=name, grouped_entities=True)
    for name in model_names
}
33
-
34
-
35
- # Function to display model info (link and usage code)
36
def display_model_info(model_name):
    """Return the usage snippet and a Markdown link for ``model_name``.

    Looks the model up in the module-level ``model_info`` mapping and
    returns a ``(usage_code, link_markdown)`` tuple.
    """
    entry = model_info[model_name]
    snippet = entry["usage"]
    page_link = "[Open model page for {} ]({})".format(model_name, entry["link"])
    return snippet, page_link
41
-
42
-
43
- # Function to run NER on input text
44
def analyze_text(text, model_name):
    """Run the selected NER pipeline and split ``text`` into labelled spans.

    Returns a list of ``(substring, label)`` tuples covering the text from
    left to right; stretches that are not part of any entity carry ``None``
    as their label.
    """
    segments = []
    cursor = 0  # index just past the last span emitted so far
    for ent in models[model_name](text):
        begin, stop = ent["start"], ent["end"]
        tag = ent["entity_group"]
        # Plain text sitting between the previous entity and this one.
        if cursor < begin:
            segments.append((text[cursor:begin], None))
        segments.append((text[begin:stop], tag))
        cursor = stop
    # Whatever follows the final entity.
    if len(text) > cursor:
        segments.append((text[cursor:], None))
    return segments
63
-
64
-
65
with gr.Blocks() as demo:
    gr.Markdown("# Named Entity Recognition (NER) with BERT Models")

    # Dropdown for model selection
    available_models = list(model_info.keys())
    default_model = available_models[0]
    model_selector = gr.Dropdown(
        choices=available_models,
        value=default_model,
        label="Select Model",
    )

    # Textbox for input text
    text_input = gr.Textbox(
        label="Enter Text",
        lines=5,
        value=example_sent,
    )
    analyze_button = gr.Button("Run NER Model")
    output = gr.HighlightedText(label="NER Result", combine_adjacent=True)

    # Outputs: usage code and model page link.
    # Seed the link from the default model's metadata. Interpolating the
    # Dropdown component itself into the Markdown would render its object
    # repr as both the link text and the URL.
    _, initial_link = display_model_info(default_model)
    code_output = gr.Code(label="Use this model", visible=True)
    link_output = gr.Markdown(initial_link)

    # Button for analyzing the input text
    analyze_button.click(
        analyze_text, inputs=[text_input, model_selector], outputs=output
    )

    # Refresh the usage code and model link whenever the selection changes
    model_selector.change(
        display_model_info, inputs=[model_selector], outputs=[code_output, link_output]
    )

    # Populate the usage code (and link) on page load
    demo.load(
        fn=display_model_info,
        inputs=[model_selector],
        outputs=[code_output, link_output],
    )

demo.launch()
 
 
1
+ import spacy
2
+ import requests
3
+ import wikipedia
4
  import gradio as gr
 
5
 
6
+ # 1) Load spaCy small English model (make sure to add en_core_web_sm in requirements.txt)
7
+ nlp = spacy.load("en_core_web_sm")
8
+
9
+ # 2) Helper: Overpass query for POIs
10
def fetch_osm(lat, lon, osm_filter, limit=5):
    """Query the Overpass API for named OSM elements around a coordinate.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        osm_filter: Overpass tag filter inserted verbatim after each element
            type, e.g. '["amenity"="restaurant"]'.
        limit: Maximum number of elements requested from Overpass. Elements
            without a name are dropped afterwards, so fewer entries may be
            returned.

    Returns:
        A list of dicts, each with a "name" key and, when the element has a
        "cuisine" tag, an "info" key holding that tag's value.

    Raises:
        requests.RequestException: on connection failure, timeout or a
            non-success HTTP status.
    """
    overpass = """
[out:json][timeout:25];
(
  node{filt}(around:1000,{lat},{lon});
  way{filt}(around:1000,{lat},{lon});
  rel{filt}(around:1000,{lat},{lon});
);
out center {lim};
""".format(filt=osm_filter, lat=lat, lon=lon, lim=limit)
    # The query itself is capped at 25 s server-side; the client timeout is a
    # little longer so a stalled connection cannot hang the request forever.
    response = requests.post(
        "https://overpass-api.de/api/interpreter",
        data={"data": overpass},
        timeout=30,
    )
    # Fail with a clear HTTP error instead of a JSON decode error on the
    # error page Overpass sends when it is overloaded or rejects the query.
    response.raise_for_status()

    results = []
    for el in response.json().get("elements", []):
        tags = el.get("tags", {})
        name = tags.get("name")
        if not name:
            continue
        poi = {"name": name}
        if "cuisine" in tags:
            poi["info"] = tags["cuisine"]
        results.append(poi)
    return results
28
+
29
+ # 3) Geocode via Nominatim
30
def geocode(place: str):
    """Resolve a place name to coordinates via the Nominatim search API.

    Args:
        place: Free-form place name passed as the ``q`` search parameter.

    Returns:
        A ``(lat, lon)`` tuple of floats for the first search hit, or
        ``None`` when the search returns no results.

    Raises:
        requests.RequestException: on connection failure, timeout or a
            non-success HTTP status.
    """
    response = requests.get(
        "https://nominatim.openstreetmap.org/search",
        params={"q": place, "format": "json", "limit": 1},
        headers={"User-Agent": "iVoiceContext/1.0"},
        # Without a timeout a stalled connection would block the caller
        # indefinitely.
        timeout=10,
    )
    # Surface HTTP errors (e.g. rate limiting) instead of trying to parse
    # an error page as JSON.
    response.raise_for_status()
    data = response.json()
    if not data:
        return None
    best = data[0]
    return float(best["lat"]), float(best["lon"])
39
+
40
+ # 4) Main context extractor
41
def _location_context(place):
    """Build the result entry for a GPE/LOC entity (geocode + nearby POIs)."""
    try:
        geo = geocode(place)
        if not geo:
            return {"type": "location", "error": "could not geocode"}
        lat, lon = geo
        return {
            "type": "location",
            "restaurants": fetch_osm(lat, lon, '["amenity"="restaurant"]'),
            "attractions": fetch_osm(lat, lon, '["tourism"="attraction"]'),
        }
    except (requests.RequestException, ValueError) as exc:
        # A failing remote lookup for one place should not discard the
        # results already gathered for the other entities.
        return {"type": "location", "error": f"lookup failed: {exc}"}


def _wiki_context(name):
    """Build the result entry for a PERSON/ORG entity (Wikipedia summary)."""
    try:
        summ = wikipedia.summary(name, sentences=2)
    except Exception:
        # Deliberately best-effort: any failure of the Wikipedia lookup
        # degrades to a placeholder summary.
        summ = "No summary available"
    return {"type": "wiki", "summary": summ}


def get_context(text):
    """Extract named entities from ``text`` and attach context to each.

    The text is run through the module-level spaCy pipeline ``nlp``.
    Entities labelled GPE or LOC are geocoded and enriched with nearby
    restaurants and attractions; PERSON and ORG entities get a two-sentence
    Wikipedia summary.

    Args:
        text: Input text to analyse. ``None`` is treated as empty text.

    Returns:
        A dict keyed by entity text, in order of first appearance, whose
        values are the per-entity result dicts; or
        ``{"error": "no named entities found"}`` when no entity of interest
        is present.
    """
    doc = nlp(text or "")

    # Map each distinct entity text to the label of its first occurrence
    # among the labels of interest. A dict keeps document order (a set of
    # strings iterates in a run-dependent order), and filtering here stops
    # an earlier mention under another label from deciding the branch.
    wanted = {}
    for e in doc.ents:
        if e.label_ in ("GPE", "LOC", "PERSON", "ORG"):
            wanted.setdefault(e.text, e.label_)

    out = {}
    for ent, label in wanted.items():
        if label in ("GPE", "LOC"):
            out[ent] = _location_context(ent)
        else:  # PERSON or ORG
            out[ent] = _wiki_context(ent)

    if not out:
        return {"error": "no named entities found"}
    return out
67
+
68
+ # 5) Gradio interface
69
# Single-function UI: one text box in, the context dict rendered as JSON out.
iface = gr.Interface(
    title="iVoice Context-Aware API",
    description="Extracts people, places, orgs from text and returns nearby POIs or Wikipedia summaries.",
    fn=get_context,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Enter or paste your translated text…",
    ),
    outputs="json",
)

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()