Update app.py
app.py
CHANGED
@@ -4,20 +4,15 @@ from langdetect import detect
import requests
import wikipedia

-# Load
+# Load NER model
ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", grouped_entities=True)

-#
-translation_models = {}
-
-# Get enriched Wikidata info via SPARQL
+# Get Wikidata entity info
def get_wikidata_info(entity, lang="en"):
    query = f'''
-    SELECT ?item ?itemLabel ?itemDescription ?coordinate
+    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
    ?item rdfs:label "{entity}"@{lang}.
    OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
-    OPTIONAL {{ ?item wdt:P856 ?website. }}
-    OPTIONAL {{ ?sitelink schema:about ?item; schema:isPartOf <https://{lang}.wikipedia.org/>. }}
    SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
@@ -29,39 +24,25 @@ def get_wikidata_info(entity, lang="en"):
        if data['results']['bindings']:
            item = data['results']['bindings'][0]
            label = item.get('itemLabel', {}).get('value', entity)
-            description = item.get('itemDescription', {}).get('value', '')
+            description = item.get('itemDescription', {}).get('value', 'No description available.')
            coord = item.get('coordinate', {}).get('value', '')
-
-
-            return label, description, coord, website, wiki
+            wikidata_link = item.get('item', {}).get('value', '')
+            return label, description, coord, wikidata_link
    except:
        pass
-    return entity, "
+    return entity, "No description available.", "", ""

-# Get Wikipedia
-def
+# Get fallback Wikipedia link
+def get_wikipedia_url(entity, lang="en"):
    try:
        wikipedia.set_lang(lang)
-
-        return
-    except:
-        return "No description available."
-
-# Translate text using MarianMT models
-def translate_text(text, src_lang, tgt_lang):
-    if src_lang == tgt_lang:
-        return text
-    model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
-    try:
-        if model_name not in translation_models:
-            translation_models[model_name] = pipeline("translation", model=model_name)
-        translator = translation_models[model_name]
-        return translator(text, max_length=256)[0]['translation_text']
+        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
+        return page.url
    except:
-        return
+        return ""

-#
-def multilingual_entity_info(text, output_lang):
+# Main combined function
+def ner_wikidata_lookup(text):
    try:
        detected_lang = detect(text)
    except:
@@ -69,47 +50,47 @@ def multilingual_entity_info(text, output_lang):

    entities = ner_pipeline(text)
    seen = set()
-    result = f"
+    result = f"<b>π Detected Language:</b> <code>{detected_lang}</code><br><br>"

    for ent in entities:
        name = ent['word'].strip()
        if name not in seen and name.isalpha():
            seen.add(name)
-            label, desc, coord, website, wiki = get_wikidata_info(name, lang=detected_lang)
-            if not desc:
-                desc = get_wikipedia_summary(name, lang=detected_lang)
-            translated_desc = translate_text(desc, detected_lang, output_lang)

-
-
-            links += f"π [Wikipedia]({wiki}) "
-            if website:
-                links += f"π [Official Site]({website})"
+            label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
+            wiki_url = get_wikipedia_url(name, lang=detected_lang)

-
+            osm_link = ""
            if coord:
                try:
-
-
-                    map_embed = f"\n<iframe width='100%' height='300' frameborder='0' scrolling='no' marginheight='0' marginwidth='0' src='https://www.openstreetmap.org/export/embed.html?bbox={lon}%2C{lat}%2C{lon}%2C{lat}&layer=mapnik&marker={lat}%2C{lon}'></iframe>"
+                    lon, lat = coord.replace('Point(', '').replace(')', '').split(' ')
+                    osm_link = f"<a href='https://www.openstreetmap.org/?mlat={lat}&mlon={lon}' target='_blank'>π View on OpenStreetMap</a>"
                except:
                    pass

-
+            links = ""
+            if wikidata_url:
+                links += f"<a href='{wikidata_url}' target='_blank'>π Wikidata</a> "
+            if wiki_url:
+                links += f"<a href='{wiki_url}' target='_blank'>π Wikipedia</a>"
+
+            result += f"""
+            <hr><h3>π {label}</h3>
+            <p>{desc}</p>
+            <p>{links}</p>
+            <p>{osm_link}</p>
+            """

    return result if seen else "No named entities found."

-# Gradio
+# Gradio Interface using HTML output
iface = gr.Interface(
-    fn=
-    inputs=
-
-
-
-    outputs=gr.Markdown(),
-    title="π Multilingual NER + Wikidata + Wikipedia + Maps",
-    description="Detects entities in any language, fetches enriched Wikidata info, falls back to Wikipedia, translates the description, and embeds maps + links."
+    fn=ner_wikidata_lookup,
+    inputs=gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
+    outputs=gr.HTML(),
+    title="π NER with Wikidata + Wikipedia + Maps",
+    description="Detects named entities, retrieves Wikidata descriptions, adds Wikipedia links and map locations."
)

if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
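
The requests call that actually sends this SPARQL query to Wikidata sits between the hunks above, so it is not visible in this diff. For reference, a minimal sketch of how the query built in get_wikidata_info could be executed; the endpoint URL, headers, and the run_sparql helper name are assumptions here, not code from this Space:

import requests

def run_sparql(query):
    # Assumed setup: the public Wikidata Query Service endpoint with a JSON
    # results header; WDQS also expects a descriptive User-Agent string.
    url = "https://query.wikidata.org/sparql"
    headers = {
        "Accept": "application/sparql-results+json",
        "User-Agent": "ner-wikidata-demo/0.1 (example)",
    }
    r = requests.get(url, params={"query": query}, headers=headers, timeout=10)
    r.raise_for_status()
    return r.json()

# Same shape of query as in get_wikidata_info, with the entity label inlined.
query = '''
SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {
  ?item rdfs:label "Paris"@en.
  OPTIONAL { ?item wdt:P625 ?coordinate. }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
} LIMIT 1
'''

bindings = run_sparql(query)["results"]["bindings"]
print(bindings[0]["itemLabel"]["value"] if bindings else "no match")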
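
The new osm_link lines parse the value bound to ?coordinate (property wdt:P625), which the query service returns as a WKT literal with longitude first, roughly "Point(2.35 48.85)" for a place like Paris. A standalone check of that parsing; parse_point is an illustrative helper, not part of app.py, and the coordinate value is made up:

def parse_point(wkt):
    # Same string handling as the diff: strip the WKT wrapper, split on the space.
    lon, lat = wkt.replace('Point(', '').replace(')', '').split(' ')
    return lat, lon  # OSM marker links take ?mlat=<lat>&mlon=<lon>

lat, lon = parse_point("Point(2.35 48.85)")
print(f"https://www.openstreetmap.org/?mlat={lat}&mlon={lon}")
# https://www.openstreetmap.org/?mlat=48.85&mlon=2.35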
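
get_wikipedia_url swallows every failure with a bare except:. If the fallback should only stay silent for the errors wikipedia.page() actually raises for ambiguous or missing titles, a narrower variant could look like the sketch below; get_wikipedia_url_strict is hypothetical and not part of this commit:

import wikipedia
from wikipedia.exceptions import DisambiguationError, PageError

def get_wikipedia_url_strict(entity, lang="en"):
    # Only "no usable page" conditions return an empty link; anything else
    # (network errors, etc.) still surfaces instead of being hidden.
    wikipedia.set_lang(lang)
    try:
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
        return page.url
    except (DisambiguationError, PageError):
        return ""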