# Spaces: Sleeping
# (Page-status text captured together with the source; not part of the program.)
import html

import gradio as gr
import requests
import wikipedia
from langdetect import detect
from transformers import pipeline
# Load NER model
# Built once at import time so every request reuses the same pipeline object.
# aggregation_strategy="simple" merges token-level predictions into whole
# entities; it is the supported spelling of the deprecated
# grouped_entities=True flag and yields the same grouping.
ner_pipeline = pipeline(
    "ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    aggregation_strategy="simple",
)
# Get Wikidata entity info
# Characters that would terminate or corrupt a double-quoted SPARQL literal.
_SPARQL_LITERAL_ESCAPES = str.maketrans(
    {"\\": "\\\\", '"': '\\"', "\n": "\\n", "\r": "\\r"}
)


def get_wikidata_info(entity, lang="en", timeout=10):
    """Look up an entity on Wikidata by exact label match.

    Sends one SPARQL query to the Wikidata Query Service asking for an item
    whose ``rdfs:label`` equals *entity* in language *lang*, and uses the
    first binding returned.

    Args:
        entity: Label text to search for.
        lang: Language tag used for the label match and for the returned
            label/description. Anything other than letters, digits and
            hyphens is replaced by ``"en"``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(label, description, coordinate, wikidata_url)`` tuple of strings.
        ``coordinate`` is the raw value bound to ``?coordinate`` (``wdt:P625``)
        and ``wikidata_url`` is the raw value bound to ``?item``; both are
        ``""`` when absent. When the lookup fails or nothing matches, the
        tuple is ``(entity, "No description available.", "", "")``.
    """
    fallback = (entity, "No description available.", "", "")

    # Nothing to search for: skip the network round trip entirely.
    if not entity or not entity.strip():
        return fallback

    # `lang` is spliced into the query unquoted (after '@'), so only accept
    # characters that cannot change the query structure.
    if not (lang and lang.replace("-", "").isalnum()):
        lang = "en"

    # `entity` comes from free text; escape it so a quote or backslash cannot
    # break out of the string literal.
    safe_entity = entity.translate(_SPARQL_LITERAL_ESCAPES)

    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
      ?item rdfs:label "{safe_entity}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''

    try:
        response = requests.get(
            "https://query.wikidata.org/sparql",
            params={"query": query},
            headers={"Accept": "application/sparql-results+json"},
            timeout=timeout,
        )
        response.raise_for_status()
        bindings = response.json()["results"]["bindings"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Best-effort lookup: network errors, HTTP errors, non-JSON bodies and
        # unexpected payload shapes all degrade to the fallback tuple.
        return fallback

    if not bindings:
        return fallback

    item = bindings[0]

    def value_of(key, default):
        # Each binding maps a variable name to a dict carrying a "value" key.
        return item.get(key, {}).get("value", default)

    return (
        value_of("itemLabel", entity),
        value_of("itemDescription", "No description available."),
        value_of("coordinate", ""),
        value_of("item", ""),
    )
# Get fallback Wikipedia link
def get_wikipedia_url(entity, lang="en"):
    """Return the URL of the Wikipedia page found for *entity*, or ``""``.

    Args:
        entity: Title (or near-title) to look up.
        lang: Language passed to ``wikipedia.set_lang`` before the lookup.

    Returns:
        The ``url`` attribute of the page returned by ``wikipedia.page``, or
        an empty string when the input is blank or the lookup fails.
    """
    # A blank title cannot resolve to a page; avoid the network call.
    if not entity or not entity.strip():
        return ""

    try:
        wikipedia.set_lang(lang)
        return wikipedia.page(entity, auto_suggest=True, redirect=True).url
    except Exception:
        # Deliberately best-effort: this link is optional in the output, so
        # any lookup failure (missing page, ambiguous title, network error)
        # just yields no link. `Exception` rather than a bare `except` so that
        # KeyboardInterrupt/SystemExit still propagate.
        return ""
# Main combined function
# Characters allowed between the letters of an entity name.
_NAME_SEPARATORS = " -'."

_NO_ENTITIES_MESSAGE = "No named entities found."


def _is_lookup_candidate(name):
    """Return True when *name* consists of letters, optionally separated by
    spaces, hyphens, apostrophes or periods (e.g. ``"New York"``)."""
    letters = "".join(ch for ch in name if ch not in _NAME_SEPARATORS)
    return letters.isalpha()


def _external_link(url, text):
    """Return an ``<a>`` element for an http(s) *url*, or ``""`` otherwise."""
    if not url or not url.startswith(("http://", "https://")):
        return ""
    return f"<a href='{html.escape(url, quote=True)}' target='_blank'>{text}</a>"


def _osm_link(coord):
    """Return an OpenStreetMap link for a ``Point(lon lat)`` string, or ``""``."""
    if not coord:
        return ""
    try:
        lon, lat = coord.replace("Point(", "").replace(")", "").split(" ")
        # Reject anything that is not a pair of numbers before it reaches HTML.
        float(lon)
        float(lat)
    except ValueError:
        return ""
    return (
        f"<a href='https://www.openstreetmap.org/?mlat={lat}&mlon={lon}' "
        "target='_blank'>π View on OpenStreetMap</a>"
    )


def ner_wikidata_lookup(text):
    """Detect named entities in *text* and render an HTML summary of them.

    For each distinct entity name produced by ``ner_pipeline``, looks up a
    label, description and coordinate via ``get_wikidata_info`` and a page URL
    via ``get_wikipedia_url``, using the language detected for *text*
    (``"en"`` when detection fails).

    Args:
        text: Free-form input text.

    Returns:
        An HTML string starting with the detected language followed by one
        section per entity, or ``"No named entities found."`` when the input
        is blank or no usable entity was detected.
    """
    if not text or not text.strip():
        return _NO_ENTITIES_MESSAGE

    try:
        detected_lang = detect(text)
    except Exception:
        # Detection is best-effort; fall back to English rather than failing.
        detected_lang = "en"

    seen = set()
    parts = [
        f"<b>π Detected Language:</b> <code>{html.escape(detected_lang)}</code><br><br>"
    ]

    for ent in ner_pipeline(text):
        name = ent["word"].strip()
        # Skip repeats and names containing digits/symbols, but keep
        # multi-word names, which a plain str.isalpha() check would reject.
        if name in seen or not _is_lookup_candidate(name):
            continue
        seen.add(name)

        label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
        wiki_url = get_wikipedia_url(name, lang=detected_lang)

        links = " ".join(
            link
            for link in (
                _external_link(wikidata_url, "π Wikidata"),
                _external_link(wiki_url, "π Wikipedia"),
            )
            if link
        )

        # Label and description come from a remote service or from the input
        # text itself, so they are escaped before being placed into markup.
        parts.append(
            "\n"
            f"<hr><h3>π {html.escape(label)}</h3>\n"
            f"<p>{html.escape(desc)}</p>\n"
            f"<p>{links}</p>\n"
            f"<p>{_osm_link(coord)}</p>\n"
        )

    return "".join(parts) if seen else _NO_ENTITIES_MESSAGE
# Gradio Interface using HTML output
# One multi-line text box in, one HTML panel out; the handler returns markup,
# so the output component must render HTML rather than plain text.
iface = gr.Interface(
    title="π NER with Wikidata + Wikipedia + Maps",
    description="Detects named entities, retrieves Wikidata descriptions, adds Wikipedia links and map locations.",
    fn=ner_wikidata_lookup,
    inputs=gr.Textbox(
        placeholder="Type any sentence in any language...",
        lines=4,
    ),
    outputs=gr.HTML(),
)

# Start the web UI only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()