import html

import gradio as gr
import requests
import wikipedia
from langdetect import detect
from transformers import pipeline
# Load the multilingual NER model once at import time so every request reuses it.
# aggregation_strategy="simple" merges sub-word tokens into whole entity spans;
# it is the supported replacement for the deprecated grouped_entities=True flag.
ner_pipeline = pipeline(
    "ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    aggregation_strategy="simple",
)
# Get Wikidata entity info
def get_wikidata_info(entity, lang="en"):
    """Look up an entity on Wikidata by exact label match.

    Sends a single SPARQL query to the public Wikidata endpoint asking for the
    first item whose ``rdfs:label`` equals *entity* in language *lang*, along
    with its label, description and (optional) ``P625`` coordinate.

    Args:
        entity: Label text to match exactly.
        lang: Language tag used for both the label match and the label service.

    Returns:
        A ``(label, description, coordinate, wikidata_url)`` tuple of strings.
        If nothing matches, or the request/response cannot be used, returns
        ``(entity, "No description available.", "", "")``.
    """
    fallback = (entity, "No description available.", "", "")

    # Escape characters that would terminate or corrupt the SPARQL string
    # literal; without this a quote in the entity breaks (or injects into)
    # the query.
    literal = (
        entity.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
      ?item rdfs:label "{literal}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    # NOTE(review): no User-Agent is set, so requests' default is sent;
    # confirm that is acceptable for the Wikidata endpoint.
    headers = {"Accept": "application/sparql-results+json"}

    try:
        # A timeout keeps a stalled endpoint from hanging the whole request.
        response = requests.get(
            url, params={"query": query}, headers=headers, timeout=10
        )
        response.raise_for_status()
        bindings = response.json()["results"]["bindings"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Best-effort lookup: network errors, HTTP errors, invalid JSON and
        # unexpected payload shapes all degrade to the fallback tuple.
        return fallback

    if not bindings:
        return fallback

    item = bindings[0]
    label = item.get("itemLabel", {}).get("value", entity)
    description = item.get("itemDescription", {}).get(
        "value", "No description available."
    )
    coord = item.get("coordinate", {}).get("value", "")
    wikidata_link = item.get("item", {}).get("value", "")
    return label, description, coord, wikidata_link
# Get fallback Wikipedia link
def get_wikipedia_url(entity, lang="en"):
    """Return the URL of the Wikipedia page for *entity*, or "" if unavailable.

    Args:
        entity: Title (or near-title) to look up; auto-suggest and redirects
            are enabled.
        lang: Wikipedia language edition to query.

    Returns:
        The page URL, or an empty string when the lookup fails for any reason.
    """
    try:
        wikipedia.set_lang(lang)
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
    except Exception:
        # Deliberately best-effort: the link is optional, so any lookup
        # failure yields no link. Unlike a bare ``except``, this lets
        # KeyboardInterrupt/SystemExit propagate.
        return ""
    return page.url
# Main combined function
def _anchor(href, text):
    """Return an HTML anchor opening *href* in a new tab, with both parts escaped."""
    return f'<a href="{html.escape(href)}" target="_blank">{html.escape(text)}</a>'


def _osm_link_html(coord):
    """Build an OpenStreetMap link from a ``Point(lon lat)`` string; "" if unusable."""
    if not coord:
        return ""
    try:
        lon, lat = coord.replace("Point(", "").replace(")", "").split(" ")
        # Round-trip through float so only numeric values reach the URL.
        lon, lat = float(lon), float(lat)
    except ValueError:
        # Wrong number of parts or non-numeric parts: omit the map link.
        return ""
    href = f"https://www.openstreetmap.org/?mlat={lat}&mlon={lon}#map=10/{lat}/{lon}"
    return _anchor(href, "📍 View on OpenStreetMap")


def _entity_html(name, lang):
    """Render one entity as an HTML fragment: heading, description, links, map."""
    label, desc, coord, wikidata_url = get_wikidata_info(name, lang=lang)
    wiki_url = get_wikipedia_url(name, lang=lang)

    links = []
    if wikidata_url:
        links.append(_anchor(wikidata_url, "🔗 Wikidata"))
    if wiki_url:
        links.append(_anchor(wiki_url, "📘 Wikipedia"))

    # Label and description come from an external service, so escape them
    # before embedding them in HTML.
    return (
        "<hr>"
        f"<h3>{html.escape(label)}</h3>"
        f"<p>{html.escape(desc)}</p>"
        f"<p>{' '.join(links)}</p>"
        f"<p>{_osm_link_html(coord)}</p>"
    )


def ner_wikidata_lookup(text):
    """Detect entities in *text* and return an HTML report about each one.

    The language is detected first (falling back to ``"en"`` on failure) and
    used for the Wikidata and Wikipedia lookups. Each distinct entity is
    reported once.

    Args:
        text: Free-form input sentence(s).

    Returns:
        An HTML string with the detected language followed by one section per
        entity, or ``"No named entities found."`` when nothing qualifies.
    """
    if not text or not text.strip():
        return "No named entities found."

    try:
        detected_lang = detect(text)
    except Exception:
        # Language detection is best-effort; default to English.
        detected_lang = "en"

    seen = set()
    parts = [
        f"<p>🌐 Detected Language: <code>{html.escape(detected_lang)}</code></p>"
    ]
    for ent in ner_pipeline(text):
        name = ent["word"].strip()
        # NOTE(review): isalpha() also rejects multi-word names such as
        # "New York" (the space is not alphabetic) — confirm this is intended.
        if name in seen or not name.isalpha():
            continue
        seen.add(name)
        parts.append(_entity_html(name, detected_lang))

    return "\n".join(parts) if seen else "No named entities found."


# Gradio Interface using HTML output
iface = gr.Interface(
    fn=ner_wikidata_lookup,
    inputs=gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
    outputs=gr.HTML(),
    title="🌐 NER with Wikidata + Wikipedia + Maps",
    description="Detects named entities, retrieves Wikidata descriptions, adds Wikipedia links and map locations.",
)

if __name__ == "__main__":
    iface.launch()