import gradio as gr
from transformers import pipeline
from langdetect import detect
import requests
import wikipedia
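# Dependencies (assumed install command; torch is needed as the transformers backend):
#   pip install gradio transformers torch langdetect wikipedia requests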
# Load NER model
# aggregation_strategy="simple" merges subword tokens into whole entities
# (it replaces the deprecated grouped_entities=True flag).
ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", aggregation_strategy="simple")
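# Illustrative output shape (scores and offsets are made up for the example):
# ner_pipeline("Angela Merkel visited Paris.") ->
#   [{'entity_group': 'PER', 'score': 0.999, 'word': 'Angela Merkel', 'start': 0, 'end': 13},
#    {'entity_group': 'LOC', 'score': 0.999, 'word': 'Paris', 'start': 22, 'end': 27}]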
# Get Wikidata entity info
def get_wikidata_info(entity, lang="en"):
    # Look up the entity by exact label via the Wikidata SPARQL endpoint.
    # Note: the label is interpolated directly into the query string, so names
    # containing quotes would break it; the caller filters to alphabetic tokens.
    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
      ?item rdfs:label "{entity}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {"Accept": "application/sparql-results+json"}
    try:
        response = requests.get(url, params={"query": query}, headers=headers, timeout=10)
        data = response.json()
        if data['results']['bindings']:
            item = data['results']['bindings'][0]
            label = item.get('itemLabel', {}).get('value', entity)
            description = item.get('itemDescription', {}).get('value', 'No description available.')
            coord = item.get('coordinate', {}).get('value', '')
            wikidata_link = item.get('item', {}).get('value', '')
            return label, description, coord, wikidata_link
    except Exception:
        pass
    return entity, "No description available.", "", ""
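# Illustrative call (hypothetical description/coordinate values):
#   get_wikidata_info("Paris", lang="en") might return
#   ('Paris', 'capital and largest city of France',
#    'Point(2.3514 48.8575)', 'http://www.wikidata.org/entity/Q90')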
# Get fallback Wikipedia link
def get_wikipedia_url(entity, lang="en"):
    try:
        wikipedia.set_lang(lang)
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
        return page.url
    except Exception:
        # Covers DisambiguationError, PageError, and network failures.
        return ""
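# Illustrative: get_wikipedia_url("Paris", lang="en") -> 'https://en.wikipedia.org/wiki/Paris'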
# Main combined function
def ner_wikidata_lookup(text):
    try:
        detected_lang = detect(text)
    except Exception:
        detected_lang = "en"
    entities = ner_pipeline(text)
    seen = set()
    result = f"<b>🌐 Detected Language:</b> <code>{detected_lang}</code><br><br>"
    for ent in entities:
        name = ent['word'].strip()
        # Skip duplicates and tokens containing digits or punctuation; spaces
        # are allowed so multi-word entities like "New York" are kept.
        if name not in seen and name.replace(" ", "").isalpha():
            seen.add(name)
            label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
            wiki_url = get_wikipedia_url(name, lang=detected_lang)
            osm_link = ""
            if coord:
                try:
                    # Wikidata P625 coordinates use WKT "Point(lon lat)" order.
                    lon, lat = coord.replace('Point(', '').replace(')', '').split(' ')
                    osm_link = f"<a href='https://www.openstreetmap.org/?mlat={lat}&mlon={lon}' target='_blank'>📍 View on OpenStreetMap</a>"
                except Exception:
                    pass
            links = ""
            if wikidata_url:
                links += f"<a href='{wikidata_url}' target='_blank'>🔗 Wikidata</a> "
            if wiki_url:
                links += f"<a href='{wiki_url}' target='_blank'>📖 Wikipedia</a>"
            result += f"""
            <hr><h3>🔎 {label}</h3>
            <p>{desc}</p>
            <p>{links}</p>
            <p>{osm_link}</p>
            """
    return result if seen else "No named entities found."
# Gradio Interface using HTML output
iface = gr.Interface(
    fn=ner_wikidata_lookup,
    inputs=gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
    outputs=gr.HTML(),
    title="🌍 NER with Wikidata + Wikipedia + Maps",
    description="Detects named entities, retrieves Wikidata descriptions, and adds Wikipedia links and map locations.",
)
if __name__ == "__main__":
    iface.launch()
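# To run locally (assumes the dependencies above are installed):
#   python app.py
# Gradio then prints a local URL (default http://127.0.0.1:7860) to open in a browser.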