Spaces:
Sleeping
Sleeping
File size: 4,676 Bytes
6c8edd7 e111cbc 6c8edd7 e111cbc 6c8edd7 e111cbc 6c8edd7 e111cbc 6c8edd7 e111cbc 6c8edd7 e111cbc 6c8edd7 e111cbc 6c8edd7 e111cbc 6c8edd7 e111cbc 6c8edd7 e111cbc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import gradio as gr
from transformers import pipeline
from langdetect import detect
import requests
import wikipedia
# Load multilingual NER model once at import time (Davlan/xlm-roberta-base-ner-hrl);
# grouped_entities=True merges sub-word pieces into whole entity spans.
ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", grouped_entities=True)
# Cache of MarianMT translation pipelines, keyed by model name
# (e.g. "Helsinki-NLP/opus-mt-en-fr") so each pair is loaded only once.
translation_models = {}
# Get enriched Wikidata info via SPARQL
def get_wikidata_info(entity, lang="en"):
    """Look up *entity* on Wikidata and return enrichment data.

    Args:
        entity: Entity label to match (exact rdfs:label in *lang*).
        lang: BCP-47-ish language code used for the label match,
            the label service, and the Wikipedia sitelink filter.

    Returns:
        A 5-tuple ``(label, description, coordinate, website, wiki_url)``.
        On any failure (network error, bad response, no match) falls back
        to ``(entity, "", "", "", "")`` so callers never have to handle
        exceptions from this best-effort lookup.
    """
    # Escape backslashes and double quotes so the entity text cannot break
    # out of the SPARQL string literal (names like O"Brien would otherwise
    # produce an invalid — or injectable — query).
    safe_entity = entity.replace('\\', '\\\\').replace('"', '\\"')
    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate ?website ?sitelink WHERE {{
      ?item rdfs:label "{safe_entity}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      OPTIONAL {{ ?item wdt:P856 ?website. }}
      OPTIONAL {{ ?sitelink schema:about ?item; schema:isPartOf <https://{lang}.wikipedia.org/>. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {"Accept": "application/sparql-results+json"}
    try:
        # Timeout keeps a slow endpoint from hanging the Gradio worker forever.
        response = requests.get(url, params={"query": query}, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        bindings = data['results']['bindings']
        if bindings:
            item = bindings[0]
            label = item.get('itemLabel', {}).get('value', entity)
            description = item.get('itemDescription', {}).get('value', '')
            coord = item.get('coordinate', {}).get('value', '')
            website = item.get('website', {}).get('value', '')
            wiki = item.get('sitelink', {}).get('value', '')
            return label, description, coord, website, wiki
    except Exception:
        # Best-effort: any network/HTTP/JSON error falls through to the default.
        pass
    return entity, "", "", "", ""
# Get Wikipedia description as fallback
def get_wikipedia_summary(entity, lang="en"):
    """Fetch a short (2-sentence) Wikipedia summary for *entity*.

    Args:
        entity: Page title (or close match — auto_suggest is enabled).
        lang: Wikipedia language edition to query.

    Returns:
        The summary text, or the placeholder ``"No description available."``
        when the page is missing, ambiguous, or the request fails — callers
        always receive printable text and never see an exception.
    """
    try:
        wikipedia.set_lang(lang)
        return wikipedia.summary(entity, sentences=2, auto_suggest=True, redirect=True)
    except Exception:
        # Covers DisambiguationError, PageError and network failures alike.
        return "No description available."
# Translate text using MarianMT models
def translate_text(text, src_lang, tgt_lang):
    """Translate *text* from *src_lang* to *tgt_lang* via a cached MarianMT model.

    Args:
        text: Source text (truncated to 256 tokens by the pipeline).
        src_lang: Detected source language code (e.g. "en").
        tgt_lang: Desired output language code.

    Returns:
        The translated text; the input unchanged when the languages match
        or when no ``opus-mt`` model exists for the pair (best-effort).
    """
    if src_lang == tgt_lang:
        return text
    model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
    try:
        if model_name not in translation_models:
            # Cache the pipeline so each language pair is loaded only once.
            translation_models[model_name] = pipeline("translation", model=model_name)
        translator = translation_models[model_name]
        return translator(text, max_length=256)[0]['translation_text']
    except Exception:
        # Model missing for this pair or download failed: degrade gracefully.
        return text  # Return untranslated if model fails
# Combined NER + Wikidata + fallback Wikipedia + translation + links + map
def multilingual_entity_info(text, output_lang):
    """Detect language, extract named entities, enrich and translate them.

    Args:
        text: Free-form input sentence(s) in any language.
        output_lang: Language code the descriptions are translated into.

    Returns:
        A Markdown string with a header (detected/output language) plus one
        section per unique entity — description, Wikipedia/official links,
        and an embedded OpenStreetMap iframe when coordinates exist — or
        ``"No named entities found."`` when nothing survives filtering.
    """
    try:
        detected_lang = detect(text)
    except Exception:
        # langdetect raises on empty/undetectable input; default to English.
        detected_lang = "en"
    entities = ner_pipeline(text)
    seen = set()
    result = f"**π Detected Language:** `{detected_lang}`\n**π Output Language:** `{output_lang}`\n\n"
    for ent in entities:
        name = ent['word'].strip()
        # Guard clause: skip duplicates and non-alphabetic spans. NOTE(review):
        # isalpha() also rejects multi-word entities (they contain spaces) —
        # kept for parity with the original filter; confirm this is intended.
        if name in seen or not name.isalpha():
            continue
        seen.add(name)
        label, desc, coord, website, wiki = get_wikidata_info(name, lang=detected_lang)
        if not desc:
            # Wikidata had no description — fall back to Wikipedia.
            desc = get_wikipedia_summary(name, lang=detected_lang)
        translated_desc = translate_text(desc, detected_lang, output_lang)
        links = ""
        if wiki:
            links += f"π [Wikipedia]({wiki}) "
        if website:
            links += f"π [Official Site]({website})"
        map_embed = ""
        if coord:
            try:
                # Wikidata WKT literal is "Point(lon lat)" — longitude first.
                latlon = coord.replace('Point(', '').replace(')', '').split(' ')
                lon, lat = latlon[0], latlon[1]
                map_embed = f"\n<iframe width='100%' height='300' frameborder='0' scrolling='no' marginheight='0' marginwidth='0' src='https://www.openstreetmap.org/export/embed.html?bbox={lon}%2C{lat}%2C{lon}%2C{lat}&layer=mapnik&marker={lat}%2C{lon}'></iframe>"
            except (IndexError, AttributeError):
                # Malformed coordinate value: just omit the map.
                pass
        result += f"\n---\n\n## π {label}\n\n{translated_desc}\n\n{links}\n{map_embed}\n"
    return result if seen else "No named entities found."
# Gradio UI with output language selector: free-text input box plus a
# dropdown of target languages; output is rendered as Markdown (entity
# sections, links, and embedded map iframes).
iface = gr.Interface(
    fn=multilingual_entity_info,
    inputs=[
        gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
        gr.Dropdown(label="Select Output Language", choices=["en", "hi", "es", "fr", "de", "ta", "zh"], value="en")
    ],
    outputs=gr.Markdown(),
    title="π Multilingual NER + Wikidata + Wikipedia + Maps",
    description="Detects entities in any language, fetches enriched Wikidata info, falls back to Wikipedia, translates the description, and embeds maps + links."
)
# Launch only when executed as a script (the module may also be imported).
if __name__ == "__main__":
    iface.launch()