# Spaces: Sleeping
import logging

import gradio as gr
import requests
import wikipedia
from langdetect import detect
from transformers import pipeline
# Load the multilingual NER model once at import time.
# aggregation_strategy="simple" merges word pieces into whole entity spans; it
# is the current spelling of the deprecated grouped_entities=True flag.
ner_pipeline = pipeline(
    "ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    aggregation_strategy="simple",
)

# Translation models cache: loaded translation pipelines keyed by model name,
# so each language pair is loaded at most once per process.
translation_models = {}
# Get Wikidata entity info via SPARQL
def get_wikidata_info(entity, lang="en"):
    """Look up the Wikidata label and description for an exact label match.

    Sends a SPARQL query to the public Wikidata endpoint asking for one item
    whose ``rdfs:label`` equals *entity* in language *lang*.

    Args:
        entity: Entity name to look up.
        lang: Language tag used for the label match and for the returned
            label/description. It is inserted into the query verbatim, so it
            must be a plain language tag such as ``"en"`` or ``"fr"``.

    Returns:
        A ``(label, description)`` tuple. Falls back to ``(entity, "")`` when
        *entity* is blank, nothing matches, or the lookup fails. The lookup is
        best-effort: network and response-format errors are logged, not raised.
    """
    if not entity or not entity.strip():
        return entity, ""

    # Escape the characters that would terminate or corrupt a SPARQL string
    # literal, so arbitrary NER output cannot break or inject into the query.
    escapes = str.maketrans({
        "\\": "\\\\",
        '"': '\\"',
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
    })
    literal = entity.translate(escapes)

    query = f'''
    SELECT ?item ?itemLabel ?itemDescription WHERE {{
      ?item rdfs:label "{literal}"@{lang}.
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {
        "Accept": "application/sparql-results+json",
        # Wikimedia asks API clients to identify themselves; generic library
        # user agents may be throttled. Replace with real contact details.
        "User-Agent": "multilingual-ner-demo/1.0 (Gradio app; python-requests)",
    }

    try:
        # A timeout keeps a stalled endpoint from hanging the UI request.
        response = requests.get(
            url, params={"query": query}, headers=headers, timeout=10
        )
        response.raise_for_status()
        bindings = response.json()["results"]["bindings"]
    except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
        logging.getLogger(__name__).warning(
            "Wikidata lookup failed for %r (%s): %s", entity, lang, exc
        )
        return entity, ""

    if not bindings:
        return entity, ""

    item = bindings[0]
    label = item.get("itemLabel", {}).get("value", entity)
    description = item.get("itemDescription", {}).get("value", "")
    return label, description
# Get Wikipedia description as fallback
def get_wikipedia_summary(entity, lang="en"):
    """Return a two-sentence Wikipedia summary for *entity*, best-effort.

    Args:
        entity: Page title or search term to summarise.
        lang: Language code passed to ``wikipedia.set_lang``. Note that this
            changes the language setting of the shared ``wikipedia`` module.

    Returns:
        The summary text, or ``"No description available."`` when *entity* is
        blank or the lookup fails for any reason (missing page,
        disambiguation, network error, ...). Failures are logged, not raised.
    """
    fallback = "No description available."
    if not entity or not entity.strip():
        return fallback

    try:
        wikipedia.set_lang(lang)
        return wikipedia.summary(
            entity, sentences=2, auto_suggest=True, redirect=True
        )
    except Exception as exc:
        # Deliberately broad: this is a best-effort fallback. Unlike a bare
        # ``except`` it still lets KeyboardInterrupt/SystemExit through.
        logging.getLogger(__name__).warning(
            "Wikipedia lookup failed for %r (%s): %s", entity, lang, exc
        )
        return fallback
# Translate text using MarianMT models
def translate_text(text, src_lang, tgt_lang):
    """Translate *text* from *src_lang* to *tgt_lang*, best-effort.

    Uses the ``Helsinki-NLP/opus-mt-{src}-{tgt}`` model for the language pair.
    Loaded pipelines are kept in the module-level ``translation_models`` cache.

    Args:
        text: Text to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The translated text, or *text* unchanged when it is empty, the two
        languages are the same, or the model cannot be loaded or run.
    """
    # Nothing to do: avoid loading a model for empty text or a no-op pair.
    if not text or src_lang == tgt_lang:
        return text

    model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
    try:
        translator = translation_models.get(model_name)
        if translator is None:
            translator = pipeline("translation", model=model_name)
            translation_models[model_name] = translator
        return translator(text, max_length=256)[0]["translation_text"]
    except Exception as exc:
        # Deliberately broad: not every language pair has a published model,
        # and showing the untranslated text beats failing the whole request.
        logging.getLogger(__name__).warning(
            "Translation with %s failed; returning source text: %s",
            model_name,
            exc,
        )
        return text  # Return untranslated if model fails
# Combined NER + Wikidata + fallback Wikipedia + translation
def multilingual_entity_info(text, output_lang):
    """Build a Markdown report describing the named entities found in *text*.

    Detects the input language, runs NER, looks each distinct entity up on
    Wikidata (falling back to Wikipedia when no description is found), and
    translates each description into *output_lang*.

    Args:
        text: Free-form input text in any language.
        output_lang: Language code the descriptions are translated into.

    Returns:
        A Markdown string with a language header and one section per entity,
        or ``"No named entities found."`` when the input is blank or no
        usable entity was detected.
    """
    if not text or not text.strip():
        return "No named entities found."

    try:
        detected_lang = detect(text)
    except Exception:
        # Language detection is best-effort; fall back to English on failure.
        detected_lang = "en"

    # langdetect can report regional variants such as "zh-cn"/"zh-tw", while
    # the output choices and opus-mt model names use the bare code ("zh").
    # Use the base code for the lookups and translation.
    lookup_lang = detected_lang.split("-")[0]

    entities = ner_pipeline(text)
    seen = set()
    # NOTE(review): the "π" glyphs in the strings below look like mis-decoded
    # emoji; confirm the intended characters. They are kept as found.
    sections = [
        f"**π Detected Language:** `{detected_lang}`\n**π Output Language:** `{output_lang}`\n\n"
    ]
    for ent in entities:
        name = ent["word"].strip()
        # Require at least one letter rather than str.isalpha(): isalpha()
        # rejects multi-word names ("New York") and scripts whose words
        # contain combining marks (e.g. Devanagari, Tamil).
        if name in seen or not any(ch.isalpha() for ch in name):
            continue
        seen.add(name)

        label, desc = get_wikidata_info(name, lang=lookup_lang)
        if not desc:
            desc = get_wikipedia_summary(name, lang=lookup_lang)
        translated_desc = translate_text(desc, lookup_lang, output_lang)
        sections.append(f"\n---\n\n## π {label}\n\n{translated_desc}\n")

    return "".join(sections) if seen else "No named entities found."
# Gradio UI: a free-text input and an output-language selector feed
# multilingual_entity_info; the result is rendered as Markdown.
# NOTE(review): the "π" in the title looks like a mis-decoded emoji; confirm.
iface = gr.Interface(
    fn=multilingual_entity_info,
    inputs=[
        gr.Textbox(
            placeholder="Type any sentence in any language...",
            lines=4,
        ),
        gr.Dropdown(
            choices=["en", "hi", "es", "fr", "de", "ta", "zh"],
            value="en",
            label="Select Output Language",
        ),
    ],
    outputs=gr.Markdown(),
    title="π Multilingual NER + Wikidata + Wikipedia",
    description="Detects entities in any language, fetches descriptions from Wikidata (or Wikipedia), and translates the output into your chosen language.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()