import logging
import re

import gradio as gr
import requests
import wikipedia
from langdetect import detect
from transformers import pipeline

logger = logging.getLogger(__name__)

# Load multilingual NER model
# NOTE(review): newer transformers releases may prefer `aggregation_strategy`
# over `grouped_entities` -- confirm against the pinned version.
ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", grouped_entities=True)

# Translation models cache
translation_models = {}

_WIKIDATA_SPARQL_URL = "https://query.wikidata.org/sparql"
_REQUEST_TIMEOUT_SECONDS = 10
# Shape of a language tag that is safe to splice into the SPARQL query text.
_LANG_TAG = re.compile(r"[A-Za-z]{2,8}(?:-[A-Za-z0-9]{1,8})*")


def _escape_sparql_literal(value):
    """Escape *value* for use inside a double-quoted SPARQL string literal."""
    return (
        value.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )


# Get enriched Wikidata info via SPARQL
def get_wikidata_info(entity, lang="en"):
    """Look up *entity* on Wikidata by its exact label in language *lang*.

    Args:
        entity: Label text to match against ``rdfs:label``.
        lang: Language tag used for the label match, the label service and
            the Wikipedia sitelink host.

    Returns:
        A 5-tuple ``(label, description, coordinate, website, wiki)`` of
        strings. Missing fields are empty strings. When nothing is found, or
        the request fails for any reason, the result is
        ``(entity, "", "", "", "")``.
    """
    fallback = (entity, "", "", "", "")

    # The language tag is spliced into the query text, so reject anything
    # that is not shaped like a language tag.
    if not isinstance(lang, str) or not _LANG_TAG.fullmatch(lang):
        return fallback

    # The sitelink clause needs the wiki to restrict to as the object of
    # schema:isPartOf; without an object the query is not valid SPARQL.
    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate ?website ?sitelink WHERE {{
      ?item rdfs:label "{_escape_sparql_literal(str(entity))}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      OPTIONAL {{ ?item wdt:P856 ?website. }}
      OPTIONAL {{ ?sitelink schema:about ?item; schema:isPartOf <https://{lang}.wikipedia.org/>. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    headers = {"Accept": "application/sparql-results+json"}

    try:
        response = requests.get(
            _WIKIDATA_SPARQL_URL,
            params={"query": query},
            headers=headers,
            timeout=_REQUEST_TIMEOUT_SECONDS,  # never hang the UI on a slow endpoint
        )
        response.raise_for_status()
        bindings = response.json()['results']['bindings']
    except (requests.RequestException, ValueError, KeyError, TypeError) as err:
        # Best-effort lookup: callers fall back to Wikipedia on empty fields.
        logger.warning("Wikidata lookup failed for %r: %s", entity, err)
        return fallback

    if not bindings:
        return fallback

    item = bindings[0]
    label = item.get('itemLabel', {}).get('value', entity)
    description = item.get('itemDescription', {}).get('value', '')
    coord = item.get('coordinate', {}).get('value', '')
    website = item.get('website', {}).get('value', '')
    wiki = item.get('sitelink', {}).get('value', '')
    return label, description, coord, website, wiki


# Get Wikipedia description as fallback
def get_wikipedia_summary(entity, lang="en"):
    """Return a two-sentence Wikipedia summary of *entity* in language *lang*.

    Calls ``wikipedia.set_lang(lang)`` before the lookup. Returns the string
    ``"No description available."`` when the lookup raises.
    """
    try:
        wikipedia.set_lang(lang)
        return wikipedia.summary(entity, sentences=2, auto_suggest=True, redirect=True)
    except Exception as err:
        # Deliberate best-effort boundary: any lookup failure degrades to the
        # placeholder text instead of breaking the whole response.
        logger.info("Wikipedia summary unavailable for %r: %s", entity, err)
        return "No description available."
# Translate text using MarianMT models
def translate_text(text, src_lang, tgt_lang):
    """Translate *text* from *src_lang* to *tgt_lang* with a cached pipeline.

    The model name is ``Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}``; loaded
    pipelines are kept in the module-level ``translation_models`` dict.

    Returns:
        The translated string, or *text* unchanged when it is empty, when the
        two languages are equal, or when loading/running the model fails.
    """
    if not text or src_lang == tgt_lang:
        return text

    model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
    try:
        if model_name not in translation_models:
            translation_models[model_name] = pipeline("translation", model=model_name)
        translator = translation_models[model_name]
        return translator(text, max_length=256)[0]['translation_text']
    except Exception:
        # Best-effort: not every language pair has a model, so hand back the
        # untranslated text rather than failing the whole request.
        return text


def _is_entity_name(name):
    """Return True when *name* looks like a real name rather than token debris."""
    import unicodedata

    has_letter = False
    for ch in name:
        major = unicodedata.category(ch)[0]
        if major == "L":
            has_letter = True
        elif major in ("M", "N") or ch in " -'’.·":
            # Combining marks (needed for Hindi/Tamil), digits and common
            # in-name punctuation are allowed but do not make a name alone.
            continue
        else:
            return False
    return has_letter


def _parse_wkt_point(coord):
    """Parse a ``Point(lon lat)`` string into ``(lat, lon)`` floats, or None."""
    if not coord:
        return None
    try:
        lon_text, lat_text = coord.replace('Point(', '').replace(')', '').split()
        lat, lon = float(lat_text), float(lon_text)
    except ValueError:
        return None
    # The range check also rejects NaN, whose comparisons are always False.
    if -90.0 <= lat <= 90.0 and -180.0 <= lon <= 180.0:
        return lat, lon
    return None


# Combined NER + Wikidata + fallback Wikipedia + translation + links + map
def multilingual_entity_info(text, output_lang):
    """Build a Markdown report describing each named entity found in *text*.

    The input language is detected (defaulting to ``"en"`` when detection
    fails). Each distinct entity is looked up on Wikidata, with Wikipedia as
    the fallback for the description, and the description is translated into
    *output_lang*. Wikipedia/official-site links and a map link are appended
    when available.

    Returns:
        The Markdown report, or ``"No named entities found."`` when the input
        is empty or no usable entity was recognised.
    """
    if not text or not text.strip():
        return "No named entities found."

    try:
        detected_lang = detect(text)
    except Exception:
        # Detection can fail on very short or symbol-only input.
        detected_lang = "en"

    # Regional codes such as "zh-cn" are reduced to their base language for
    # the label lookup and for the translation model name.
    lookup_lang = detected_lang.split("-")[0]

    sections = [
        f"**🌐 Detected Language:** `{detected_lang}`\n**🌍 Output Language:** `{output_lang}`\n\n"
    ]
    seen = set()

    for ent in ner_pipeline(text):
        name = ent['word'].strip()
        if name in seen or not _is_entity_name(name):
            continue
        seen.add(name)

        label, desc, coord, website, wiki = get_wikidata_info(name, lang=lookup_lang)
        if not desc:
            desc = get_wikipedia_summary(name, lang=lookup_lang)
        translated_desc = translate_text(desc, lookup_lang, output_lang)

        links = ""
        if wiki:
            links += f"🔗 [Wikipedia]({wiki}) "
        if website:
            links += f"🌐 [Official Site]({website})"

        map_embed = ""
        point = _parse_wkt_point(coord)
        if point is not None:
            lat, lon = point
            # A plain Markdown link is used so the parsed coordinates are
            # actually surfaced in the report.
            map_embed = (
                "\n🗺️ [View on OpenStreetMap]"
                f"(https://www.openstreetmap.org/?mlat={lat}&mlon={lon}#map=10/{lat}/{lon})\n"
            )

        sections.append(
            f"\n---\n\n## 🔎 {label}\n\n{translated_desc}\n\n{links}\n{map_embed}\n"
        )

    return "".join(sections) if seen else "No named entities found."
# Gradio UI with output language selector
# Two inputs (free text and the target language code) feed
# multilingual_entity_info; its Markdown result is rendered as the output.
iface = gr.Interface(
    fn=multilingual_entity_info,
    inputs=[
        gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
        gr.Dropdown(
            label="Select Output Language",
            choices=["en", "hi", "es", "fr", "de", "ta", "zh"],
            value="en",
        ),
    ],
    outputs=gr.Markdown(),
    title="🌐 Multilingual NER + Wikidata + Wikipedia + Maps",
    description="Detects entities in any language, fetches enriched Wikidata info, falls back to Wikipedia, translates the description, and embeds maps + links.",
)

# Launch the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()