import gradio as gr
from transformers import pipeline
from langdetect import detect
import requests
import wikipedia

# Load a multilingual NER model (XLM-RoBERTa fine-tuned for PER/ORG/LOC entities);
# aggregation_strategy="simple" is the current replacement for the deprecated grouped_entities=True
ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", aggregation_strategy="simple")
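
# For reference, a sketch of the aggregated pipeline output (keys follow the transformers
# NER pipeline; the scores below are illustrative, not values from a real run):
#
#   ner_pipeline("Angela Merkel visited Paris.")
#   # -> [{'entity_group': 'PER', 'word': 'Angela Merkel', 'score': 0.99, 'start': 0, 'end': 13},
#   #     {'entity_group': 'LOC', 'word': 'Paris', 'score': 0.99, 'start': 22, 'end': 27}]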

# Translation models cache
translation_models = {}

# Get enriched Wikidata info via SPARQL
def get_wikidata_info(entity, lang="en"):
    safe_entity = entity.replace('"', '\\"')  # escape quotes so the label literal stays valid SPARQL
    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate ?website ?sitelink WHERE {{
      ?item rdfs:label "{safe_entity}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      OPTIONAL {{ ?item wdt:P856 ?website. }}
      OPTIONAL {{ ?sitelink schema:about ?item; schema:isPartOf <https://{lang}.wikipedia.org/>. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    # Wikidata's query service asks clients to identify themselves with a descriptive User-Agent
    headers = {
        "Accept": "application/sparql-results+json",
        "User-Agent": "multilingual-ner-demo/1.0",
    }
    try:
        response = requests.get(url, params={"query": query}, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        if data['results']['bindings']:
            item = data['results']['bindings'][0]
            label = item.get('itemLabel', {}).get('value', entity)
            description = item.get('itemDescription', {}).get('value', '')
            coord = item.get('coordinate', {}).get('value', '')
            website = item.get('website', {}).get('value', '')
            wiki = item.get('sitelink', {}).get('value', '')
            return label, description, coord, website, wiki
    except Exception:
        pass
    return entity, "", "", "", ""
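
# Illustrative usage (results depend on live Wikidata data; the shapes below are assumed, not fixtures):
#
#   label, desc, coord, website, wiki = get_wikidata_info("Paris", lang="en")
#   # coord arrives as a WKT literal such as "Point(2.3522 48.8566)" -- longitude first, then latitude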

# Get Wikipedia description as fallback
def get_wikipedia_summary(entity, lang="en"):
    try:
        wikipedia.set_lang(lang)
        return wikipedia.summary(entity, sentences=2, auto_suggest=True, redirect=True)
    except Exception:  # covers disambiguation and page-not-found errors
        return "No description available."

# Translate text with MarianMT; pipelines are cached so each language pair is loaded only once
def translate_text(text, src_lang, tgt_lang):
    if src_lang == tgt_lang:
        return text
    # Helsinki-NLP publishes opus-mt checkpoints per language pair; not every pair exists
    model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
    try:
        if model_name not in translation_models:
            translation_models[model_name] = pipeline("translation", model=model_name)
        translator = translation_models[model_name]
        return translator(text, max_length=256)[0]['translation_text']
    except Exception:
        return text  # fall back to the untranslated text if the model is unavailable or fails
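
# Illustrative usage, assuming the Helsinki-NLP/opus-mt-en-de checkpoint downloads cleanly:
#
#   translate_text("The Eiffel Tower is in Paris.", "en", "de")
#   # -> a German rendering such as "Der Eiffelturm ist in Paris." (exact output may vary)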

# Combined NER + Wikidata + fallback Wikipedia + translation + links + map
def multilingual_entity_info(text, output_lang):
    try:
        detected_lang = detect(text)
    except Exception:  # langdetect raises on empty or undecidable input
        detected_lang = "en"

    entities = ner_pipeline(text)
    seen = set()
    result = f"**🌐 Detected Language:** `{detected_lang}`\n**🌍 Output Language:** `{output_lang}`\n\n"

    for ent in entities:
        name = ent['word'].strip()
        # Skip duplicates and subword fragments; allow spaces so multi-word entities like "New York" pass
        if name not in seen and name.replace(" ", "").isalpha():
            seen.add(name)
            label, desc, coord, website, wiki = get_wikidata_info(name, lang=detected_lang)
            if not desc:
                desc = get_wikipedia_summary(name, lang=detected_lang)
            translated_desc = translate_text(desc, detected_lang, output_lang)

            links = ""
            if wiki:
                links += f"πŸ”— [Wikipedia]({wiki})  "
            if website:
                links += f"🌐 [Official Site]({website})"

            map_embed = ""
            if coord:
                try:
                    latlon = coord.replace('Point(', '').replace(')', '').split(' ')
                    lon, lat = latlon[0], latlon[1]
                    map_embed = f"\n<iframe width='100%' height='300' frameborder='0' scrolling='no' marginheight='0' marginwidth='0' src='https://www.openstreetmap.org/export/embed.html?bbox={lon}%2C{lat}%2C{lon}%2C{lat}&layer=mapnik&marker={lat}%2C{lon}'></iframe>"
                except:
                    pass

            result += f"\n---\n\n## πŸ”Ž {label}\n\n{translated_desc}\n\n{links}\n{map_embed}\n"

    return result if seen else "No named entities found."

# Gradio UI with output language selector
iface = gr.Interface(
    fn=multilingual_entity_info,
    inputs=[
        gr.Textbox(lines=4, label="Input Text", placeholder="Type any sentence in any language..."),
        gr.Dropdown(label="Select Output Language", choices=["en", "hi", "es", "fr", "de", "ta", "zh"], value="en")
    ],
    outputs=gr.Markdown(),
    title="🌐 Multilingual NER + Wikidata + Wikipedia + Maps",
    description="Detects entities in any language, fetches enriched Wikidata info, falls back to Wikipedia, translates the description, and embeds maps + links."
)

if __name__ == "__main__":
    iface.launch()