File size: 3,444 Bytes
6c8edd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import logging

import gradio as gr
import requests
import wikipedia
from langdetect import detect
from transformers import pipeline

# Load multilingual NER model.
# aggregation_strategy="simple" merges sub-word tokens into whole entity spans;
# it is the supported replacement for the deprecated grouped_entities=True flag
# and yields the same grouping.
ner_pipeline = pipeline(
    "ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    aggregation_strategy="simple",
)

# Translation models cache: loaded translation pipelines keyed by model name,
# so each model is only loaded once per process.
translation_models = {}

# Get Wikidata entity info via SPARQL
def get_wikidata_info(entity, lang="en"):
    """Look up an entity's label and description on Wikidata.

    Sends a SPARQL query to the public Wikidata endpoint for an item whose
    ``rdfs:label`` in ``lang`` equals ``entity`` exactly, and reads the label
    and description of the first match.

    Args:
        entity: Surface form of the entity to look up.
        lang: Language tag used for the label match and the label service.

    Returns:
        A ``(label, description)`` tuple. Falls back to ``(entity, "")`` when
        there is no match or the request/response cannot be processed.
    """
    # Escape characters that would terminate or corrupt the SPARQL string
    # literal; entity text ultimately comes from user input.
    escaped = (
        entity.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    query = f'''
    SELECT ?item ?itemLabel ?itemDescription WHERE {{
      ?item rdfs:label "{escaped}"@{lang}.
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {"Accept": "application/sparql-results+json"}
    try:
        # A timeout keeps a stalled endpoint from hanging the whole request.
        response = requests.get(
            url, params={"query": query}, headers=headers, timeout=10
        )
        response.raise_for_status()
        bindings = response.json()["results"]["bindings"]
        item = bindings[0] if bindings else {}
        label = item.get("itemLabel", {}).get("value", entity)
        description = item.get("itemDescription", {}).get("value", "")
    except (
        requests.RequestException,
        ValueError,
        LookupError,
        TypeError,
        AttributeError,
    ) as exc:
        # Best-effort lookup: report the failure and let the caller fall back.
        logging.getLogger(__name__).warning(
            "Wikidata lookup failed for %r: %s", entity, exc
        )
        return entity, ""
    return label, description

# Get Wikipedia description as fallback
def get_wikipedia_summary(entity, lang="en"):
    """Return a short Wikipedia summary for ``entity``.

    Args:
        entity: Title or search term to summarise.
        lang: Language code passed to ``wikipedia.set_lang`` before the lookup.

    Returns:
        A summary of up to two sentences, or the placeholder string
        ``"No description available."`` when the lookup fails.
    """
    try:
        wikipedia.set_lang(lang)
        return wikipedia.summary(
            entity, sentences=2, auto_suggest=True, redirect=True
        )
    except Exception as exc:
        # Best-effort fallback: any lookup failure yields the placeholder, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        logging.getLogger(__name__).info(
            "Wikipedia summary unavailable for %r (%s): %s", entity, lang, exc
        )
        return "No description available."

# Translate text using MarianMT models
def translate_text(text, src_lang, tgt_lang):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

    Loaded translation pipelines are cached in ``translation_models`` under
    their model name so each model is only loaded once.

    Args:
        text: Text to translate.
        src_lang: Source language code; a regional suffix such as the
            ``-cn`` in ``zh-cn`` is ignored.
        tgt_lang: Target language code, normalised the same way.

    Returns:
        The translated text, or ``text`` unchanged when it is empty, when the
        two languages match, or when the model cannot be loaded or run.
    """
    # Language detectors report regional variants such as "zh-cn", while the
    # model name below is built from bare language codes.
    src = str(src_lang).split("-", 1)[0]
    tgt = str(tgt_lang).split("-", 1)[0]
    if not text or src == tgt:
        return text

    model_name = f"Helsinki-NLP/opus-mt-{src}-{tgt}"
    try:
        translator = translation_models.get(model_name)
        if translator is None:
            translator = pipeline("translation", model=model_name)
            translation_models[model_name] = translator
        return translator(text, max_length=256)[0]["translation_text"]
    except Exception as exc:
        # Best-effort: fall back to the untranslated text, e.g. when no model
        # exists for this language pair, but say why.
        logging.getLogger(__name__).warning(
            "Translation with %s failed: %s", model_name, exc
        )
        return text

# Combined NER + Wikidata + fallback Wikipedia + translation
def multilingual_entity_info(text, output_lang):
    """Build a Markdown report describing the named entities in ``text``.

    Each distinct entity found by ``ner_pipeline`` is looked up on Wikidata,
    falling back to Wikipedia when Wikidata gives no description, and the
    description is translated into ``output_lang``.

    Args:
        text: Input sentence(s) in any language.
        output_lang: Language code the descriptions are translated into.

    Returns:
        A Markdown string with a language header and one section per entity,
        or ``"No named entities found."`` when nothing usable is detected.
    """
    if not text or not text.strip():
        return "No named entities found."

    try:
        detected_lang = detect(text)
    except Exception:
        # Detection may fail on input it cannot classify; default to English.
        detected_lang = "en"

    sections = [
        f"**🌐 Detected Language:** `{detected_lang}`\n**🌍 Output Language:** `{output_lang}`\n\n"
    ]
    seen = set()

    for ent in ner_pipeline(text):
        name = ent["word"].strip()
        # Require at least one letter instead of str.isalpha(), which rejected
        # every multi-word or hyphenated entity such as "New York".
        if name in seen or not any(ch.isalpha() for ch in name):
            continue
        seen.add(name)
        label, desc = get_wikidata_info(name, lang=detected_lang)
        if not desc:
            desc = get_wikipedia_summary(name, lang=detected_lang)
        translated_desc = translate_text(desc, detected_lang, output_lang)
        sections.append(f"\n---\n\n## 🔎 {label}\n\n{translated_desc}\n")

    return "".join(sections) if seen else "No named entities found."

# Gradio front end: a free-text box plus an output-language selector
_sentence_box = gr.Textbox(
    lines=4,
    placeholder="Type any sentence in any language...",
)
_language_picker = gr.Dropdown(
    label="Select Output Language",
    choices=["en", "hi", "es", "fr", "de", "ta", "zh"],
    value="en",
)

iface = gr.Interface(
    fn=multilingual_entity_info,
    inputs=[_sentence_box, _language_picker],
    outputs=gr.Markdown(),
    title="🌐 Multilingual NER + Wikidata + Wikipedia",
    description="Detects entities in any language, fetches descriptions from Wikidata (or Wikipedia), and translates the output into your chosen language.",
)

# Start the app only when this file is executed as a script, not on import
if __name__ == "__main__":
    iface.launch()