File size: 3,383 Bytes
6c8edd7
 
 
 
 
 
a5b8665
6c8edd7
 
a5b8665
6c8edd7
 
a5b8665
6c8edd7
e111cbc
6c8edd7
 
 
 
 
 
 
 
 
 
 
a5b8665
e111cbc
a5b8665
 
6c8edd7
 
a5b8665
6c8edd7
a5b8665
 
6c8edd7
 
a5b8665
 
6c8edd7
a5b8665
6c8edd7
a5b8665
 
6c8edd7
 
 
 
 
 
 
a5b8665
6c8edd7
 
 
 
 
e111cbc
a5b8665
 
e111cbc
a5b8665
e111cbc
 
a5b8665
 
e111cbc
 
 
a5b8665
 
 
 
 
 
 
 
 
 
 
 
6c8edd7
 
 
a5b8665
6c8edd7
a5b8665
 
 
 
 
6c8edd7
 
 
a5b8665
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import html

import gradio as gr
import requests
import wikipedia
from langdetect import detect
from transformers import pipeline

# Load the NER model once at import time so every request reuses the same pipeline.
# `aggregation_strategy="simple"` is the current spelling of the deprecated
# `grouped_entities=True` flag: both merge adjacent tokens of one entity into a
# single result whose text is exposed under the "word" key.
ner_pipeline = pipeline(
    "ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    aggregation_strategy="simple",
)

# Get Wikidata entity info
def get_wikidata_info(entity, lang="en", timeout=10):
    """Look up ``entity`` on Wikidata by exact label match.

    Sends a single SPARQL query to ``https://query.wikidata.org/sparql``
    asking for the first item whose ``rdfs:label`` equals ``entity`` in
    language ``lang``, along with its label, description and optional
    ``wdt:P625`` coordinate.

    Args:
        entity: Label text to match exactly.
        lang: Language tag used for the label match and for the returned
            label/description.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(label, description, coord, wikidata_link)`` tuple of strings.
        ``coord`` and ``wikidata_link`` are ``""`` when the result does not
        carry them. If nothing matches, the language tag is not usable, or
        the request/response cannot be processed, the tuple is
        ``(entity, "No description available.", "", "")``.
    """
    fallback = (entity, "No description available.", "", "")

    # The tag is spliced into the query unquoted (`@{lang}`), so anything
    # other than ASCII letters, digits and hyphens could rewrite the query.
    if not (lang and lang.isascii() and lang.replace("-", "").isalnum()):
        return fallback

    # Escape the characters that may not appear raw inside a double-quoted
    # SPARQL string literal; otherwise names such as `Dwayne "The Rock"`
    # produce a malformed (or attacker-shaped) query.
    safe_entity = (
        entity.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
      ?item rdfs:label "{safe_entity}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    # NOTE(review): Wikimedia asks API clients to send a descriptive
    # User-Agent; consider adding one here if requests get throttled.
    headers = {"Accept": "application/sparql-results+json"}

    try:
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.get(
            url, params={"query": query}, headers=headers, timeout=timeout
        )
        response.raise_for_status()
        bindings = response.json()["results"]["bindings"]
        if not bindings:
            return fallback
        item = bindings[0]
        label = item.get("itemLabel", {}).get("value", entity)
        description = item.get("itemDescription", {}).get(
            "value", "No description available."
        )
        coord = item.get("coordinate", {}).get("value", "")
        wikidata_link = item.get("item", {}).get("value", "")
    except (
        requests.RequestException,  # network failure, timeout, HTTP error status
        ValueError,  # body was not valid JSON
        KeyError,
        IndexError,
        TypeError,
        AttributeError,  # JSON did not have the expected shape
    ):
        # Best-effort lookup: any failure degrades to the "no info" result.
        return fallback

    return label, description, coord, wikidata_link

# Get fallback Wikipedia link
def get_wikipedia_url(entity, lang="en"):
    """Return the URL of the Wikipedia page found for ``entity``.

    Switches the ``wikipedia`` library to language ``lang`` and resolves
    ``entity`` with auto-suggestion and redirects enabled.

    Args:
        entity: Page title (or near-title) to look up.
        lang: Language passed to ``wikipedia.set_lang``.

    Returns:
        The page URL, or ``""`` when ``entity`` is blank or the lookup fails
        for any reason.
    """
    # Nothing to look up; skip the network round-trip.
    if not entity or not entity.strip():
        return ""

    try:
        wikipedia.set_lang(lang)
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
        return page.url
    except Exception:
        # Deliberately broad: this is a best-effort link and the library can
        # fail in several ways (missing page, disambiguation, network).
        # Unlike a bare `except:`, this still lets KeyboardInterrupt and
        # SystemExit propagate.
        return ""

def _osm_link(coord):
    """Return an OpenStreetMap anchor for a ``Point(lon lat)`` string, or ``""``."""
    if not coord:
        return ""
    try:
        lon, lat = coord.replace("Point(", "").replace(")", "").split()
        # Only accept real numbers so nothing else can end up inside the href.
        float(lon)
        float(lat)
    except ValueError:
        # Wrong number of fields or non-numeric values: no map link.
        return ""
    return (
        f"<a href='https://www.openstreetmap.org/?mlat={lat}&mlon={lon}' "
        "target='_blank'>📍 View on OpenStreetMap</a>"
    )


# Main combined function
def ner_wikidata_lookup(text):
    """Build an HTML report of the named entities found in ``text``.

    Detects the language of ``text``, runs ``ner_pipeline`` over it, and for
    every distinct entity name made up only of alphabetic words fetches
    Wikidata details (``get_wikidata_info``) and a Wikipedia URL
    (``get_wikipedia_url``) in the detected language. Each entity becomes
    one HTML section with its label, description, links and, when a
    coordinate is available, an OpenStreetMap link.

    Args:
        text: Free-form input text.

    Returns:
        An HTML string starting with the detected language followed by one
        section per entity, or the plain string
        ``"No named entities found."`` when the input is blank or no entity
        passes the filter.
    """
    if not text or not text.strip():
        return "No named entities found."

    try:
        detected_lang = detect(text)
    except Exception:
        # Language detection is best-effort; fall back to English when the
        # detector cannot decide.
        detected_lang = "en"

    sections = [
        f"<b>🌐 Detected Language:</b> <code>{html.escape(detected_lang)}</code><br><br>"
    ]
    seen = set()

    for ent in ner_pipeline(text):
        name = ent["word"].strip()
        words = name.split()
        # Keep names whose every word is alphabetic. Testing the whole name
        # with `isalpha()` would reject any multi-word entity ("New York")
        # because of the space.
        if name in seen or not words or not all(w.isalpha() for w in words):
            continue
        seen.add(name)

        label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
        wiki_url = get_wikipedia_url(name, lang=detected_lang)

        # Everything below comes from remote services or the user's text, so
        # escape it before placing it in markup; `html.escape` also escapes
        # quotes, which keeps URLs containing `'` from breaking the href.
        anchors = []
        if wikidata_url:
            anchors.append(
                f"<a href='{html.escape(wikidata_url)}' target='_blank'>🔗 Wikidata</a>"
            )
        if wiki_url:
            anchors.append(
                f"<a href='{html.escape(wiki_url)}' target='_blank'>📘 Wikipedia</a>"
            )
        # Explicit non-breaking spaces: ordinary spaces between the anchors
        # would be collapsed by the browser.
        links = "&nbsp;&nbsp;".join(anchors)

        sections.append(f"""
            <hr><h3>🔎 {html.escape(label)}</h3>
            <p>{html.escape(desc)}</p>
            <p>{links}</p>
            <p>{_osm_link(coord)}</p>
            """)

    return "".join(sections) if seen else "No named entities found."

# Gradio UI: a four-line text box feeds `ner_wikidata_lookup`, and the string it
# returns is rendered by an HTML component.
_input_box = gr.Textbox(lines=4, placeholder="Type any sentence in any language...")
_output_panel = gr.HTML()

iface = gr.Interface(
    title="🌐 NER with Wikidata + Wikipedia + Maps",
    description="Detects named entities, retrieves Wikidata descriptions, adds Wikipedia links and map locations.",
    fn=ner_wikidata_lookup,
    inputs=_input_box,
    outputs=_output_panel,
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()