Create app.py
app.py
ADDED
@@ -0,0 +1,93 @@
import gradio as gr
from transformers import pipeline
from langdetect import detect
import requests
import wikipedia

# Load multilingual NER model
ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", grouped_entities=True)
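# Note: with grouped_entities=True the pipeline merges word pieces, so each entry is roughly of the form
# {"entity_group": "PER", "word": "Barack Obama", "score": 0.99, "start": 0, "end": 12}
# (illustrative values only; the code below only relies on the 'word' field).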

# Translation models cache
translation_models = {}

# Get Wikidata entity info via SPARQL
def get_wikidata_info(entity, lang="en"):
    query = f'''
    SELECT ?item ?itemLabel ?itemDescription WHERE {{
      ?item rdfs:label "{entity}"@{lang}.
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {"Accept": "application/sparql-results+json"}
    try:
        response = requests.get(url, params={"query": query}, headers=headers, timeout=10)
        data = response.json()
        if data['results']['bindings']:
            item = data['results']['bindings'][0]
            label = item.get('itemLabel', {}).get('value', entity)
            description = item.get('itemDescription', {}).get('value', '')
            return label, description
    except Exception:  # network errors, malformed JSON, etc. fall through to the default below
        pass
    return entity, ""

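# The endpoint answers in the standard SPARQL JSON results format, roughly
# (illustrative shape, not an actual response):
#   {"results": {"bindings": [{"item": {"value": "http://www.wikidata.org/entity/Q..."},
#                              "itemLabel": {"value": "..."},
#                              "itemDescription": {"value": "..."}}]}}
# which is what the .get(...) chain above unpacks; 'itemDescription' may be absent.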
# Get Wikipedia description as fallback
def get_wikipedia_summary(entity, lang="en"):
    try:
        wikipedia.set_lang(lang)
        summary = wikipedia.summary(entity, sentences=2, auto_suggest=True, redirect=True)
        return summary
    except Exception:  # covers DisambiguationError, PageError, missing language editions, etc.
        return "No description available."

# Translate text using MarianMT models
def translate_text(text, src_lang, tgt_lang):
    if src_lang == tgt_lang:
        return text
    model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
    try:
        if model_name not in translation_models:
            translation_models[model_name] = pipeline("translation", model=model_name)
        translator = translation_models[model_name]
        return translator(text, max_length=256)[0]['translation_text']
    except Exception:
        return text  # Return untranslated if the model for this language pair is unavailable or fails

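# Helsinki-NLP publishes opus-mt models per language pair (e.g. "Helsinki-NLP/opus-mt-en-hi"),
# but not every src-tgt combination exists; when a pair is missing, translate_text above
# simply returns the original text. A quick local check (illustrative; downloads the model on first use):
#   translate_text("Paris is the capital of France.", "en", "fr")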
# Combined NER + Wikidata + fallback Wikipedia + translation
def multilingual_entity_info(text, output_lang):
    try:
        detected_lang = detect(text)
    except Exception:  # langdetect can fail on very short or ambiguous input
        detected_lang = "en"

    entities = ner_pipeline(text)
    seen = set()
    result = f"**Detected Language:** `{detected_lang}`\n**Output Language:** `{output_lang}`\n\n"

    for ent in entities:
        name = ent['word'].strip()
        if name not in seen and name.isalpha():  # isalpha() also skips multi-word entities such as "New York"
            seen.add(name)
            label, desc = get_wikidata_info(name, lang=detected_lang)
            if not desc:
                desc = get_wikipedia_summary(name, lang=detected_lang)
            translated_desc = translate_text(desc, detected_lang, output_lang)
            result += f"\n---\n\n## {label}\n\n{translated_desc}\n"

    return result if seen else "No named entities found."

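# Example (hypothetical call; models are downloaded on first use):
#   print(multilingual_entity_info("Einstein wurde in Ulm geboren.", "en"))
# Expected to emit one markdown section per single-word entity (e.g. Einstein, Ulm),
# each with a description translated into English.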
# Gradio UI with output language selector
iface = gr.Interface(
    fn=multilingual_entity_info,
    inputs=[
        gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
        gr.Dropdown(label="Select Output Language", choices=["en", "hi", "es", "fr", "de", "ta", "zh"], value="en")
    ],
    outputs=gr.Markdown(),
    title="Multilingual NER + Wikidata + Wikipedia",
    description="Detects entities in any language, fetches descriptions from Wikidata (or Wikipedia), and translates the output into your chosen language."
)

if __name__ == "__main__":
    iface.launch()
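For the Space to build, a requirements.txt next to app.py would need to cover the libraries imported above. A minimal sketch, assuming PyTorch as the transformers backend and the extra tokenizer packages the MarianMT and XLM-R models typically need (this file is not part of this commit):

gradio
transformers
torch
sentencepiece
sacremoses
langdetect
requests
wikipedia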