Pracheethaa committed on
Commit
6c8edd7
·
verified ·
1 Parent(s): 5cc13dd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from langdetect import detect
4
+ import requests
5
+ import wikipedia
6
+
7
+ # Load multilingual NER model
8
# Load the multilingual NER model once, at import time.
# `aggregation_strategy="simple"` replaces the deprecated
# `grouped_entities=True` flag: sub-word tokens are merged into whole entity
# spans, whose text is read from each result's 'word' key further down.
ner_pipeline = pipeline(
    "ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    aggregation_strategy="simple",
)

# Translation models cache, keyed by Hugging Face model name, so each
# language pair is loaded at most once per process.
translation_models = {}
12
+
13
+ # Get Wikidata entity info via SPARQL
14
def get_wikidata_info(entity, lang="en"):
    """Look up an entity's label and description on Wikidata.

    Sends a SPARQL query to the public Wikidata endpoint asking for an item
    whose ``rdfs:label`` is exactly *entity* in language *lang*.

    Args:
        entity: Entity name to look up (exact label match).
        lang: Language tag used both for the label match and for the
            returned label/description.

    Returns:
        A ``(label, description)`` tuple. Falls back to ``(entity, "")``
        when the input is unusable, nothing matches, or the lookup fails.
    """
    # Nothing sensible to query for; skip the network round trip.
    if not entity or not entity.strip():
        return entity, ""

    # `lang` is spliced into the query as a language tag, so only plain
    # ASCII tags such as "en" or "zh-cn" are accepted.
    lang_core = lang.replace("-", "") if isinstance(lang, str) else ""
    if not (lang_core.isascii() and lang_core.isalnum()):
        return entity, ""

    # Escape characters that would otherwise terminate or corrupt the
    # SPARQL string literal (e.g. a name containing a double quote).
    escaped = (
        entity.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    query = f'''
    SELECT ?item ?itemLabel ?itemDescription WHERE {{
      ?item rdfs:label "{escaped}"@{lang}.
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {
        "Accept": "application/sparql-results+json",
        # Identify the client instead of relying on the library default.
        "User-Agent": "multilingual-ner-demo/1.0 (gradio app)",
    }
    try:
        # A timeout keeps a stalled endpoint from hanging the UI request.
        response = requests.get(
            url, params={"query": query}, headers=headers, timeout=10
        )
        response.raise_for_status()
        bindings = response.json()["results"]["bindings"]
        if not bindings:
            return entity, ""
        item = bindings[0]
        label = item.get("itemLabel", {}).get("value", entity)
        description = item.get("itemDescription", {}).get("value", "")
        return label, description
    except (
        requests.RequestException,
        ValueError,
        LookupError,
        TypeError,
        AttributeError,
    ):
        # Best-effort lookup: network, HTTP and malformed-payload errors all
        # degrade to the "no information" result so callers can fall back.
        return entity, ""
34
+
35
+ # Get Wikipedia description as fallback
36
def get_wikipedia_summary(entity, lang="en"):
    """Return a short Wikipedia summary for *entity*.

    Used as the fallback when Wikidata has no description.

    Args:
        entity: Page title or search term to summarise.
        lang: Language code passed to ``wikipedia.set_lang`` before the
            lookup.

    Returns:
        A two-sentence summary, or ``"No description available."`` when the
        input is blank or the lookup fails for any reason.
    """
    fallback = "No description available."

    # A blank title cannot resolve to a page; skip the network call.
    if not entity or not entity.strip():
        return fallback

    try:
        wikipedia.set_lang(lang)
        return wikipedia.summary(
            entity, sentences=2, auto_suggest=True, redirect=True
        )
    except Exception:
        # Deliberately broad: this is a best-effort fallback, so any lookup
        # failure yields the placeholder. Unlike a bare `except:`, this
        # still lets KeyboardInterrupt/SystemExit propagate.
        return fallback
43
+
44
+ # Translate text using MarianMT models
45
def translate_text(text, src_lang, tgt_lang):
    """Translate *text* with a Helsinki-NLP MarianMT model.

    Models are loaded on first use and kept in the module-level
    ``translation_models`` cache.

    Args:
        text: Text to translate.
        src_lang: Source language code; regional tags such as ``"zh-cn"``
            are reduced to their base language.
        tgt_lang: Target language code, normalised the same way.

    Returns:
        The translated text, or *text* unchanged when no translation is
        needed or no model is available for the language pair.
    """
    # The model names and the UI choices use bare language codes, so strip
    # any region suffix (e.g. "zh-cn" -> "zh") before comparing or loading.
    src = (src_lang or "").split("-")[0].lower()
    tgt = (tgt_lang or "").split("-")[0].lower()

    if not text or not text.strip() or not src or not tgt or src == tgt:
        return text

    model_name = f"Helsinki-NLP/opus-mt-{src}-{tgt}"
    if model_name not in translation_models:
        try:
            translation_models[model_name] = pipeline(
                "translation", model=model_name
            )
        except Exception:
            # Remember the failure so an unavailable language pair is not
            # re-requested for every entity. A transient failure therefore
            # disables the pair until the process restarts.
            translation_models[model_name] = None

    translator = translation_models[model_name]
    if translator is None:
        return text  # No model for this pair: return untranslated.

    try:
        return translator(text, max_length=256)[0]["translation_text"]
    except Exception:
        return text  # Return untranslated if inference fails.
56
+
57
+ # Combined NER + Wikidata + fallback Wikipedia + translation
58
def multilingual_entity_info(text, output_lang):
    """Describe every named entity found in *text*, as Markdown.

    Pipeline: detect the input language, run NER, fetch a description for
    each distinct entity from Wikidata (falling back to Wikipedia), then
    translate each description into *output_lang*.

    Args:
        text: Free-form input text in any language.
        output_lang: Language code the descriptions are translated into.

    Returns:
        A Markdown document with one section per entity, or
        ``"No named entities found."`` when nothing usable was detected.
    """
    # Blank input cannot contain entities; skip detection and the model run.
    if not text or not text.strip():
        return "No named entities found."

    try:
        detected_lang = detect(text)
    except Exception:
        # Language detection is best-effort; default to English.
        detected_lang = "en"

    sections = [
        f"**🌐 Detected Language:** `{detected_lang}`\n"
        f"**🌍 Output Language:** `{output_lang}`\n\n"
    ]
    seen = set()

    for ent in ner_pipeline(text):
        name = ent["word"].strip()
        # Keep anything containing at least one letter. A strict
        # `name.isalpha()` would drop multi-word entities ("New York") and
        # names written with combining marks.
        if name in seen or not any(ch.isalpha() for ch in name):
            continue
        seen.add(name)

        label, desc = get_wikidata_info(name, lang=detected_lang)
        if not desc:
            desc = get_wikipedia_summary(name, lang=detected_lang)
        translated_desc = translate_text(desc, detected_lang, output_lang)
        sections.append(f"\n---\n\n## 🔎 {label}\n\n{translated_desc}\n")

    return "".join(sections) if seen else "No named entities found."
79
+
80
+ # Gradio UI with output language selector
81
# Web UI: a text box for the input sentence plus a dropdown selecting the
# language the entity descriptions are translated into. The result is
# rendered as Markdown.
iface = gr.Interface(
    title="🌐 Multilingual NER + Wikidata + Wikipedia",
    description=(
        "Detects entities in any language, fetches descriptions from "
        "Wikidata (or Wikipedia), and translates the output into your "
        "chosen language."
    ),
    fn=multilingual_entity_info,
    inputs=[
        gr.Textbox(
            lines=4,
            placeholder="Type any sentence in any language...",
        ),
        gr.Dropdown(
            label="Select Output Language",
            choices=["en", "hi", "es", "fr", "de", "ta", "zh"],
            value="en",
        ),
    ],
    outputs=gr.Markdown(),
)

# Start the server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()