Pracheethaa committed on
Commit
a5b8665
·
verified ·
1 Parent(s): e111cbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -58
app.py CHANGED
@@ -4,20 +4,15 @@ from langdetect import detect
4
  import requests
5
  import wikipedia
6
 
7
- # Load multilingual NER model
8
  ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", grouped_entities=True)
9
 
10
- # Translation models cache
11
- translation_models = {}
12
-
13
- # Get enriched Wikidata info via SPARQL
14
  def get_wikidata_info(entity, lang="en"):
15
  query = f'''
16
- SELECT ?item ?itemLabel ?itemDescription ?coordinate ?website ?sitelink WHERE {{
17
  ?item rdfs:label "{entity}"@{lang}.
18
  OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
19
- OPTIONAL {{ ?item wdt:P856 ?website. }}
20
- OPTIONAL {{ ?sitelink schema:about ?item; schema:isPartOf <https://{lang}.wikipedia.org/>. }}
21
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
22
  }} LIMIT 1
23
  '''
@@ -29,39 +24,25 @@ def get_wikidata_info(entity, lang="en"):
29
  if data['results']['bindings']:
30
  item = data['results']['bindings'][0]
31
  label = item.get('itemLabel', {}).get('value', entity)
32
- description = item.get('itemDescription', {}).get('value', '')
33
  coord = item.get('coordinate', {}).get('value', '')
34
- website = item.get('website', {}).get('value', '')
35
- wiki = item.get('sitelink', {}).get('value', '')
36
- return label, description, coord, website, wiki
37
  except:
38
  pass
39
- return entity, "", "", "", ""
40
 
41
- # Get Wikipedia description as fallback
42
- def get_wikipedia_summary(entity, lang="en"):
43
  try:
44
  wikipedia.set_lang(lang)
45
- summary = wikipedia.summary(entity, sentences=2, auto_suggest=True, redirect=True)
46
- return summary
47
- except:
48
- return "No description available."
49
-
50
- # Translate text using MarianMT models
51
- def translate_text(text, src_lang, tgt_lang):
52
- if src_lang == tgt_lang:
53
- return text
54
- model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
55
- try:
56
- if model_name not in translation_models:
57
- translation_models[model_name] = pipeline("translation", model=model_name)
58
- translator = translation_models[model_name]
59
- return translator(text, max_length=256)[0]['translation_text']
60
  except:
61
- return text # Return untranslated if model fails
62
 
63
- # Combined NER + Wikidata + fallback Wikipedia + translation + links + map
64
- def multilingual_entity_info(text, output_lang):
65
  try:
66
  detected_lang = detect(text)
67
  except:
@@ -69,47 +50,47 @@ def multilingual_entity_info(text, output_lang):
69
 
70
  entities = ner_pipeline(text)
71
  seen = set()
72
- result = f"**🌐 Detected Language:** `{detected_lang}`\n**🌍 Output Language:** `{output_lang}`\n\n"
73
 
74
  for ent in entities:
75
  name = ent['word'].strip()
76
  if name not in seen and name.isalpha():
77
  seen.add(name)
78
- label, desc, coord, website, wiki = get_wikidata_info(name, lang=detected_lang)
79
- if not desc:
80
- desc = get_wikipedia_summary(name, lang=detected_lang)
81
- translated_desc = translate_text(desc, detected_lang, output_lang)
82
 
83
- links = ""
84
- if wiki:
85
- links += f"🔗 [Wikipedia]({wiki}) "
86
- if website:
87
- links += f"🌐 [Official Site]({website})"
88
 
89
- map_embed = ""
90
  if coord:
91
  try:
92
- latlon = coord.replace('Point(', '').replace(')', '').split(' ')
93
- lon, lat = latlon[0], latlon[1]
94
- map_embed = f"\n<iframe width='100%' height='300' frameborder='0' scrolling='no' marginheight='0' marginwidth='0' src='https://www.openstreetmap.org/export/embed.html?bbox={lon}%2C{lat}%2C{lon}%2C{lat}&layer=mapnik&marker={lat}%2C{lon}'></iframe>"
95
  except:
96
  pass
97
 
98
- result += f"\n---\n\n## 🔎 {label}\n\n{translated_desc}\n\n{links}\n{map_embed}\n"
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  return result if seen else "No named entities found."
101
 
102
- # Gradio UI with output language selector
103
  iface = gr.Interface(
104
- fn=multilingual_entity_info,
105
- inputs=[
106
- gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
107
- gr.Dropdown(label="Select Output Language", choices=["en", "hi", "es", "fr", "de", "ta", "zh"], value="en")
108
- ],
109
- outputs=gr.Markdown(),
110
- title="🌐 Multilingual NER + Wikidata + Wikipedia + Maps",
111
- description="Detects entities in any language, fetches enriched Wikidata info, falls back to Wikipedia, translates the description, and embeds maps + links."
112
  )
113
 
114
  if __name__ == "__main__":
115
- iface.launch()
 
4
  import requests
5
  import wikipedia
6
 
7
+ # Load NER model
8
  ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", grouped_entities=True)
9
 
10
+ # Get Wikidata entity info
 
 
 
11
  def get_wikidata_info(entity, lang="en"):
12
  query = f'''
13
+ SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
14
  ?item rdfs:label "{entity}"@{lang}.
15
  OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
 
 
16
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
17
  }} LIMIT 1
18
  '''
 
24
  if data['results']['bindings']:
25
  item = data['results']['bindings'][0]
26
  label = item.get('itemLabel', {}).get('value', entity)
27
+ description = item.get('itemDescription', {}).get('value', 'No description available.')
28
  coord = item.get('coordinate', {}).get('value', '')
29
+ wikidata_link = item.get('item', {}).get('value', '')
30
+ return label, description, coord, wikidata_link
 
31
  except:
32
  pass
33
+ return entity, "No description available.", "", ""
34
 
35
+ # Get fallback Wikipedia link
36
+ def get_wikipedia_url(entity, lang="en"):
37
  try:
38
  wikipedia.set_lang(lang)
39
+ page = wikipedia.page(entity, auto_suggest=True, redirect=True)
40
+ return page.url
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  except:
42
+ return ""
43
 
44
+ # Main combined function
45
+ def ner_wikidata_lookup(text):
46
  try:
47
  detected_lang = detect(text)
48
  except:
 
50
 
51
  entities = ner_pipeline(text)
52
  seen = set()
53
+ result = f"<b>🌐 Detected Language:</b> <code>{detected_lang}</code><br><br>"
54
 
55
  for ent in entities:
56
  name = ent['word'].strip()
57
  if name not in seen and name.isalpha():
58
  seen.add(name)
 
 
 
 
59
 
60
+ label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
61
+ wiki_url = get_wikipedia_url(name, lang=detected_lang)
 
 
 
62
 
63
+ osm_link = ""
64
  if coord:
65
  try:
66
+ lon, lat = coord.replace('Point(', '').replace(')', '').split(' ')
67
+ osm_link = f"<a href='https://www.openstreetmap.org/?mlat={lat}&mlon={lon}' target='_blank'>📍 View on OpenStreetMap</a>"
 
68
  except:
69
  pass
70
 
71
+ links = ""
72
+ if wikidata_url:
73
+ links += f"<a href='{wikidata_url}' target='_blank'>🔗 Wikidata</a> "
74
+ if wiki_url:
75
+ links += f"<a href='{wiki_url}' target='_blank'>📘 Wikipedia</a>"
76
+
77
+ result += f"""
78
+ <hr><h3>🔎 {label}</h3>
79
+ <p>{desc}</p>
80
+ <p>{links}</p>
81
+ <p>{osm_link}</p>
82
+ """
83
 
84
  return result if seen else "No named entities found."
85
 
86
+ # Gradio Interface using HTML output
87
  iface = gr.Interface(
88
+ fn=ner_wikidata_lookup,
89
+ inputs=gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
90
+ outputs=gr.HTML(),
91
+ title="🌐 NER with Wikidata + Wikipedia + Maps",
92
+ description="Detects named entities, retrieves Wikidata descriptions, adds Wikipedia links and map locations."
 
 
 
93
  )
94
 
95
  if __name__ == "__main__":
96
+ iface.launch()