dindizz committed on
Commit
a24da76
·
verified ·
1 Parent(s): 2a11bbe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -16
app.py CHANGED
@@ -1,50 +1,91 @@
1
  import gradio as gr
2
 
3
- # --- Minimal Sanskrit lexicon (extend with real data) ---
 
 
4
  LEXICON = {
5
  "राम", "वन", "गच्छति", "गुरु", "इन्द्र", "तत्", "अपि",
6
- "धर्म", "क्षेत्र", "कुरु", "क्षेत्रे"
 
 
7
  }
8
 
9
- # --- Basic Reverse Sandhi Rules ---
 
 
10
  REVERSE_SANDHI_RULES = [
11
- ("ा", ["अ+अ"]), # ā → a + a
12
- ("े", ["अ+इ", "अ+ई"]), # e → a+i or a+ī
13
- ("ो", ["अ+उ", "अ+ऊ"]), # o → a+u or a+ū
14
- ("ः", ["ः+"]), # visarga restoration
 
 
 
 
 
 
15
  ]
16
 
17
  def generate_candidates(word):
18
  candidates = []
19
  for i in range(1, len(word)):
20
  left, right = word[:i], word[i:]
 
21
  # Direct split
22
  if left in LEXICON and right in LEXICON:
23
  candidates.append((left, right))
24
- # Apply reverse sandhi substitutions
 
25
  for ch, expansions in REVERSE_SANDHI_RULES:
26
  if left.endswith(ch):
27
  for exp in expansions:
28
- l_base = left[:-1] + exp.split("+")[0]
29
  r_base = exp.split("+")[1] + right
30
  if l_base in LEXICON and r_base in LEXICON:
31
  candidates.append((l_base, r_base))
 
32
  # Deduplicate
33
  candidates = list(set(candidates))
34
  return candidates or [("No plausible split found", "")]
35
 
36
  def sandhi_splitter(word):
37
- candidates = generate_candidates(word.strip())
 
 
 
 
38
  formatted = [" + ".join(c) for c in candidates]
39
  return "\n".join(formatted)
40
 
 
41
  with gr.Blocks() as demo:
42
- gr.Markdown("## Sanskrit Sandhi-Splitter (Prototype)")
43
- gr.Markdown("Enter a Sanskrit compound word (Devanagari) to see possible splits.")
44
- inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
45
- out = gr.Textbox(label="Candidate Splits")
46
- btn = gr.Button("Split Sandhi")
47
- btn.click(fn=sandhi_splitter, inputs=inp, outputs=out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  if __name__ == "__main__":
50
  demo.launch()
 
1
  import gradio as gr
2
 
3
+ # --- Load Sanskrit Lexicon ---
4
+ # In a production setup, you might load from a file with thousands of entries.
5
+ # Here, we include an expanded illustrative lexicon; extend with MW/GRETIL for full coverage.
6
  LEXICON = {
7
  "राम", "वन", "गच्छति", "गुरु", "इन्द्र", "तत्", "अपि",
8
+ "धर्म", "क्षेत्र", "कुरु", "क्षेत्रे", "अस्ति", "शिव", "शक्ति",
9
+ "पाणि", "पतिः", "सीता", "लक्ष्मण", "हनुमान", "विष्णु", "देव", "गज"
10
+ # Add more entries or load from a full CSV
11
  }
12
 
13
+ # --- Expanded Reverse Sandhi Rules ---
14
+ # Format: (left_end, expansions)
15
+ # expansions: "Left+Right" representing the split form.
16
  REVERSE_SANDHI_RULES = [
17
+ # Vowel Sandhi
18
+ ("ा", ["अ+अ"]), # ā -> a + a
19
+ ("े", ["अ+इ", "अ+ई"]), # e -> a+i or a+ī
20
+ ("ो", ["अ+उ", "अ+ऊ"]), # o -> a+u or a+ū
21
+ # Consonant Sandhi: t/d to tt
22
+ ("त्त", ["त्+त", "त्+द"]),
23
+ # Visarga restoration
24
+ ("ः", ["ः+"]),
25
+ # Anusvara restoration (ṃ before consonants)
26
+ ("ं", ["म्+", "न्+"]),
27
  ]
28
 
29
  def generate_candidates(word):
30
  candidates = []
31
  for i in range(1, len(word)):
32
  left, right = word[:i], word[i:]
33
+
34
  # Direct split
35
  if left in LEXICON and right in LEXICON:
36
  candidates.append((left, right))
37
+
38
+ # Rule-based reverse sandhi
39
  for ch, expansions in REVERSE_SANDHI_RULES:
40
  if left.endswith(ch):
41
  for exp in expansions:
42
+ l_base = left[:-len(ch)] + exp.split("+")[0]
43
  r_base = exp.split("+")[1] + right
44
  if l_base in LEXICON and r_base in LEXICON:
45
  candidates.append((l_base, r_base))
46
+
47
  # Deduplicate
48
  candidates = list(set(candidates))
49
  return candidates or [("No plausible split found", "")]
50
 
51
  def sandhi_splitter(word):
52
+ word = word.strip()
53
+ if not word:
54
+ return "Please enter a word."
55
+
56
+ candidates = generate_candidates(word)
57
  formatted = [" + ".join(c) for c in candidates]
58
  return "\n".join(formatted)
59
 
60
+ # --- Gradio App ---
61
  with gr.Blocks() as demo:
62
+ with gr.Row():
63
+ with gr.Column(scale=3):
64
+ gr.Markdown("## Sanskrit Sandhi-Splitter (Prototype, Extended Rules)")
65
+ gr.Markdown(
66
+ "**Instructions:**\n"
67
+ "1. Enter a **Sanskrit compound word** in Devanagari (e.g. धर्मक्षेत्रे).\n"
68
+ "2. Click **Split Sandhi** to see possible splits.\n"
69
+ "3. Candidate splits are based on a small dictionary and reverse sandhi rules.\n\n"
70
+ "**Contact:** For issues, mail **[email protected]**"
71
+ )
72
+ inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
73
+ btn = gr.Button("Split Sandhi")
74
+ out = gr.Textbox(label="Candidate Splits", lines=5)
75
+
76
+ btn.click(fn=sandhi_splitter, inputs=inp, outputs=out)
77
+
78
+ with gr.Column(scale=1):
79
+ gr.Markdown(
80
+ "### How to Use This Tool\n"
81
+ "- Input any Sanskrit **compound** word.\n"
82
+ "- Works best with **Devanagari script**.\n"
83
+ "- Multiple possible splits may appear.\n\n"
84
+ "### Notes\n"
85
+ "- Uses **rule-based splitting** + lexicon check.\n"
86
+ "- Limited lexicon in demo – extend with MW/GRETIL for accuracy.\n\n"
87
+ "**Support:** [email protected]"
88
+ )
89
 
90
  if __name__ == "__main__":
91
  demo.launch()