Spaces:

dindizz
/

sandhisplitter

Sleeping

App Files Community

dindizz committed 18 days ago

Commit

a24da76

verified ·

1 Parent(s): 2a11bbe

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -16

app.py CHANGED Viewed

@@ -1,50 +1,91 @@
 import gradio as gr
-# --- Minimal Sanskrit lexicon (extend with real data) ---
 # Set of word forms accepted as valid parts of a split: generate_candidates
 # keeps a (left, right) pair only when both strings are members of this set.
 LEXICON = {
     "राम", "वन", "गच्छति", "गुरु", "इन्द्र", "तत्", "अपि",
-    "धर्म", "क्षेत्र", "कुरु", "क्षेत्रे"
 }
-# --- Basic Reverse Sandhi Rules ---
 # Each entry is (ending, expansions). When the left half of a split ends with
 # `ending`, generate_candidates removes that ending and, for every "L+R"
 # expansion, appends L to the left half and prepends R to the right half
 # before looking both halves up in LEXICON.
 REVERSE_SANDHI_RULES = [
-    ("ा", ["अ+अ"]),    # ā → a + a
-    ("े", ["अ+इ", "अ+ई"]),  # e → a+i or a+ī
-    ("ो", ["अ+उ", "अ+ऊ"]),  # o → a+u or a+ū
     # The visarga expansion puts the same visarga back on the left half and adds
     # nothing to the right half, so it yields the same pair as the direct split.
-    ("ः", ["ः+"]),          # visarga restoration
 ]
 def generate_candidates(word):
     """Return candidate two-part sandhi splits of ``word``.

     Every split position is tried twice: once as a plain split, and once with
     each matching entry of ``REVERSE_SANDHI_RULES`` undone at the end of the
     left half. A pair is kept only when both halves are in ``LEXICON``.

     Args:
         word: The text to split.

     Returns:
         A list of unique ``(left, right)`` tuples in the order they were
         found, or ``[("No plausible split found", "")]`` when nothing matches.
     """
     candidates = []
     for i in range(1, len(word)):
         left, right = word[:i], word[i:]
         # Direct split
         if left in LEXICON and right in LEXICON:
             candidates.append((left, right))
         # Apply reverse sandhi substitutions
         for ending, expansions in REVERSE_SANDHI_RULES:
             if not left.endswith(ending):
                 continue
             # Strip the whole ending, not just one code point, so that
             # multi-character endings are handled correctly too.
             stem = left[:-len(ending)]
             for expansion in expansions:
                 left_tail, _, right_head = expansion.partition("+")
                 # A consonant letter (U+0915..U+0939) already carries the
                 # inherent vowel "अ". Appending the independent letter would
                 # build a form such as "रामअ" that no lexicon entry can match,
                 # so the bare stem is used instead.
                 if left_tail == "अ" and "\u0915" <= stem[-1:] <= "\u0939":
                     l_base = stem
                 else:
                     l_base = stem + left_tail
                 r_base = right_head + right
                 if l_base in LEXICON and r_base in LEXICON:
                     candidates.append((l_base, r_base))
     # Deduplicate while keeping discovery order; a set would make the order of
     # the displayed candidates vary from run to run.
     unique = list(dict.fromkeys(candidates))
     return unique or [("No plausible split found", "")]
 def sandhi_splitter(word):
     """Format the candidate splits of ``word`` for display.

     Args:
         word: Raw text from the input box; ``None`` is treated as empty.

     Returns:
         One candidate per line, with the parts joined by ``" + "``.
     """
     candidates = generate_candidates((word or "").strip())
     # Skip empty parts so the ("No plausible split found", "") placeholder is
     # shown as a plain message instead of ending in a dangling " + ".
     formatted = [" + ".join(part for part in c if part) for c in candidates]
     return "\n".join(formatted)
 # Gradio UI: components are laid out in the order they are created inside the
 # Blocks context, so the statement order below is the on-page order.
 with gr.Blocks() as demo:
-    gr.Markdown("## Sanskrit Sandhi-Splitter (Prototype)")
-    gr.Markdown("Enter a Sanskrit compound word (Devanagari) to see possible splits.")
-    inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
-    out = gr.Textbox(label="Candidate Splits")
-    btn = gr.Button("Split Sandhi")
     # Clicking the button passes the input textbox value to sandhi_splitter and
     # writes the returned string into the output textbox.
-    btn.click(fn=sandhi_splitter, inputs=inp, outputs=out)
 # Launch the app only when this file is run as a script.
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+# --- Load Sanskrit Lexicon ---
+# In a production setup, you might load from a file with thousands of entries.
+# Here, we include an expanded illustrative lexicon; extend with MW/GRETIL for full coverage.
 # Set of word forms accepted as valid parts of a split: generate_candidates
 # keeps a (left, right) pair only when both strings are members of this set.
 LEXICON = {
     "राम", "वन", "गच्छति", "गुरु", "इन्द्र", "तत्", "अपि",
+    "धर्म", "क्षेत्र", "कुरु", "क्षेत्रे", "अस्ति", "शिव", "शक्ति",
+    "पाणि", "पतिः", "सीता", "लक्ष्मण", "हनुमान", "विष्णु", "देव", "गज"
+    # Add more entries or load from a full CSV
 }
+# --- Expanded Reverse Sandhi Rules ---
+# Format: (left_end, expansions)
+# expansions: "Left+Right" representing the split form.
 # When the left half of a split ends with `left_end`, generate_candidates
 # removes that ending and, for every expansion, appends the "Left" text to the
 # left half and prepends the "Right" text to the right half before looking both
 # halves up in LEXICON.
 REVERSE_SANDHI_RULES = [
+    # Vowel Sandhi
+    ("ा", ["अ+अ"]),          # ā -> a + a
+    ("े", ["अ+इ", "अ+ई"]),  # e -> a+i or a+ī
+    ("ो", ["अ+उ", "अ+ऊ"]),  # o -> a+u or a+ū
+    # Consonant Sandhi: t/d to tt
     # NOTE(review): "त्+द" yields a left half ending in त् and a right half
     # starting with द, which does not rejoin to "tt"; given the "t/d to tt"
     # heading, "द्+त" may have been intended -- confirm.
+    ("त्त", ["त्+त", "त्+द"]),
+    # Visarga restoration
     # This expansion puts the same visarga back on the left half and adds nothing
     # to the right half, so it yields the same pair as the direct split.
+    ("ः", ["ः+"]),
+    # Anusvara restoration (ṃ before consonants)
+    ("ं", ["म्+", "न्+"]),
 ]
 def generate_candidates(word):
     """Return candidate two-part sandhi splits of ``word``.

     Every split position is tried twice: once as a plain split, and once with
     each matching entry of ``REVERSE_SANDHI_RULES`` undone at the end of the
     left half. A pair is kept only when both halves are in ``LEXICON``.

     Args:
         word: The text to split.

     Returns:
         A list of unique ``(left, right)`` tuples in the order they were
         found, or ``[("No plausible split found", "")]`` when nothing matches.
     """
     candidates = []
     for i in range(1, len(word)):
         left, right = word[:i], word[i:]
         # Direct split
         if left in LEXICON and right in LEXICON:
             candidates.append((left, right))
         # Rule-based reverse sandhi
         for ending, expansions in REVERSE_SANDHI_RULES:
             if not left.endswith(ending):
                 continue
             # Endings may span several code points (e.g. "त्त"), so strip the
             # whole ending rather than a single character.
             stem = left[:-len(ending)]
             for expansion in expansions:
                 left_tail, _, right_head = expansion.partition("+")
                 # A consonant letter (U+0915..U+0939) already carries the
                 # inherent vowel "अ". Appending the independent letter would
                 # build a form such as "रामअ" that no lexicon entry can match,
                 # so the bare stem is used instead.
                 if left_tail == "अ" and "\u0915" <= stem[-1:] <= "\u0939":
                     l_base = stem
                 else:
                     l_base = stem + left_tail
                 r_base = right_head + right
                 if l_base in LEXICON and r_base in LEXICON:
                     candidates.append((l_base, r_base))
     # Deduplicate while keeping discovery order; a set would make the order of
     # the displayed candidates vary from run to run.
     unique = list(dict.fromkeys(candidates))
     return unique or [("No plausible split found", "")]
 def sandhi_splitter(word):
     """Format the candidate splits of ``word`` for display.

     Args:
         word: Raw text from the input box; ``None`` is treated as empty.

     Returns:
         ``"Please enter a word."`` for blank input, otherwise one candidate
         per line with the parts joined by ``" + "``.
     """
     word = (word or "").strip()
     if not word:
         return "Please enter a word."
     candidates = generate_candidates(word)
     # Skip empty parts so the ("No plausible split found", "") placeholder is
     # shown as a plain message instead of ending in a dangling " + ".
     formatted = [" + ".join(part for part in c if part) for c in candidates]
     return "\n".join(formatted)
+# --- Gradio App ---
 # Components are laid out in the order they are created inside the nested
 # Blocks/Row/Column contexts, so the statement order below is the on-page order.
 with gr.Blocks() as demo:
+    with gr.Row():
         # Wider column (scale=3): title, instructions, input box, button, output box.
+        with gr.Column(scale=3):
+            gr.Markdown("## Sanskrit Sandhi-Splitter (Prototype, Extended Rules)")
+            gr.Markdown(
+                "**Instructions:**\n"
+                "1. Enter a **Sanskrit compound word** in Devanagari (e.g. धर्मक्षेत्रे).\n"
+                "2. Click **Split Sandhi** to see possible splits.\n"
+                "3. Candidate splits are based on a small dictionary and reverse sandhi rules.\n\n"
+                "**Contact:** For issues, mail **[email protected]**"
+            )
+            inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
+            btn = gr.Button("Split Sandhi")
+            out = gr.Textbox(label="Candidate Splits", lines=5)
             # Clicking the button passes the input textbox value to sandhi_splitter
             # and writes the returned string into the output textbox.
+            btn.click(fn=sandhi_splitter, inputs=inp, outputs=out)
         # Narrower column (scale=1): static usage notes.
+        with gr.Column(scale=1):
+            gr.Markdown(
+                "### How to Use This Tool\n"
+                "- Input any Sanskrit **compound** word.\n"
+                "- Works best with **Devanagari script**.\n"
+                "- Multiple possible splits may appear.\n\n"
+                "### Notes\n"
+                "- Uses **rule-based splitting** + lexicon check.\n"
+                "- Limited lexicon in demo – extend with MW/GRETIL for accuracy.\n\n"
+                "**Support:** [email protected]"
+            )
 # Launch the app only when this file is run as a script.
 if __name__ == "__main__":
     demo.launch()