Spaces:
Sleeping
Sleeping
File size: 1,955 Bytes
2a11bbe f120f79 2a11bbe f120f79 2a11bbe f120f79 394de29 2a11bbe f120f79 2a11bbe f120f79 2a11bbe f120f79 2a11bbe f120f79 2a11bbe f120f79 2a11bbe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import gradio as gr
# --- Minimal Sanskrit lexicon (extend with real data) ---
LEXICON = {
    "राम", "वन", "गच्छति", "गुरु", "इन्द्र", "तत्", "अपि",
    "धर्म", "क्षेत्र", "कुरु", "क्षेत्रे",
}

# --- Basic Reverse Sandhi Rules ---
# Each rule is (surface sign ending the left fragment, ["left+right", ...]):
# the text before "+" is the sound restored at the end of the left word and
# the text after "+" is the sound restored at the start of the right word.
REVERSE_SANDHI_RULES = [
    ("ा", ["अ+अ"]),  # ā → a + a
    ("े", ["अ+इ", "अ+ई"]),  # e → a+i or a+ī
    ("ो", ["अ+उ", "अ+ऊ"]),  # o → a+u or a+ū
    ("ः", ["ः+"]),  # visarga restoration
]

# Independent vowel letter -> dependent vowel sign written after a consonant.
# "अ" maps to "" because a bare consonant letter already carries the
# inherent "a"; writing "अ" after it would never match a dictionary form.
_VOWEL_TO_MATRA = {
    "अ": "", "आ": "ा", "इ": "ि", "ई": "ी", "उ": "ु", "ऊ": "ू",
    "ऋ": "ृ", "ए": "े", "ऐ": "ै", "ओ": "ो", "औ": "ौ",
}


def _is_consonant(ch):
    """Return True if *ch* is a Devanagari consonant letter."""
    return "\u0915" <= ch <= "\u0939" or "\u0958" <= ch <= "\u095f"


def _restore_left(stem, ending):
    """Attach the restored final sound *ending* to *stem*.

    After a consonant a vowel is spelled with its dependent sign (nothing at
    all for the inherent "a"); in every other position *ending* is appended
    unchanged.
    """
    if stem and _is_consonant(stem[-1]) and ending in _VOWEL_TO_MATRA:
        return stem + _VOWEL_TO_MATRA[ending]
    return stem + ending


def generate_candidates(word):
    """Return possible two-word splits of *word* found in ``LEXICON``.

    Every split position is tried twice: once as a plain cut, and once with
    each applicable entry of ``REVERSE_SANDHI_RULES`` undone at the seam.
    A pair is kept only when both halves are in ``LEXICON``.

    Args:
        word: The compound to analyse (Devanagari string).

    Returns:
        A list of ``(left, right)`` tuples without duplicates, in the order
        they were discovered (left-to-right by split position).  When nothing
        matches, the single placeholder pair
        ``("No plausible split found", "")`` is returned instead.
    """
    candidates = []
    for i in range(1, len(word)):
        left, right = word[:i], word[i:]

        # Direct split
        if left in LEXICON and right in LEXICON:
            candidates.append((left, right))

        # Apply reverse sandhi substitutions
        for ch, expansions in REVERSE_SANDHI_RULES:
            if not left.endswith(ch):
                continue
            stem = left[: -len(ch)]
            for exp in expansions:
                left_end, _, right_start = exp.partition("+")
                l_base = _restore_left(stem, left_end)
                r_base = right_start + right
                if l_base in LEXICON and r_base in LEXICON:
                    candidates.append((l_base, r_base))

    # Deduplicate while keeping discovery order, so the output is stable
    # from run to run (set iteration order of strings is not).
    unique = list(dict.fromkeys(candidates))
    return unique or [("No plausible split found", "")]
def sandhi_splitter(word):
    """Format the candidate splits of *word* as display text.

    Args:
        word: Raw text from the input box; surrounding whitespace is
            ignored and ``None`` is treated as an empty string.

    Returns:
        One line per candidate, with the parts of each candidate joined by
        ``" + "``.
    """
    candidates = generate_candidates((word or "").strip())
    # Skip empty parts so the placeholder pair returned when nothing matches
    # is shown as a plain message rather than with a dangling " + ".
    formatted = [" + ".join(part for part in pair if part) for pair in candidates]
    return "\n".join(formatted)
# --- Gradio UI ---
# Components are created in display order inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("## Sanskrit Sandhi-Splitter (Prototype)")
    gr.Markdown(
        "Enter a Sanskrit compound word (Devanagari) to see possible splits."
    )

    inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
    out = gr.Textbox(label="Candidate Splits")
    btn = gr.Button("Split Sandhi")

    # Clicking the button sends the input text through sandhi_splitter and
    # writes the returned string to the output box.
    btn.click(
        fn=sandhi_splitter,
        inputs=inp,
        outputs=out,
    )


# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|