"""Prototype Sanskrit sandhi splitter with a small Gradio front end.

A compound word written in Devanagari is cut at every interior position;
a cut is reported when both halves (optionally after undoing one of the
vowel/visarga sandhi rules below) are present in ``LEXICON``.
"""

import gradio as gr

# --- Minimal Sanskrit lexicon (extend with real data) ---
LEXICON = {
    "राम",
    "वन",
    "गच्छति",
    "गुरु",
    "इन्द्र",
    "तत्",
    "अपि",
    "धर्म",
    "क्षेत्र",
    "कुरु",
    "क्षेत्रे",
}

# --- Basic Reverse Sandhi Rules ---
# Each entry is (surface sign ending the left half, ["left+right", ...]):
# the text before "+" is what the left word originally ended with, the
# text after "+" is what the right word originally started with.
REVERSE_SANDHI_RULES = [
    ("ा", ["अ+अ"]),  # ā → a + a
    ("े", ["अ+इ", "अ+ई"]),  # e → a+i or a+ī
    ("ो", ["अ+उ", "अ+ऊ"]),  # o → a+u or a+ū
    ("ः", ["ः+"]),  # visarga restoration
]

# Fallback returned by generate_candidates() when nothing matches.
_NO_SPLIT = ("No plausible split found", "")

# Independent letter for short "a". At the end of a word this vowel is
# inherent in the preceding consonant and is therefore not written.
_INHERENT_A = "अ"


def generate_candidates(word):
    """Return the plausible two-way splits of *word*.

    Args:
        word: The compound to analyse, as a Devanagari string.

    Returns:
        A list of ``(left, right)`` tuples whose members are both in
        ``LEXICON``, in the order they were discovered and without
        duplicates. If nothing matches, a single placeholder tuple
        ``("No plausible split found", "")`` is returned instead.
    """
    candidates = []
    for i in range(1, len(word)):
        left, right = word[:i], word[i:]

        # Direct split: both halves are dictionary words as they stand.
        if left in LEXICON and right in LEXICON:
            candidates.append((left, right))

        # Reverse sandhi: undo a fused vowel/visarga at the cut point.
        for sign, expansions in REVERSE_SANDHI_RULES:
            if not left.endswith(sign):
                continue
            stem = left[: -len(sign)]
            for expansion in expansions:
                left_tail, _, right_head = expansion.partition("+")
                # A restored final "a" is the consonant's inherent vowel,
                # so it contributes no character to the left word
                # (e.g. "रामा" -> "राम", not "रामअ").
                if left_tail == _INHERENT_A:
                    left_tail = ""
                l_base = stem + left_tail
                r_base = right_head + right
                if l_base in LEXICON and r_base in LEXICON:
                    candidates.append((l_base, r_base))

    # Deduplicate while keeping discovery order so output is stable
    # from run to run (a plain set() would reorder results arbitrarily).
    unique = list(dict.fromkeys(candidates))
    return unique or [_NO_SPLIT]


def sandhi_splitter(word):
    """Format the candidate splits of *word* for display.

    Args:
        word: Raw text from the input box; surrounding whitespace is
            ignored and ``None`` is treated as empty input.

    Returns:
        One candidate per line, with the parts joined by ``" + "``.
    """
    candidates = generate_candidates((word or "").strip())
    # Skip empty parts so the "no split" placeholder is not rendered
    # with a dangling " + " separator.
    formatted = [" + ".join(part for part in parts if part) for parts in candidates]
    return "\n".join(formatted)


with gr.Blocks() as demo:
    gr.Markdown("## Sanskrit Sandhi-Splitter (Prototype)")
    gr.Markdown("Enter a Sanskrit compound word (Devanagari) to see possible splits.")
    inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
    out = gr.Textbox(label="Candidate Splits")
    btn = gr.Button("Split Sandhi")
    btn.click(fn=sandhi_splitter, inputs=inp, outputs=out)

if __name__ == "__main__":
    demo.launch()