Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +50 -0
- requirements.txt +1 -0
app.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
# Known Sanskrit word forms (Devanagari). A split is only reported when
# both halves appear in this set, so extend it with real data as needed.
LEXICON = {
    "राम",
    "वन",
    "गच्छति",
    "गुरु",
    "इन्द्र",
    "तत्",
    "अपि",
    "धर्म",
    "क्षेत्र",
    "कुरु",
    "क्षेत्रे",
}
|
8 |
+
|
9 |
+
# Reverse sandhi table.
# Each entry is (surface ending of the left chunk, [expansions]); an
# expansion "X+Y" means the ending is undone as X closing the left word
# and Y opening the right word.
REVERSE_SANDHI_RULES = [
    # ā  <-  a + a
    ("ा", ["अ+अ"]),
    # e  <-  a + i  or  a + ī
    ("े", ["अ+इ", "अ+ई"]),
    # o  <-  a + u  or  a + ū
    ("ो", ["अ+उ", "अ+ऊ"]),
    # visarga is kept on the left word; nothing is added to the right word
    ("ः", ["ः+"]),
]
|
16 |
+
|
17 |
+
# Independent vowel letter -> dependent vowel sign (matra). The inherent "a"
# carried by a bare consonant has no sign of its own, hence the empty string.
_VOWEL_TO_MATRA = {
    "अ": "",
    "आ": "ा",
    "इ": "ि",
    "ई": "ी",
    "उ": "ु",
    "ऊ": "ू",
    "ऋ": "ृ",
    "ए": "े",
    "ऐ": "ै",
    "ओ": "ो",
    "औ": "ौ",
}


def _is_devanagari_consonant(ch):
    """Return True if *ch* is a Devanagari consonant letter."""
    return "\u0915" <= ch <= "\u0939" or "\u0958" <= ch <= "\u095f"


def _restore_left(stem, ending):
    """Re-attach the restored *ending* to *stem* with correct spelling.

    A vowel that follows a consonant is written as a dependent sign (or as
    nothing at all for the inherent "a"), never as an independent letter.
    Anything else (e.g. a visarga) is appended unchanged.
    """
    if stem and ending in _VOWEL_TO_MATRA and _is_devanagari_consonant(stem[-1]):
        return stem + _VOWEL_TO_MATRA[ending]
    return stem + ending


def generate_candidates(word, lexicon=None, rules=None):
    """Return the possible two-word splits of *word*.

    Every split position is tried twice: as a plain cut, and with each
    reverse-sandhi rule whose surface ending matches the end of the left
    chunk. A split is kept only if both resulting words are in the lexicon.

    Args:
        word: The Devanagari string to split.
        lexicon: Container of known words; defaults to the module-level
            ``LEXICON``.
        rules: Iterable of ``(surface_ending, ["left+right", ...])`` pairs;
            defaults to the module-level ``REVERSE_SANDHI_RULES``.

    Returns:
        A list of unique ``(left, right)`` tuples in the order they were
        found, or ``[("No plausible split found", "")]`` when there is none.
    """
    if lexicon is None:
        lexicon = LEXICON
    if rules is None:
        rules = REVERSE_SANDHI_RULES

    # A dict is used as an insertion-ordered set so the result is
    # deterministic (a plain set would return candidates in arbitrary order).
    found = {}
    for i in range(1, len(word)):
        left, right = word[:i], word[i:]

        # Plain cut, no sandhi involved.
        if left in lexicon and right in lexicon:
            found[(left, right)] = None

        # Undo sandhi at the junction.
        for ending, expansions in rules:
            if not left.endswith(ending):
                continue
            stem = left[: len(left) - len(ending)]
            for expansion in expansions:
                left_end, _, right_start = expansion.partition("+")
                # The left vowel must be spelled as a matra / inherent "a";
                # appending the independent letter would never match.
                l_base = _restore_left(stem, left_end)
                r_base = right_start + right
                if l_base in lexicon and r_base in lexicon:
                    found[(l_base, r_base)] = None

    return list(found) or [("No plausible split found", "")]
|
35 |
+
|
36 |
+
def sandhi_splitter(word):
    """Format the candidate splits of *word* for display.

    Args:
        word: Text entered by the user; surrounding whitespace is ignored
            and ``None`` is treated as an empty string.

    Returns:
        One candidate per line, with the two parts joined by ``" + "``.
    """
    candidates = generate_candidates((word or "").strip())
    # Skip empty parts so the "no split" placeholder, whose second element
    # is "", is shown as-is rather than with a dangling " + ".
    formatted = [" + ".join(part for part in pair if part) for pair in candidates]
    return "\n".join(formatted)
|
40 |
+
|
41 |
+
# Gradio front end: a heading, a short instruction, one input box, one
# output box, and a button that runs sandhi_splitter on the entered word.
with gr.Blocks() as demo:
    gr.Markdown("## Sanskrit Sandhi-Splitter (Prototype)")
    gr.Markdown(
        "Enter a Sanskrit compound word (Devanagari) to see possible splits."
    )

    inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
    out = gr.Textbox(label="Candidate Splits")

    btn = gr.Button("Split Sandhi")
    btn.click(sandhi_splitter, inputs=inp, outputs=out)


# Start the web server only when run as a script.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
gradio
|