wagner-austin committed on
Commit
36e257e
·
1 Parent(s): 4ff8979

Initial Space – Gradio demo for Kazakh/Kyrgyz transliteration

Browse files
Files changed (2) hide show
  1. app.py +113 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ A simple web interface to demonstrate Turkic (Kazakh/Kyrgyz) transliteration.
3
+ """
4
+ import os
5
+ import gradio as gr
6
+ from turkic_translit.core import to_latin, to_ipa
7
+ import unicodedata as ud
8
+
9
def enable_submit(text):
    """Enable the submit button only while the input box holds text.

    Args:
        text: Current content of the input textbox.

    Returns:
        A Gradio update that sets the target component's ``interactive``
        flag to ``True`` when *text* is non-empty and ``False`` otherwise.
    """
    # Returning a bare bool would be treated by Gradio as the button's new
    # *value* (its label) rather than its interactivity, so the button would
    # never become clickable. An explicit update targets the right property.
    return gr.update(interactive=bool(text))
11
+
12
def transliterate(text, lang, include_arabic, output_format):
    """Transliterate *text* and build a byte-size comparison line.

    Args:
        text: Input text to convert. Falsy input (``""`` or ``None``)
            short-circuits without calling the transliteration backend.
        lang: Language code forwarded to ``to_latin`` / ``to_ipa``.
        include_arabic: Forwarded to ``to_latin`` as ``include_arabic``;
            not used when the output format is IPA.
        output_format: ``"Latin"`` selects ``to_latin``; any other value
            selects ``to_ipa``.

    Returns:
        A ``(result, stats_md)`` tuple: the NFC-normalized output string and
        a Markdown snippet comparing UTF-8 byte lengths of input and output.
        Both are empty strings when *text* is falsy.

    Raises:
        gr.Error: Wraps any exception raised during conversion so that the
            message is surfaced in the UI; the original exception is kept
            as ``__cause__``.
    """
    if not text:
        return "", ""
    try:
        if output_format == "Latin":
            result = to_latin(text, lang, include_arabic=include_arabic)
            format_label = "Latin"
        else:
            result = to_ipa(text, lang)
            format_label = "IPA"
        # Compose combining sequences so the byte count reflects NFC output.
        result = ud.normalize("NFC", result)
        stats_md = (f"**Bytes** — Cyrillic : {len(text.encode('utf8'))}, "
                    f"{format_label} : {len(result.encode('utf8'))}")
        return result, stats_md
    except Exception as e:
        # Chain explicitly so the underlying failure stays visible in logs.
        raise gr.Error(str(e)) from e
28
+
29
# Build the Gradio Blocks app: input controls in one column, results in the other.
with gr.Blocks(title="Turkic Transliteration Demo") as demo:
    gr.Markdown("# Turkic Transliteration Demo")
    gr.Markdown("Enter Cyrillic text for Kazakh (kk) or Kyrgyz (ky) and see the Latin transliteration")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                lines=5,
                label="Input Text (Cyrillic)",
                placeholder="Enter Kazakh or Kyrgyz text in Cyrillic script...",
            )
            lang = gr.Radio(
                choices=["kk", "ky"],
                value="kk",
                label="Language",
                info="kk = Kazakh, ky = Kyrgyz",
            )
            output_format = gr.Radio(
                choices=["Latin", "IPA"],
                value="Latin",
                label="Output Format",
                info="Latin = Standard Latin alphabet, IPA = International Phonetic Alphabet",
            )
            include_arabic = gr.Checkbox(
                value=False,
                label="Also transliterate Arabic script (Latin mode only)",
            )
            submit_btn = gr.Button(value="Transliterate", variant="primary", interactive=False)
            # Re-evaluate the submit button whenever the input text changes.
            input_text.change(fn=enable_submit, inputs=input_text, outputs=submit_btn)

        with gr.Column():
            output_text = gr.Textbox(
                label="Transliteration Output",
                lines=5,
                interactive=False,
            )
            stats = gr.Markdown("")

    # Clickable sample rows: (text, language, output format).
    examples = [
        ["Қазақ тілі - Түркі тілдерінің бірі.", "kk", "Latin"],
        ["Қазақ тілі - Түркі тілдерінің бірі.", "kk", "IPA"],
        ["Кыргыз тили - Түрк тилдеринин бири.", "ky", "Latin"],
        ["Кыргыз тили - Түрк тилдеринин бири.", "ky", "IPA"],
    ]
    gr.Examples(examples=examples, inputs=[input_text, lang, output_format])

    # The button click and every control change run the same handler with the
    # same inputs/outputs, registered in the original order.
    _listeners = (
        submit_btn.click,
        input_text.change,
        lang.change,
        include_arabic.change,
        output_format.change,
    )
    for _register in _listeners:
        _register(
            fn=transliterate,
            inputs=[input_text, lang, include_arabic, output_format],
            outputs=[output_text, stats],
        )
109
+
110
# Launch the app
if __name__ == "__main__":
    # Gradio 4 removed the ``concurrency_count`` argument of ``queue()`` and
    # raises when it is passed; ``default_concurrency_limit`` is its
    # replacement and sets the default per-event concurrency. The limit is
    # read from the CONCURRENCY environment variable (default 4).
    demo.queue(default_concurrency_limit=int(os.getenv("CONCURRENCY", "4"))).launch()
113
+
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ turkic_transliterate==0.1.0 # will pull PyICU, panphon, etc.
2
+ gradio>=4.30
3
+ sentencepiece>=0.2
4
+ rapidfuzz>=3.5