File size: 5,194 Bytes
b9f3278
 
 
 
395aceb
b9f3278
 
 
 
 
 
 
 
395aceb
 
b9f3278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395aceb
b9f3278
 
 
 
 
 
 
8d1d3f4
b9f3278
 
 
 
 
395aceb
b9f3278
 
 
 
 
 
 
8d1d3f4
b9f3278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d1d3f4
b9f3278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import gradio as gr
import torch
import os
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import spaces

# Load Hugging Face token from the environment variable
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    raise ValueError("HF_TOKEN environment variable is not set. Please set it before running the script.")

# Check for GPU support and configure appropriately
device = "cuda" if torch.cuda.is_available() else "cpu"
zero = torch.Tensor([0]).to(device)
print(f"Device being used: {zero.device}")

# Model configurations
MSA_TO_SYRIAN_MODEL = "Omartificial-Intelligence-Space/Shami-MT"
SYRIAN_TO_MSA_MODEL = "Omartificial-Intelligence-Space/SHAMI-MT-2MSA"

# Load models and tokenizers
print("Loading MSA to Syrian model...")
msa_to_syrian_tokenizer = AutoTokenizer.from_pretrained(MSA_TO_SYRIAN_MODEL)
msa_to_syrian_model = AutoModelForSeq2SeqLM.from_pretrained(MSA_TO_SYRIAN_MODEL).to(device)

print("Loading Syrian to MSA model...")
syrian_to_msa_tokenizer = AutoTokenizer.from_pretrained(SYRIAN_TO_MSA_MODEL)
syrian_to_msa_model = AutoModelForSeq2SeqLM.from_pretrained(SYRIAN_TO_MSA_MODEL).to(device)

print("Models loaded successfully!")

@spaces.GPU(duration=120)
def translate_msa_to_syrian(text):
    """Translate from Modern Standard Arabic to Syrian dialect"""
    if not text.strip():
        return ""
    
    try:
        input_ids = msa_to_syrian_tokenizer(text, return_tensors="pt").input_ids.to(device)
        outputs = msa_to_syrian_model.generate(input_ids, max_length=128, num_beams=5, early_stopping=True)
        translated_text = msa_to_syrian_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return translated_text
    except Exception as e:
        return f"Translation error: {str(e)}"

@spaces.GPU(duration=120)
def translate_syrian_to_msa(text):
    """Translate from Syrian dialect to Modern Standard Arabic"""
    if not text.strip():
        return ""
    
    try:
        input_ids = syrian_to_msa_tokenizer(text, return_tensors="pt").input_ids.to(device)
        outputs = syrian_to_msa_model.generate(input_ids, max_length=128, num_beams=5, early_stopping=True)
        translated_text = syrian_to_msa_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return translated_text
    except Exception as e:
        return f"Translation error: {str(e)}"

def bidirectional_translate(text, direction):
    """Handle bidirectional translation based on user selection"""
    if direction == "MSA โ†’ Syrian":
        return translate_msa_to_syrian(text)
    elif direction == "Syrian โ†’ MSA":
        return translate_syrian_to_msa(text)
    else:
        return "Please select a translation direction"

# Create Gradio interface
with gr.Blocks(title="SHAMI-MT: Bidirectional Syria Arabic Dialect MT Framework") as demo:
    
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 2rem;">
        <h1>๐ŸŒ SHAMI-MT: Bidirectional Arabic Translation</h1>
        <p>Translate between Modern Standard Arabic (MSA) and Syrian Dialect</p>
        <p><strong>Built on AraT5v2-base-1024 architecture</strong></p>
    </div>
    """)
    
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML("""
            <div style="background: #f8f9fa; padding: 1rem; border-radius: 8px; margin: 1rem 0;">
                <h3>๐Ÿ“š Model Information</h3>
                <ul>
                    <li><strong>Model Type:</strong> Sequence-to-Sequence Translation</li>
                    <li><strong>Base Model:</strong> UBC-NLP/AraT5v2-base-1024</li>
                    <li><strong>Languages:</strong> Arabic (MSA โ†” Syrian Dialect)</li>
                    <li><strong>Device:</strong> GPU/CPU Auto-detection</li>
                </ul>
            </div>
            """)
    
        with gr.Column(scale=2):
            direction = gr.Dropdown(
                choices=["MSA โ†’ Syrian", "Syrian โ†’ MSA"],
                value="MSA โ†’ Syrian",
                label="Translation Direction"
            )
            
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter Arabic text here...",
                lines=5
            )
            
            translate_btn = gr.Button("๐Ÿš€ Translate", variant="primary")
            
            output_text = gr.Textbox(
                label="Translation",
                lines=5
            )
    
    # Connect the interface
    translate_btn.click(
        fn=bidirectional_translate,
        inputs=[input_text, direction],
        outputs=output_text
    )
    
    # Add example inputs
    gr.Examples(
        examples=[
            ["ุฃู†ุง ู„ุง ุฃุนุฑู ุฅุฐุง ูƒุงู† ุณูŠุชู…ูƒู† ู…ู† ุงู„ุญุถูˆุฑ ุงู„ูŠูˆู… ุฃู… ู„ุง.", "MSA โ†’ Syrian"],
            ["ูƒูŠู ุญุงู„ูƒุŸ", "MSA โ†’ Syrian"],
            ["ู…ุง ุจุนุฑู ุฅุฐุง ุฑุญ ูŠู‚ุฏุฑ ูŠุฌูŠ ุงู„ูŠูˆู… ูˆู„ุง ู„ุฃ.", "Syrian โ†’ MSA"],
            ["ุดู„ูˆู†ูƒุŸ", "Syrian โ†’ MSA"]
        ],
        inputs=[input_text, direction],
        outputs=output_text,
        fn=bidirectional_translate
    )

# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)