Spaces:

AlphaWice
/

Salam-AI

Sleeping

App Files Files Community

AlphaWice committed 23 days ago

Commit

b97cadb

verified ·

1 Parent(s): 0615894

Update app.py

Browse files

Files changed (1) hide show

app.py +166 -9

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 import torch
 from transformers import pipeline
 # Global variable to store the model
 pipe = None
@@ -19,17 +20,157 @@ def load_model():
         print("✅ Model loaded successfully!")
     return pipe
 def chat_with_atlas(message, history):
-    """Generate response from Atlas-Chat model"""
     if not message.strip():
-        return "مرحبا! أهلا وسهلا. Please enter a message!"
     try:
         # Load model if not already loaded
         model = load_model()
-        # Prepare the message
-        messages = [{"role": "user", "content": message}]
         # Generate response
         outputs = model(
@@ -42,10 +183,19 @@ def chat_with_atlas(message, history):
         # Extract the response
         response = outputs[0]["generated_text"][-1]["content"].strip()
         return response
     except Exception as e:
-        return f"عذراً، واجهت خطأ: {str(e)}. جرب مرة أخرى!"
 # Create the Gradio interface
 demo = gr.ChatInterface(
@@ -54,18 +204,25 @@ demo = gr.ChatInterface(
     description="""
     **مرحبا بك في أطلس شات!** Welcome to Atlas-Chat! 🇲🇦
-    I'm an AI assistant specialized in **Moroccan Arabic (Darija)** and English.
-    Ask me questions about Morocco, culture, or just have a chat!
     **جرب هذه الأسئلة / Try these questions:**
     """,
     examples=[
         "شكون لي صنعك؟",
-        "اشنو هو الطاجين؟",
         "شنو كيتسمى المنتخب المغربي؟",
         "What is Morocco famous for?",
         "Tell me about Casablanca",
-        "كيفاش نقدر نتعلم الدارجة؟"
     ],
     cache_examples=False
 )

 import gradio as gr
 import torch
 from transformers import pipeline
+import re
 # Global variable to store the model
 pipe = None
         print("✅ Model loaded successfully!")
     return pipe
# Arabic-script code point ranges; any hit means the text is not Arabizi.
_ARABIZI_ARABIC_SCRIPT_RE = re.compile(
    r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]'
)

# Digits commonly substituted for Arabic letters (2, 3, 7, 9). A digit only
# counts when it touches a Latin letter ("3la", "n9der"), so plain numbers
# such as "27" or "2023" inside English text are not treated as Arabizi.
_ARABIZI_EMBEDDED_DIGIT_RE = re.compile(r'(?<=[a-z])[2379]|[2379](?=[a-z])')

# Word tokens: runs of ASCII letters and digits in the lower-cased input.
_ARABIZI_TOKEN_RE = re.compile(r'[a-z0-9]+')

# Common Darija words in Latin transliteration, matched as WHOLE words.
# NOTE: a few entries ("had", "meta", "fin", ...) are also English words, so
# a short English sentence containing one of them can still be classified as
# Arabizi by the ratio rules below.
_ARABIZI_WORDS = frozenset({
    'wach', 'wash', 'ach', 'achno', 'chno', 'shno',  # What
    'kif', 'kifash', 'ki', 'kayf',  # How
    'feen', 'fin', 'fen',  # Where
    'imta', 'meta', 'waqt',  # When
    '3la', '3ala', 'ala',  # On/about
    'hna', '7na', 'ahna',  # We/us
    'nta', 'nti', 'ntuma',  # You
    'howa', 'hiya', 'huma',  # He/she/they
    'ma3', 'maa3', 'maak', 'maaki',  # With
    'had', 'hadchi', 'hada', 'hadi',  # This
    'bghit', 'bghiti', 'bgha',  # Want
    'galt', 'galti', 'gal',  # Said
    'rah', 'raha', 'rahi',  # Going
    'kan', 'kanu', 'kana',  # Was/were
    'ghadi', 'ghad', 'gha',  # Will/going to
    'daba', 'dak', 'dakchi',  # Now/that
    'bzf', 'bzzaf', 'bezzaf',  # A lot
    'chway', 'chwiya', 'shwiya',  # A little
    'khoya', 'khuya', 'akhi',  # Brother
    'khti', 'khtiya', 'ukhti',  # Sister
    'allah', 'llah', 'rabi',  # God
    'inchallah',  # God willing
    'hamdulillah', 'alhamdulillah',  # Praise God
    'salam',  # Peace
    'baraka', 'barakallahu',  # Blessing
    'yallah', 'yalla', 'hya',  # Come on/let's go
})

# Multi-word expressions, matched against the space-joined token stream.
_ARABIZI_PHRASES = ('insha allah', 'salamu aleikum')


def detect_arabizi(text):
    """
    Detect whether ``text`` is written in Arabizi (Darija in Latin script,
    with digits standing in for some Arabic letters).

    Heuristic:
      * text shorter than 2 characters, or containing any Arabic-script
        character, is never Arabizi;
      * two or more distinct letter-adjacent digits from 2/3/7/9, or two or
        more distinct known Arabizi words/phrases, is a strong signal;
      * a single digit or word signal is accepted only when ASCII letters
        make up more than 70% (digit) or 80% (word) of the non-space
        characters.

    Words are matched as whole tokens rather than substrings, so English
    words that merely contain a pattern ("beaches", "looking", "Arabic")
    no longer trigger detection.

    Args:
        text: The raw user message (may be empty or None).

    Returns:
        True if the text looks like Arabizi, otherwise False.
    """
    if not text or len(text.strip()) < 2:
        return False

    # Arabic script present - by definition this is not Arabizi.
    if _ARABIZI_ARABIC_SCRIPT_RE.search(text):
        return False

    lowered = text.lower()
    tokens = _ARABIZI_TOKEN_RE.findall(lowered)

    # Distinct substitution digits that sit next to a Latin letter.
    number_count = len(set(_ARABIZI_EMBEDDED_DIGIT_RE.findall(lowered)))

    # Distinct known words, plus phrases found on token boundaries. Padding
    # with spaces keeps a phrase from matching inside a longer word.
    word_count = len(_ARABIZI_WORDS.intersection(tokens))
    padded = f" {' '.join(tokens)} "
    word_count += sum(1 for phrase in _ARABIZI_PHRASES if f" {phrase} " in padded)

    # Strong indicators: several independent signals.
    if number_count >= 2 or word_count >= 2:
        return True

    # Medium indicators: one signal, backed by mostly-Latin content.
    clean_text = lowered.replace(" ", "")
    total_chars = len(clean_text)
    latin_letters = sum(1 for c in clean_text if c.isascii() and c.isalpha())

    if number_count >= 1 and latin_letters > total_chars * 0.7:
        return True
    if word_count >= 1 and latin_letters > total_chars * 0.8:
        return True

    return False
def determine_response_language(user_input):
    """
    Choose the language/script the reply should be written in.

    The Arabizi check runs first; if it does not fire, the input is scanned
    for Arabic-script characters, and anything else is treated as English.

    Returns: 'arabizi', 'arabic', or 'english'
    """
    if detect_arabizi(user_input):
        return 'arabizi'

    # Unicode ranges covering Arabic script and its presentation forms
    arabic_chars = r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]'
    has_arabic_script = re.search(arabic_chars, user_input) is not None

    # Latin-only text with no Arabizi indicators falls back to English
    return 'arabic' if has_arabic_script else 'english'
# Prompt used when the user wrote in Arabizi: forces a Latin-script Darija reply.
_ARABIZI_SYSTEM_PROMPT = """You are Atlas-Chat, an AI assistant specialized in Moroccan Arabic (Darija).
CRITICAL INSTRUCTION: The user has written in Arabizi (Latin script), so you MUST respond ONLY in Arabizi using Latin letters and numbers.
ARABIZI RULES YOU MUST FOLLOW:
- Use ONLY Latin letters (a-z) and numbers for Arabic sounds
- Use these number substitutions: 3=ع, 7=ح, 9=ق, 2=ء, 5=خ, 6=ط, 8=غ
- Write naturally in Moroccan Darija but with Latin script
- Examples: "ana" (أنا), "hna" (حنا), "3la" (على), "7na" (حنا), "wach" (واش)
- Do NOT use any Arabic script characters
- Do NOT switch to English unless the user specifically asks for translation
Respond naturally in Arabizi about Moroccan culture, language, and general topics."""

# Prompt used when the user wrote in Arabic script.
_ARABIC_SYSTEM_PROMPT = """You are Atlas-Chat, an AI assistant specialized in Moroccan Arabic (Darija). Respond in Arabic script (Darija) as this is what the user is using. Be helpful and culturally aware about Morocco and its traditions."""

# Fallback prompt for every other value, i.e. English replies.
_ENGLISH_SYSTEM_PROMPT = """You are Atlas-Chat, an AI assistant specialized in Moroccan Arabic (Darija) but also fluent in English. The user has written in English, so respond in English while being knowledgeable about Moroccan culture and language."""


def create_system_prompt(response_lang):
    """Return the system prompt matching the requested response language.

    'arabizi' and 'arabic' each select their dedicated prompt; any other
    value falls through to the English prompt.
    """
    if response_lang == 'arabizi':
        return _ARABIZI_SYSTEM_PROMPT
    if response_lang == 'arabic':
        return _ARABIC_SYSTEM_PROMPT
    return _ENGLISH_SYSTEM_PROMPT
 def chat_with_atlas(message, history):
+    """Generate response from Atlas-Chat model with language detection"""
     if not message.strip():
+        return "مرحبا! أهلا وسهلا. Please enter a message! / Ahlan wa sahlan!"
     try:
         # Load model if not already loaded
         model = load_model()
+        # Determine response language
+        response_lang = determine_response_language(message)
+        # Create appropriate system prompt
+        system_prompt = create_system_prompt(response_lang)
+        # Prepare messages with system context
+        if response_lang == 'arabizi':
+            # Extra emphasis for Arabizi responses
+            enhanced_message = f"""System: {system_prompt}
+User message (in Arabizi): {message}
+Remember: Respond ONLY in Arabizi (Latin letters + numbers). Do not use Arabic script."""
+            messages = [{"role": "user", "content": enhanced_message}]
+        else:
+            messages = [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": message}
+            ]
         # Generate response
         outputs = model(
         # Extract the response
         response = outputs[0]["generated_text"][-1]["content"].strip()
+        # Post-process for Arabizi if needed
+        if response_lang == 'arabizi':
+            # Remove any Arabic script that might have leaked through
+            arabic_pattern = r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]'
+            if re.search(arabic_pattern, response):
+                # If Arabic script is detected, provide a fallback Arabizi response
+                response = "ana Atlas-Chat, kay3jebni n7der m3ak! chno bghiti t3ref 3la lmaghrib? (I'm Atlas-Chat, I'd love to chat with you! What do you want to know about Morocco?)"
         return response
     except Exception as e:
+        return f"عذراً، واجهت خطأ: {str(e)}. جرب مرة أخرى! / Sorry, error occurred. Try again!"
 # Create the Gradio interface
 demo = gr.ChatInterface(
     description="""
     **مرحبا بك في أطلس شات!** Welcome to Atlas-Chat! 🇲🇦
+    I'm an AI assistant specialized in **Moroccan Arabic (Darija)** with smart language detection:
+    - **Arabic Script (العربية)** → I respond in Arabic
+    - **Arabizi (3arabi bi 7oruf latin)** → I respond in Arabizi
+    - **English** → I respond in English
     **جرب هذه الأسئلة / Try these questions:**
     """,
     examples=[
         "شكون لي صنعك؟",
+        "shkoun li sna3ek?",
+        "اشنو هو الطاجين؟",
+        "achno howa tajine?",
         "شنو كيتسمى المنتخب المغربي؟",
+        "chno kaytsma lmontakhab lmaghribi?",
         "What is Morocco famous for?",
         "Tell me about Casablanca",
+        "كيفاش نقدر نتعلم الدارجة؟",
+        "kifash n9der nt3elem darija?"
     ],
     cache_examples=False
 )