Spaces:

ahmadbeilouni
/

Falcon_testing

Sleeping

App Files Community

ahmadbeilouni committed on Aug 5

Commit

849104b

verified ·

1 Parent(s): aee9dcc

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -79

app.py CHANGED Viewed

@@ -1,66 +1,101 @@
-import os
-import gradio as gr
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-from huggingface_hub import snapshot_download
-# ================================
-# CONFIGURATION
-# ================================
-MODEL_NAME_PRIMARY = "tiiuae/Falcon-H1-7B-Instruct"
-MODEL_NAME_FALLBACK = "tiiuae/falcon-7b-instruct"
-MODEL_LOCAL_DIR = "./falcon_model"
-MAX_LENGTH = 120
-TEMPERATURE = 0.3
-REPETITION_PENALTY = 1.8
-print("🚀 Preparing environment...")
-# 1️⃣ Upgrade transformers & accelerate
-os.system("pip install --upgrade pip")
-os.system("pip install --upgrade transformers accelerate safetensors huggingface_hub")
-# 2️⃣ Ensure clean download of model
-try:
-    print(f"⬇️ Downloading model: {MODEL_NAME_PRIMARY}")
-    snapshot_download(MODEL_NAME_PRIMARY, local_dir=MODEL_LOCAL_DIR, force_download=True)
-    model_name = MODEL_LOCAL_DIR
-except Exception as e:
-    print(f"⚠️ Primary model download failed: {e}")
-    print("➡️ Falling back to Falcon 7B Instruct")
-    snapshot_download(MODEL_NAME_FALLBACK, local_dir=MODEL_LOCAL_DIR, force_download=True)
-    model_name = MODEL_LOCAL_DIR
-# 3️⃣ Load tokenizer and model
-try:
-    print("🔄 Loading tokenizer and model...")
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME_PRIMARY,
-    torch_dtype=torch.float16,
-    trust_remote_code=True,
-    device_map="auto",
-    low_cpu_mem_usage=True
-)
-    generator = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        torch_dtype=torch.float16,
-        device=0 if torch.cuda.is_available() else -1
     )
-    print("✅ Model loaded successfully")
-    model_loaded = True
 except Exception as e:
-    print(f"❌ Model loading failed: {e}")
-    generator = None
-    model_loaded = False
-# ================================
-# Test Questions (Pre-Filled)
-# ================================
 test_questions = [
     "بدي شقة بالمالكي فيها شرفة وغسالة صحون.",
     "هل في شقة دوبلكس بالمزة الفيلات فيها موقفين سيارة؟",
@@ -71,43 +106,45 @@ test_questions = [
     "عندك منزل مستقل بالمهاجرين مع موقد حطب؟"
 ]
-# ================================
-# Falcon Chat Function
-# ================================
-def chat_falcon(user_input):
-    if not model_loaded:
-        return "❌ النموذج غير محمل. تحقق من الإعدادات."
-    prompt = f"أنت مساعد عقارات ذكي. أجب بجملة أو جملتين واضحتين.\nالسؤال: {user_input}\nالجواب:"
-    output = generator(
         prompt,
-        max_new_tokens=MAX_LENGTH,
         do_sample=True,
         temperature=TEMPERATURE,
         repetition_penalty=REPETITION_PENALTY,
-        top_p=0.9
     )[0]["generated_text"]
-    return output.replace(prompt, "").strip()
-# ================================
-# Build Gradio Interface
-# ================================
 with gr.Blocks() as demo:
-    gr.Markdown("## 🏠 Falcon H1 7B Instruct - Damascus Real Estate Test")
-    gr.Markdown("اختبر قدرة النموذج على فهم الأسئلة بالعربية (لهجة سورية أو فصحى)")
     with gr.Row():
         with gr.Column(scale=2):
-            user_input = gr.Textbox(label="اكتب سؤالك هنا", lines=3, placeholder="مثال: بدي شقة بالمزة فيها بلكون")
-            submit_btn = gr.Button("🔎 أرسل")
         with gr.Column(scale=1):
-            suggestions = gr.Dropdown(choices=test_questions, label="🧾 أسئلة جاهزة", value=test_questions[0])
-    output_box = gr.Textbox(label="إجابة النموذج", lines=8)
-    submit_btn.click(fn=chat_falcon, inputs=user_input, outputs=output_box)
-    suggestions.change(fn=chat_falcon, inputs=suggestions, outputs=output_box)
-demo.launch()

+# app.py — Falcon H1 7B Instruct loader + Gradio UI (HF Spaces)
+import os, sys, subprocess
+# ---- 0) Environment hygiene (Spaces shows "libgomp" warning) ----
+os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"   # faster downloads when available
+# ---- 1) Upgrade critical deps BEFORE importing transformers ----
+def pipi(cmd: str):
+    """Run ``pip`` with the given argument string under the current interpreter.
+
+    Args:
+        cmd: pip arguments as one string; it is split on whitespace, so an
+            individual argument cannot itself contain spaces.
+
+    Raises:
+        subprocess.CalledProcessError: if pip exits non-zero (``check=True``).
+    """
+    print(f"[pip] {cmd}")
+    # Argument-list form (no shell): ">=" in a requirement specifier is passed
+    # to pip literally rather than being treated as a redirection.
+    subprocess.run([sys.executable, "-m", "pip"] + cmd.split(), check=True)
+pipi("install --upgrade pip")
+pipi("install --upgrade --no-cache-dir safetensors>=0.4.5 tokenizers>=0.19.1 accelerate>=0.33.0")
+# Falcon-H1 needs the newest transformers; install from GitHub to be safe (per model card)
+pipi("install --no-cache-dir git+https://github.com/huggingface/transformers.git")
+# ---- 2) Imports AFTER upgrades ----
+import torch
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from huggingface_hub import snapshot_download
+# ---- 3) Config ----
+MODEL_NAME_PRIMARY   = "tiiuae/Falcon-H1-7B-Instruct"
+MODEL_NAME_FALLBACK  = "tiiuae/falcon-7b-instruct"  # stable fallback
+MODEL_LOCAL_DIR      = "./falcon_local"
+MAX_NEW_TOKENS       = 120
+TEMPERATURE          = 0.30
+TOP_P                = 0.90
+REPETITION_PENALTY   = 1.8
+print("🚀 Preparing model…")
+print("ℹ️  Target primary model:", MODEL_NAME_PRIMARY)
+# ---- 4) Get a clean local snapshot (avoid corrupted cache) ----
+def get_model_snapshot(repo_id: str, local_dir: str) -> str:
+    """Download ``repo_id`` from the Hub into ``local_dir`` and return the result.
+
+    Args:
+        repo_id: Hub repository id to fetch.
+        local_dir: target directory handed to ``snapshot_download``.
+
+    Returns:
+        Whatever ``snapshot_download`` returns (annotated as ``str``;
+        presumably the local snapshot path — confirm against huggingface_hub).
+    """
+    # NOTE(review): no allow_patterns filter is actually passed here, so nothing
+    # restricts which repository files are requested.
+    return snapshot_download(
+        repo_id,
+        local_dir=local_dir,
+        local_dir_use_symlinks=False,
+        force_download=True  # ensure fresh download if previous attempt was partial
     )
+model_path = None
+# Tracks whether the primary download succeeded; the fallback is attempted
+# only when this has been flipped to False.
+primary_ok = True
+try:
+    print(f"⬇️  Downloading {MODEL_NAME_PRIMARY} …")
+    model_path = get_model_snapshot(MODEL_NAME_PRIMARY, MODEL_LOCAL_DIR)
 except Exception as e:
+    print(f"⚠️  Primary download failed: {e}")
+    primary_ok = False
+if not primary_ok:
+    # NOTE(review): the fallback is written into the same MODEL_LOCAL_DIR as
+    # the failed primary attempt; files left over from a partial primary
+    # download may remain alongside it — confirm this is harmless.
+    try:
+        print(f"➡️  Falling back to {MODEL_NAME_FALLBACK} …")
+        model_path = get_model_snapshot(MODEL_NAME_FALLBACK, MODEL_LOCAL_DIR)
+    except Exception as e:
+        print(f"❌ Fallback download failed: {e}")
+        # Leaving model_path as None makes the loading step below skip itself.
+        model_path = None
+# ---- 5) Load model/tokenizer ----
+# Defaults for the "nothing loaded" state; chat_falcon checks both values.
+generator = None
+model_loaded = False
+if model_path:
+    try:
+        print("🔄 Loading tokenizer & model…")
+        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
+        # H1 is BF16 on card; FP16 may work but BF16 is safer if hardware supports it.
+        # NOTE(review): bfloat16 is selected whenever CUDA is available; actual
+        # bf16 support of the GPU is not checked here.
+        dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
+        model = AutoModelForCausalLM.from_pretrained(
+            model_path,
+            torch_dtype=dtype,
+            device_map="auto",
+            trust_remote_code=True,
+            low_cpu_mem_usage=True,
+        )
+        # NOTE(review): the model above is loaded with device_map="auto" and
+        # the pipeline below is also given an explicit device=. Some
+        # transformers releases warn about or reject this combination —
+        # confirm against the installed version; dropping device= may be needed.
+        generator = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            torch_dtype=dtype,
+            device=0 if torch.cuda.is_available() else -1
+        )
+        model_loaded = True
+        print("✅ Model loaded successfully")
+    except Exception as e:
+        # Any failure is only printed; the app still starts and chat_falcon
+        # answers with its "model not loaded" message.
+        print(f"❌ Model loading failed: {e}")
+        model_loaded = False
+# ---- 6) App logic ----
 test_questions = [
     "بدي شقة بالمالكي فيها شرفة وغسالة صحون.",
     "هل في شقة دوبلكس بالمزة الفيلات فيها موقفين سيارة؟",
     "عندك منزل مستقل بالمهاجرين مع موقد حطب؟"
 ]
+def chat_falcon(user_input: str) -> str:
+    """Generate a short answer to ``user_input`` with the loaded pipeline.
+
+    Returns a fixed Arabic error message when no model is loaded. Otherwise
+    wraps the question in an Arabic instruction prompt, samples a completion
+    using the module-level generation settings, and returns the generated
+    text with the prompt removed and surrounding whitespace stripped.
+    """
+    if not model_loaded or generator is None:
+        return "❌ النموذج غير محمّل. الرجاء إعادة المحاولة لاحقاً."
+    # Compact, anchored instruction to reduce looping & keep it on-topic.
+    prompt = (
+        "أنت مساعد عقارات ذكي. أجب بجملة أو جملتين واضحتين فقط.\n"
+        f"السؤال: {user_input}\n"
+        "الجواب:"
+    )
+    # The pipeline result is indexed as a list of dicts; only the first
+    # entry's "generated_text" is used.
+    out = generator(
         prompt,
+        max_new_tokens=MAX_NEW_TOKENS,
         do_sample=True,
         temperature=TEMPERATURE,
+        top_p=TOP_P,
         repetition_penalty=REPETITION_PENALTY,
+        eos_token_id=generator.tokenizer.eos_token_id
     )[0]["generated_text"]
+    # remove prompt echo if present
+    # NOTE(review): str.replace removes every occurrence of the prompt text,
+    # not only a leading echo.
+    return out.replace(prompt, "").strip()
+# ---- 7) Gradio UI (no share=True inside Spaces) ----
 with gr.Blocks() as demo:
+    # NOTE(review): the title is a fixed string and names Falcon H1 even when
+    # the fallback model was the one that loaded.
+    gr.Markdown("## 🏠 Falcon H1 7B Instruct — Damascus Real Estate Test")
+    gr.Markdown("اختبر قدرة النموذج على فهم الأسئلة بالعربية (فصحى ولهجة شامية).")
     with gr.Row():
         with gr.Column(scale=2):
+            tb = gr.Textbox(label="اكتب سؤالك هنا", lines=3, placeholder="مثال: بدي شقة بالمزة فيها بلكون")
+            btn = gr.Button("🔎 أرسل")
         with gr.Column(scale=1):
+            dd = gr.Dropdown(choices=test_questions, value=test_questions[0], label="🧾 أسئلة جاهزة")
+    out = gr.Textbox(label="إجابة النموذج", lines=8)
+    # Both the button click and a dropdown change call chat_falcon and write
+    # its return value into the same output textbox.
+    btn.click(chat_falcon, inputs=tb, outputs=out)
+    dd.change(chat_falcon, inputs=dd, outputs=out)
+# Explicit host and port are passed to launch(); share=True is not used.
+demo.launch(server_name="0.0.0.0", server_port=7860)