import gradio as gr
from ctransformers import AutoModelForCausalLM  # کتابخانه سبک برای CPU

# Model setup: the lightweight 1.3B DeepSeek Coder base model in GGUF format,
# loaded through ctransformers for CPU inference.
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/deepseek-coder-1.3b-base-GGUF",
    model_file="deepseek-coder-1.3b-base.Q4_K_M.gguf",  # 4-bit quantized file
    # "deepseek" is not one of the model types ctransformers documents.
    # DeepSeek Coder is a LLaMA-architecture model, and "llama" is the
    # documented type for LLaMA-family GGUF files, so use that loader.
    # NOTE(review): confirm against the installed ctransformers version.
    model_type="llama",
    gpu_layers=0,  # run entirely on the CPU
)

def respond(message, history) -> str:
    """Generate one assistant reply for the chat UI.

    Args:
        message: The latest user message; it is inserted verbatim into the
            prompt template.
        history: Earlier conversation turns passed in by the chat interface.
            Currently unused, so each reply is generated without any
            conversational context.

    Returns:
        The text generated by the model for this prompt.
    """
    prompt = (
        f"<|system|>You are a coding assistant</s><|user|>{message}</s><|assistant|>"
    )
    # Small token budget and low temperature: generation settings tuned for CPU.
    return model(
        prompt,
        max_new_tokens=150,
        temperature=0.2,
        # Stop at the end of the assistant turn. Without stop sequences the
        # model can keep going and invent further "<|user|>" / "<|system|>"
        # turns until the token budget is exhausted.
        stop=["</s>", "<|user|>", "<|system|>"],
    )

# Build the chat UI around `respond`, then start the Gradio server.
demo = gr.ChatInterface(
    fn=respond,
    title="🧑‍💻 دستیار برنامه‌نویسی (CPU)",
    examples=[
        "چطوری در پایتون فایل بخوانم؟",
        "خطای undefined در جاوااسکریپت",
    ],
)
demo.launch()