import gradio as gr
import torch
from transformers import pipeline, logging

# Process-wide runtime settings applied before the pipeline is built.
# NOTE(review): the original note described these as reducing memory usage;
# nothing visible in this file confirms that effect - verify before relying
# on it.
torch.backends.cudnn.benchmark = True  # enable cuDNN benchmark mode
logging.set_verbosity_error()  # transformers logging: errors only

# Use a lighter-weight model.
model_name = "deepseek-ai/deepseek-coder-1.3b-base"

# Half precision is requested only when a CUDA device is present. On the CPU
# fallback, float16 is poorly supported (missing kernels in some PyTorch
# builds, very slow otherwise), so full precision is used there instead.
_cuda_available = torch.cuda.is_available()
coder = pipeline(
    "text-generation",
    model=model_name,
    device=0 if _cuda_available else -1,  # first GPU, or -1 for CPU
    torch_dtype=torch.float16 if _cuda_available else torch.float32,
)

def respond(message: str, history: list) -> str:
    """Generate the assistant's reply to a single chat message.

    Wraps ``message`` in a question/answer prompt, runs it through the
    module-level ``coder`` text-generation pipeline, and returns only the
    first answer the model produced.

    Args:
        message: The user's latest chat message.
        history: Earlier conversation turns passed in by the chat UI. It is
            accepted for interface compatibility but is not used when
            building the prompt.

    Returns:
        The generated answer with surrounding whitespace stripped, or a
        user-facing error string containing the exception text if
        generation failed.
    """
    question_marker = "### سوال:"
    answer_marker = "### پاسخ:"
    prompt = f"{question_marker} {message}\n{answer_marker}"

    try:
        # return_full_text=False makes the pipeline return only the newly
        # generated text, so the prompt (including any marker the user may
        # have typed) never has to be stripped out by string matching.
        outputs = coder(prompt, max_new_tokens=150, return_full_text=False)
        answer = outputs[0]["generated_text"]
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing
        # the handler.
        return f"خطا: {e}"

    # A base model may keep going and invent further question/answer turns.
    # Keep only the text before the first marker so the reply is the answer
    # to the user's own question, not to a made-up follow-up.
    for marker in (question_marker, answer_marker):
        answer = answer.split(marker, 1)[0]
    return answer.strip()

# Build the chat UI around `respond`, then start the local web server on
# port 7860 with public share links disabled.
demo = gr.ChatInterface(
    respond,
    title="🧑‍💻 دستیار برنامه‌نویسی",
    description="پرسش‌های برنامه‌نویسی خود را مطرح کنید",
)
demo.launch(server_port=7860, share=False)