Spaces: Runtime error
Update app.py
app.py
CHANGED
@@ -6,14 +6,12 @@ from llama_cpp_agent.providers import LlamaCppPythonProvider
 from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import UserMessage, AssistantMessage
 
-# ⬇️ Download the model
 hf_hub_download(
     repo_id="mradermacher/Arsh-llm-GGUF",
     filename="Arsh-llm.Q4_K_M.gguf",
     local_dir="./models"
 )
 
-# 🧠 Load the model (only once)
 llm = Llama(
     model_path="./models/Arsh-llm.Q4_K_M.gguf",
     n_batch=512,
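The hunk above cuts off inside the Llama(...) call. For reference, here is a minimal self-contained sketch of the same download-and-load step; n_ctx=2048 is an assumed value (the real setting sits in the folded lines), everything else is taken from the diff:

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the GGUF weights once; hf_hub_download returns the local file path.
model_path = hf_hub_download(
    repo_id="mradermacher/Arsh-llm-GGUF",
    filename="Arsh-llm.Q4_K_M.gguf",
    local_dir="./models",
)

# Load the model a single time at startup and reuse it across requests.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,   # assumed context size, not shown in the diff
    n_batch=512,
)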
@@ -25,14 +23,12 @@ provider = LlamaCppPythonProvider(llm)
 
 agent = LlamaCppAgent(
     provider,
-    system_prompt="You are Arsh, a helpful assistant.",
+    system_prompt="You are Arsh, a helpful assistant by Arshia Afshani. You should answer the user carefully.",
     predefined_messages_formatter_type=MessagesFormatterType.CHATML,
     debug_output=False
 )
 
-# 💬 Response function
 def respond(message, chat_history, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
-    # Settings
     settings = provider.get_provider_default_settings()
     settings.temperature = temperature
     settings.top_k = top_k
@@ -44,7 +40,6 @@ def respond(message, chat_history, system_message, max_tokens, temperature, top_
     if chat_history is None:
         chat_history = []
 
-    # ✅ Correct use of BasicChatHistory
     messages = BasicChatHistory()
 
     for msg in chat_history:
@@ -53,7 +48,6 @@ def respond(message, chat_history, system_message, max_tokens, temperature, top_
         elif msg["role"] == "assistant":
             messages.add_message(AssistantMessage(msg["content"]))
 
-    # Get the response stream
     stream = agent.get_chat_response(
         message,
         chat_history=messages,
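The respond() generator appears only in fragments across these hunks. Below is a condensed sketch of the full streaming flow, assuming the folded lines follow the usual llama-cpp-agent pattern; every line marked "assumed" is a guess, not taken from the diff:

def respond_sketch(message, messages, max_tokens, temperature, top_p, top_k, repeat_penalty):
    # Copy the provider defaults, then apply the sampling parameters from the UI.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p                    # assumed: hidden by the fold
    settings.max_tokens = max_tokens          # assumed
    settings.repeat_penalty = repeat_penalty  # assumed
    settings.stream = True                    # assumed: enables token streaming

    # Stream tokens and yield the growing string so Gradio can update live.
    stream = agent.get_chat_response(
        message,
        chat_history=messages,                # the rebuilt BasicChatHistory
        llm_sampling_settings=settings,       # assumed keyword
        returns_streaming_generator=True,     # assumed keyword
        print_output=False,                   # assumed keyword
    )
    response = ""
    for token in stream:
        response += token
        yield response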
@@ -67,15 +61,14 @@ def respond(message, chat_history, system_message, max_tokens, temperature, top_
         response += token
         yield response
 
-# 🎛️ Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Arsh-LLM
+    gr.Markdown("# Arsh-LLM Demo")
 
     with gr.Row():
         with gr.Column():
-            system_msg = gr.Textbox("You are Arsh, a helpful assistant.", label="System Message", interactive=True)
+            system_msg = gr.Textbox("You are Arsh, a helpful assistant by Arshia Afshani. You should answer the user carefully.", label="System Message", interactive=True)
             max_tokens = gr.Slider(1, 4096, value=2048, step=1, label="Max Tokens")
-            temperature = gr.Slider(0.1, 4.0, value=0.
+            temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
             top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
             top_k = gr.Slider(0, 100, value=40, step=1, label="Top-k")
             repeat_penalty = gr.Slider(0.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty")
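Note that the two removed lines above, gr.Markdown("# Arsh-LLM and temperature = gr.Slider(0.1, 4.0, value=0., break off mid-expression. If the previous revision really shipped that way, Python would have raised a SyntaxError as soon as the Space imported app.py, which would explain the Runtime error status this commit addresses; the replacement lines close both expressions.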
@@ -98,6 +91,5 @@ with gr.Blocks() as demo:
         chatbot=chatbot
     )
 
-# 🚀 Run the app
 if __name__ == "__main__":
     demo.launch()
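The final hunk shows only the tail of the UI wiring (chatbot=chatbot and its closing parenthesis); the call it belongs to sits in the folded lines (82-97 on the old side). A plausible reconstruction, assuming the common Gradio pattern of handing a streaming generator plus extra controls to gr.ChatInterface; the wiring below is a guess, not taken from the diff:

    # Hypothetical wiring for the folded region: a Chatbot component plus a
    # ChatInterface that passes the sliders into respond() as extra inputs.
    chatbot = gr.Chatbot()
    gr.ChatInterface(
        respond,
        additional_inputs=[system_msg, max_tokens, temperature, top_p, top_k, repeat_penalty],
        chatbot=chatbot
    )

With that wiring, each submitted message streams back through the yield loop in respond().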
|