Update app.py
app.py (CHANGED)
@@ -1,42 +1,43 @@
 import os
 import requests
-… (old lines 3–42, removed by this commit, did not survive extraction; only the fragments "from" and "response =" remain)
+import gradio as gr
+from llama_cpp import Llama
+
+MODEL_URL = "https://cas-bridge.xethub.hf.co/xet-bridge-us/680f85f7f3cf2673404f897f/d6cd862a0b513dd35067121f0413e369b37aa9cacde526e31fc016c68ce0d305?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=cas%2F20250503%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250503T191219Z&X-Amz-Expires=3600&X-Amz-Signature=2721d6c78a7db9b4d9b0ed1452ce3017d82b3392a190ae72b3755d873bc6c3df&X-Amz-SignedHeaders=host&X-Xet-Cas-Uid=67e6d6e1648b9f55980fa892&response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27Qwen3-4B-128K-Q4_K_M.gguf%3B+filename%3D%22Qwen3-4B-128K-Q4_K_M.gguf%22%3B&x-id=GetObject&Expires=1746303139&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0NjMwMzEzOX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2FzLWJyaWRnZS54ZXRodWIuaGYuY28veGV0LWJyaWRnZS11cy82ODBmODVmN2YzY2YyNjczNDA0Zjg5N2YvZDZjZDg2MmEwYjUxM2RkMzUwNjcxMjFmMDQxM2UzNjliMzdhYTljYWNkZTUyNmUzMWZjMDE2YzY4Y2UwZDMwNSoifV19&Signature=lxo1PdbqSDERIivvWyQ88rq6oOx0DF5aefiy1LClNYXtBexSV476eNjAEl0wwE1nf7rW%7EcAIctjqzl%7ElVTEFSCosTEACuRrgqtGjyP94xL0xBFMDv1lqJ6E5UFokq0FTRNNd84Xt3GthKDMYanseX9kGpHBNpCwvlx-BRrNF6cbd2XWcCcmetP-NUrHtmoEHY89LVAFb72EXx7edlsOIMOWGzOYcgHI-IfuM2U4m%7Epquxo429CjrEbUi9xpQIQFVBvWHThIJN2LT5NqXpUpPyXswEJyuQDZ0sDTgBEh5gdHtwAWf0cF0i285VgitHIty8eYLEeYNu7J%7EqEDBu7RrYQ__&Key-Pair-Id=K2L8F4GPSG1IFC" # truncated for clarity
+MODEL_PATH = "Qwen3-4B-128K-Q4_K_M.gguf"
+
+# Download model if not already downloaded
+if not os.path.exists(MODEL_PATH):
+    print("Downloading model...")
+    with requests.get(MODEL_URL, stream=True) as r:
+        r.raise_for_status()
+        with open(MODEL_PATH, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+    print("Model downloaded.")
+
+# Load the model
+llm = Llama(
+    model_path=MODEL_PATH,
+    n_ctx=8192,
+    n_threads=4,
+    n_gpu_layers=20,  # Adjust for HF GPU environment
+    chat_format="chatml"
+)
+
+def chat_interface(message, history):
+    if history is None:
+        history = []
+
+    chat_prompt = []
+    for user_msg, bot_msg in history:
+        chat_prompt.append({"role": "user", "content": user_msg})
+        chat_prompt.append({"role": "assistant", "content": bot_msg})
+    chat_prompt.append({"role": "user", "content": message})
+
+    response = llm.create_chat_completion(messages=chat_prompt, stream=False)
+    reply = response["choices"][0]["message"]["content"]
+    # gr.ChatInterface tracks history itself and expects only the reply string
+    return reply
+
+gr.ChatInterface(fn=chat_interface, title="Qwen3-4B Chat").launch()
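One caveat on this change: MODEL_URL is a presigned cas-bridge link (X-Amz-Expires=3600), so it stops resolving roughly an hour after it was signed, and the download step will start failing once it lapses. A more durable option is to fetch the file by name through huggingface_hub, as in the minimal sketch below; the repo_id is a placeholder, since the commit does not say which repository hosts the GGUF file.

# Minimal sketch of a non-expiring download path. Assumes the GGUF file is
# hosted in a Hub model repo; "your-org/Qwen3-4B-128K-GGUF" is hypothetical.
from huggingface_hub import hf_hub_download

MODEL_PATH = hf_hub_download(
    repo_id="your-org/Qwen3-4B-128K-GGUF",  # placeholder, not from the commit
    filename="Qwen3-4B-128K-Q4_K_M.gguf",
)

With either download path, the Space's requirements.txt needs gradio, requests, and llama-cpp-python (plus huggingface_hub for this variant).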