Spaces:

YOUSEF2434
/

qwen

Sleeping

YOUSEF2434 committed on May 3

Commit

cda8406

verified ·

1 Parent(s): 8d0df65

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,12 +16,12 @@ if not os.path.exists(MODEL_PATH):
                 f.write(chunk)
     print("Model downloaded.")
-# Load the model
 llm = Llama(
     model_path=MODEL_PATH,
-    n_ctx=8192,
-    n_threads=4,
-    n_gpu_layers=20,  # Adjust for HF GPU environment
     chat_format="chatml"
 )
@@ -40,4 +40,4 @@ def chat_interface(message, history):
     history.append((message, reply))
     return reply, history
-gr.ChatInterface(fn=chat_interface, title="Qwen3-4B Chat").launch()

                 f.write(chunk)
     print("Model downloaded.")
+# Load the model with adjustments for CPU
 llm = Llama(
     model_path=MODEL_PATH,
+    n_ctx=4096,  # Reduced context window size
+    n_threads=2,  # Reduced threads for CPU use
+    n_gpu_layers=0,  # Set to 0 since we're using CPU
     chat_format="chatml"
 )
     history.append((message, reply))
     return reply, history
+gr.ChatInterface(fn=chat_interface, title="Ministral 3B Chat").launch()