Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Create app.py
#1148
by
iamtheabdullah
- opened
app.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
from huggingface_hub import hf_hub_download
|
4 |
+
from llama_cpp import Llama
|
5 |
+
|
6 |
+
# --- Choose the GGUF you want from HF and set here (exact names) ---
# Repo on the Hugging Face Hub and the exact quantized GGUF filename inside it;
# both overridable via Space environment variables.
REPO_ID = os.getenv("REPO_ID", "TheBloke/airoboros-mistral-7B-GGUF")
MODEL_FILE = os.getenv("MODEL_FILE", "airoboros-mistral-7b.Q4_K_M.gguf")

# System prompt prepended to every conversation by chat_fn below.
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are an uncensored assistant. Answer directly.")
APP_PASSWORD = os.getenv("APP_PASSWORD", "")  # set in Settings → Variables if you want a password
CTX = int(os.getenv("CTX", "4096"))           # context window size in tokens
THREADS = int(os.getenv("THREADS", "4"))      # CPU threads used by llama.cpp
N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))  # 0 on free CPU Space

# Download model (cached by HF infra)
# NOTE: runs at import time — the Space blocks here on first boot until the
# multi-GB GGUF finishes downloading.
model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE, local_dir="models")

# Load model
# Also at import time; a single shared Llama instance is used by all requests
# (gradio's default queue serializes calls into chat_fn, so this is safe here).
llm = Llama(
    model_path=model_path,
    n_ctx=CTX,
    n_threads=THREADS,
    n_gpu_layers=N_GPU_LAYERS,
    verbose=False
)
27 |
+
|
28 |
+
def chat_fn(history, user_msg):
    """Run one chat turn against the loaded model.

    *history* is a list of (role, text) tuples ("user"/"assistant").
    Returns the history extended with the new user/assistant turns, plus
    an empty string used by the caller to clear the input textbox.
    """
    # Rebuild the OpenAI-style message list: system prompt first, then the
    # prior transcript, then the new user message.
    convo = [{"role": "system", "content": SYSTEM_PROMPT}]
    convo.extend(
        {"role": "user" if role == "user" else "assistant", "content": text}
        for role, text in history
    )
    convo.append({"role": "user", "content": user_msg})

    completion = llm.create_chat_completion(
        messages=convo,
        temperature=0.9, top_p=0.92, repeat_penalty=1.05, max_tokens=1024
    )
    answer = completion["choices"][0]["message"]["content"]

    # Return a new list rather than mutating the gr.State value in place.
    return history + [("user", user_msg), ("assistant", answer)], ""
|
41 |
+
|
42 |
+
with gr.Blocks(title="Airoboros Mistral 7B (Uncensored)") as demo:
    gr.Markdown("## Airoboros Mistral 7B (Uncensored)\nFree CPU Space is slow. Add a GPU in **Settings → Hardware** for speed.")

    # Simple password gate (optional)
    with gr.Row():
        user = gr.Textbox(label="User (any)")
        pwd = gr.Textbox(label="Password", type="password")
        enter = gr.Button("Enter")
    gate_info = gr.Markdown(visible=False, value="Access granted. Start chatting below.")
    chat = gr.Chatbot(height=460, visible=False)
    msg = gr.Textbox(label="Message", visible=False, lines=3, placeholder="Ask anything…")
    send = gr.Button("Send", visible=False)
    # Role-tagged transcript: [("user", text), ("assistant", text), ...]
    state = gr.State([])

    def allow(u, p):
        """Check the optional password and unhide the chat widgets.

        Outputs map positionally to [gate_info, user, chat, msg, send].
        """
        if not APP_PASSWORD or p == APP_PASSWORD:
            # Reset the value too, in case a prior failed attempt replaced it
            # with the "Wrong password" text.
            return (
                gr.update(visible=True, value="Access granted. Start chatting below."),
                gr.update(visible=False),
                gr.update(visible=True),
                gr.update(visible=True),
                gr.update(visible=True),
            )
        # BUG FIX: gate_info is created with visible=False, so the error text
        # must also be made visible — previously a wrong password produced no
        # visible feedback at all.
        return (
            gr.update(visible=True, value="Wrong password. Try again."),
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(),
        )

    enter.click(allow, [user, pwd], [gate_info, user, chat, msg, send])

    def _render(s):
        """Convert role-tagged state into the (user, assistant) pairs gr.Chatbot expects."""
        # BUG FIX: passing the raw state through displayed the literal string
        # "user" as the user message and the real message as the bot reply,
        # because Chatbot tuples are (user_text, assistant_text). chat_fn
        # always appends turns in user/assistant pairs, so pair them up here.
        return [(u_text, a_text) for (_, u_text), (_, a_text) in zip(s[::2], s[1::2])]

    send.click(chat_fn, [state, msg], [state, msg]).then(_render, state, chat)

demo.queue().launch()