Spaces:

ljcamargo
/

amlonet

Build error

Luis J Camargo committed on May 4

Commit

2d27170

1 Parent(s): 6c0ab65

second commit

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,10 +1,7 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
 from llama_cpp import Llama
-# Initialize the InferenceClient
-client = InferenceClient()
 llm = Llama.from_pretrained(
     repo_id="ljcamargo/amlonet_llama",
     filename="unsloth.Q4_K_M.gguf",
@@ -29,18 +26,21 @@ def respond(
     messages.append({"role": "user", "content": message})
     response = ""
-    # Use the client to get the chat completion
-    for message in client.chat_completion(
-        messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
-        token = message['choices'][0]['delta']['content']
-        response += token
-        yield response
 demo = gr.ChatInterface(
     respond,

 import gradio as gr
 from llama_cpp import Llama
+# Initialize the local model
 llm = Llama.from_pretrained(
     repo_id="ljcamargo/amlonet_llama",
     filename="unsloth.Q4_K_M.gguf",
     messages.append({"role": "user", "content": message})
     response = ""
+    # Use the local model for generation
+    for chunk in llm.create_chat_completion(
+        messages=messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
+        if "choices" in chunk and len(chunk["choices"]) > 0:
+            if "delta" in chunk["choices"][0] and "content" in chunk["choices"][0]["delta"]:
+                token = chunk["choices"][0]["delta"]["content"]
+                if token:
+                    response += token
+                    yield response
 demo = gr.ChatInterface(
     respond,