Luis J Camargo
committed on
Commit
·
2d27170
1
Parent(s):
6c0ab65
second commit
Browse files
app.py
CHANGED
|
@@ -1,10 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from huggingface_hub import InferenceClient
|
| 3 |
from llama_cpp import Llama
|
| 4 |
|
| 5 |
-
# Initialize the
|
| 6 |
-
client = InferenceClient()
|
| 7 |
-
|
| 8 |
llm = Llama.from_pretrained(
|
| 9 |
repo_id="ljcamargo/amlonet_llama",
|
| 10 |
filename="unsloth.Q4_K_M.gguf",
|
|
@@ -29,18 +26,21 @@ def respond(
|
|
| 29 |
messages.append({"role": "user", "content": message})
|
| 30 |
|
| 31 |
response = ""
|
| 32 |
-
|
| 33 |
-
# Use the
|
| 34 |
-
for
|
| 35 |
-
messages,
|
| 36 |
max_tokens=max_tokens,
|
| 37 |
stream=True,
|
| 38 |
temperature=temperature,
|
| 39 |
top_p=top_p,
|
| 40 |
):
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
demo = gr.ChatInterface(
|
| 46 |
respond,
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
from llama_cpp import Llama
|
| 3 |
|
| 4 |
+
# Initialize the local model
|
|
|
|
|
|
|
| 5 |
llm = Llama.from_pretrained(
|
| 6 |
repo_id="ljcamargo/amlonet_llama",
|
| 7 |
filename="unsloth.Q4_K_M.gguf",
|
|
|
|
| 26 |
messages.append({"role": "user", "content": message})
|
| 27 |
|
| 28 |
response = ""
|
| 29 |
+
|
| 30 |
+
# Use the local model for generation
|
| 31 |
+
for chunk in llm.create_chat_completion(
|
| 32 |
+
messages=messages,
|
| 33 |
max_tokens=max_tokens,
|
| 34 |
stream=True,
|
| 35 |
temperature=temperature,
|
| 36 |
top_p=top_p,
|
| 37 |
):
|
| 38 |
+
if "choices" in chunk and len(chunk["choices"]) > 0:
|
| 39 |
+
if "delta" in chunk["choices"][0] and "content" in chunk["choices"][0]["delta"]:
|
| 40 |
+
token = chunk["choices"][0]["delta"]["content"]
|
| 41 |
+
if token:
|
| 42 |
+
response += token
|
| 43 |
+
yield response
|
| 44 |
|
| 45 |
demo = gr.ChatInterface(
|
| 46 |
respond,
|