Petro committed on
Commit
48afe02
·
1 Parent(s): 461052c

First model version

Browse files
Files changed (2) hide show
  1. main.py +13 -5
  2. requirements.txt +1 -1
main.py CHANGED
@@ -1,18 +1,26 @@
1
- from llama_cpp import Llama
2
  from fastapi import FastAPI
3
  from pydantic import BaseModel
4
 
5
- model_file = "zephyr-7b-beta.Q4_K_S.gguf"
6
- llm = Llama(model_path=model_file, n_ctx=512, n_batch=126)
 
 
 
 
 
 
 
 
7
 
8
 
9
  class validation(BaseModel):
10
  prompt: str
 
11
 
12
 
13
  app = FastAPI()
14
 
15
-
16
  @app.post("/llm_on_cpu")
17
  async def stream(item: validation):
18
  system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
@@ -20,4 +28,4 @@ async def stream(item: validation):
20
  user, assistant = "<|user|>", "<|assistant|>"
21
  prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt}{E_INST}\n{assistant}\n"
22
 
23
- return llm("What is an LLM?", max_tokens=100)
 
1
+ from langchain.llms import CTransformers
2
  from fastapi import FastAPI
3
  from pydantic import BaseModel
4
 
5
# Quantized GGUF weights file, expected in the working directory.
file_name = "zephyr-7b-beta.Q4_K_S.gguf"

# Generation settings passed straight through to CTransformers.
config = {
    "max_new_tokens": 1024,
    "model_type": "mistral",
    # "stream": True,
}

# CPU-backed model handle used by the /llm_on_cpu endpoint below.
llm = CTransformers(model=file_name, **config)
15
 
16
 
17
# Request body for the /llm_on_cpu endpoint.
# NOTE(review): class name stays lowercase to keep the external interface
# unchanged, though PEP 8 would prefer `Validation`.
class validation(BaseModel):
    # The user's raw prompt text.
    prompt: str
19
# FastAPI application instance serving the inference endpoint.
app = FastAPI()
23
 
 
24
@app.post("/llm_on_cpu")
async def stream(item: validation):
    """Build a Zephyr-style chat prompt from the request and return the model's completion."""
    # NOTE(review): E_INST is not defined in the code visible here — its
    # defining line appears to be elided from this diff view; confirm it
    # exists (likely an instruction/turn separator token) before deploying.
    system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
    user, assistant = "<|user|>", "<|assistant|>"
    # Assemble: system prompt, user turn, then the assistant cue the model
    # continues from.
    prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt}{E_INST}\n{assistant}\n"
    return llm(prompt)
requirements.txt CHANGED
@@ -5,4 +5,4 @@ uvicorn
5
  requests
6
  python-dotenv
7
  ctransformers
8
- llama-cpp-python
 
5
  requests
6
  python-dotenv
7
  ctransformers
8
+ langchain