Spaces:

moraxgiga
/

llama_3.2

Sleeping

moraxgiga committed on Sep 27, 2024

Commit

39b2182

verified ·

1 Parent(s): 80de85a

Upload 4 files

Files changed (5) hide show

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Llama-3.2-1B-Instruct-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

+# Base image: full (non-slim) Python 3.11.
+FROM python:3.11
+WORKDIR /app
+# Create the Hugging Face cache directory, make it writable by any user,
+# and point HF_HOME at it.
+RUN mkdir -p /app/hf_cache && chmod -R 777 /app/hf_cache
+ENV HF_HOME=/app/hf_cache
+# Install dependencies before copying the rest of the sources so this layer
+# can be reused when only application files change.
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+# server.py starts the server on port 7860.
+EXPOSE 7860
+CMD ["python", "server.py"]

Llama-3.2-1B-Instruct-Q4_K_M.gguf ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7ede42862ceca07ad1c88a97b67520019c4ac7e5ced250d2e696fa62ab189af
+size 807690688

requirements.txt ADDED Viewed

+# Extra package index (CPU-only PyTorch wheels); this option applies to the whole file.
+--extra-index-url https://download.pytorch.org/whl/cpu
+# Serving stack. fastapi is imported directly by server.py, so it is declared
+# explicitly instead of relying on it arriving as a dependency of litserve.
+litserve
+fastapi
+uvloop
+# GGUF model runtime imported by server.py.
+llama-cpp-python
+# NOTE(review): the packages below are not imported by server.py in this
+# commit; confirm whether they are still needed before removing them.
+transformers==4.38.2
+accelerate
+bitsandbytes
+optimum

server.py ADDED Viewed

+import litserve as ls
+from typing import List, Dict, Any
+from llama_cpp import Llama
+from fastapi import Depends, HTTPException
+from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+class SimpleLitAPI(ls.LitAPI):
+    def setup(self, device):
+        self.llm = Llama(
+        model_path="Llama-3.2-1B-Instruct-Q4_K_M.gguf",
+        verbose=False
+    )
+    def decode_request(self, request):
+        return request
+    def predict(self, input):
+        temperature = input.get("temperature", 1.0)
+        max_tokens = input.get("max_tokens", 100)
+        top_k = input.get("top_k", 50)
+        top_p = input.get("top_p", 0.9)
+        repeat_penalty = input.get("repeat_penalty", 1.0)
+        return self.llm.create_chat_completion(
+            messages=input["messages"],
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_k=top_k,
+            top_p=top_p,
+            repeat_penalty=repeat_penalty,
+        )
+    def encode_response(self, output):
+        return {"output": output}
+    def authorize(self, auth: HTTPAuthorizationCredentials = Depends(HTTPBearer())):
+        if auth.scheme != "Bearer" or auth.credentials != "1234":
+            raise HTTPException(status_code=401, detail="Bad token")
+if __name__ == "__main__":
+    api = SimpleLitAPI()
+    server = ls.LitServer(api, accelerator="cpu", devices=2, workers_per_device=2)
+    server.run(port=7860)