ahmed-eisa committed
Commit 23bd097 · 1 Parent(s): 3abedd3
Files changed (4)
  1. Dockerfile +15 -0
  2. main.py +17 -0
  3. models.py +42 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,15 @@
+ FROM python:3.9-slim
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user main.py models.py requirements.txt ./
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+
+
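Once this image is built and started (for example with docker build and docker run, publishing port 7860), the root endpoint of the app below can serve as a quick health check. A minimal sketch, stdlib only; the localhost:7860 mapping is an assumption about how the container is run, not part of the commit:

import json
from urllib.request import urlopen

# Assumes the container is reachable on localhost:7860 (the port uvicorn binds in CMD).
with urlopen("http://localhost:7860/") as resp:
    print(resp.status)      # expect 200
    print(json.load(resp))  # expect {"status": "healthy"}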
main.py ADDED
@@ -0,0 +1,17 @@
+ # main.py
+ from fastapi import FastAPI
+ from openai import OpenAI
+ from models import load_text_model,generate_text
+
+ app = FastAPI()
+ openai_client = OpenAI(api_key="your_api_key")
+
+ @app.get("/")
+ def root_controller():
+     return {"status": "healthy"}
+
+ @app.get("/generate/text")
+ def serve_language_model_controller(prompt: str) -> str:
+     pipe = load_text_model()
+     output = generate_text(pipe, prompt)
+     return output
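The /generate/text route takes the prompt as a query parameter and returns the generated text (serialized to a JSON string by FastAPI). A minimal client sketch, assuming the app is running locally on port 7860 as configured in the Dockerfile; the example prompt is illustrative:

import json
from urllib.parse import urlencode
from urllib.request import urlopen

# Hypothetical query; any prompt string works.
params = urlencode({"prompt": "How do I add a path parameter in FastAPI?"})
with urlopen(f"http://localhost:7860/generate/text?{params}") as resp:
    print(json.load(resp))  # the model's markdown answer as a plain string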
models.py ADDED
@@ -0,0 +1,42 @@
+ # models.py
+
+ import torch
+ from transformers import Pipeline, pipeline
+
+ prompt = "How to set up a FastAPI project?"
+ system_prompt = """
+ Your name is FastAPI bot and you are a helpful
+ chatbot responsible for teaching FastAPI to your users.
+ Always respond in markdown.
+ """
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ def load_text_model():
+     pipe = pipeline(
+         "text-generation",
+         model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+         torch_dtype=torch.bfloat16,
+         device=device
+     )
+     return pipe
+
+
+ def generate_text(pipe: Pipeline, prompt: str, temperature: float = 0.7) -> str:
+     messages = [
+         {"role": "system", "content": system_prompt},
+         {"role": "user", "content": prompt},
+     ]
+     prompt = pipe.tokenizer.apply_chat_template(
+         messages, tokenize=False, add_generation_prompt=True
+     )
+     predictions = pipe(
+         prompt,
+         temperature=temperature,
+         max_new_tokens=256,
+         do_sample=True,
+         top_k=50,
+         top_p=0.95,
+     )
+     output = predictions[0]["generated_text"].split("</s>\n<|assistant|>\n")[-1]
+     return output
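The two helpers can also be exercised directly, without the API layer. A minimal sketch, assuming the transformers and torch requirements are installed; the first call downloads the TinyLlama checkpoint, and CPU-only execution works but is slow:

from models import load_text_model, generate_text

# Build the text-generation pipeline once, then reuse it across prompts.
pipe = load_text_model()
print(generate_text(pipe, "What is dependency injection in FastAPI?"))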
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ fastapi
+ uvicorn
+ transformers
+ torch
+ pydantic
+ bitsandbytes