ahmed-eisa committed
Commit 23bd097 · 1 Parent(s): 3abedd3
Files changed (4)
  1. Dockerfile +15 -0
  2. main.py +17 -0
  3. models.py +42 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,15 @@
+ FROM python:3.9-slim
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user main.py models.py requirements.txt ./
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+
+
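Once this image is built and started (for example with docker build and docker run, publishing port 7860), the root endpoint of the app below can serve as a quick health check. A minimal sketch, stdlib only; the localhost:7860 mapping is an assumption about how the container is run, not part of the commit:

import json
from urllib.request import urlopen

# Assumes the container is reachable on localhost:7860 (the port uvicorn binds in CMD).
with urlopen("http://localhost:7860/") as resp:
    print(resp.status)      # expect 200
    print(json.load(resp))  # expect {"status": "healthy"}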
main.py ADDED
@@ -0,0 +1,17 @@
+ # main.py
+ from fastapi import FastAPI
+ from openai import OpenAI
+ from models import load_text_model,generate_text
+
+ app = FastAPI()
+ openai_client = OpenAI(api_key="your_api_key")
+
+ @app.get("/")
+ def root_controller():
+     return {"status": "healthy"}
+
+ @app.get("/generate/text")
+ def serve_language_model_controller(prompt: str) -> str:
+     pipe = load_text_model()
+     output = generate_text(pipe, prompt)
+     return output
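The /generate/text route takes the prompt as a query parameter and returns the generated text (serialized to a JSON string by FastAPI). A minimal client sketch, assuming the app is running locally on port 7860 as configured in the Dockerfile; the example prompt is illustrative:

import json
from urllib.parse import urlencode
from urllib.request import urlopen

# Hypothetical query; any prompt string works.
params = urlencode({"prompt": "How do I add a path parameter in FastAPI?"})
with urlopen(f"http://localhost:7860/generate/text?{params}") as resp:
    print(json.load(resp))  # the model's markdown answer as a plain string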
models.py ADDED
@@ -0,0 +1,42 @@
+ # models.py
+
+ import torch
+ from transformers import Pipeline, pipeline
+
+ prompt = "How to set up a FastAPI project?"
+ system_prompt = """
+ Your name is FastAPI bot and you are a helpful
+ chatbot responsible for teaching FastAPI to your users.
+ Always respond in markdown.
+ """
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ def load_text_model():
+     pipe = pipeline(
+         "text-generation",
+         model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+         torch_dtype=torch.bfloat16,
+         device=device
+     )
+     return pipe
+
+
+ def generate_text(pipe: Pipeline, prompt: str, temperature: float = 0.7) -> str:
+     messages = [
+         {"role": "system", "content": system_prompt},
+         {"role": "user", "content": prompt},
+     ]
+     prompt = pipe.tokenizer.apply_chat_template(
+         messages, tokenize=False, add_generation_prompt=True
+     )
+     predictions = pipe(
+         prompt,
+         temperature=temperature,
+         max_new_tokens=256,
+         do_sample=True,
+         top_k=50,
+         top_p=0.95,
+     )
+     output = predictions[0]["generated_text"].split("</s>\n<|assistant|>\n")[-1]
+     return output
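The two helpers can also be exercised directly, without the API layer. A minimal sketch, assuming the transformers and torch requirements are installed; the first call downloads the TinyLlama checkpoint, and CPU-only execution works but is slow:

from models import load_text_model, generate_text

# Build the text-generation pipeline once, then reuse it across prompts.
pipe = load_text_model()
print(generate_text(pipe, "What is dependency injection in FastAPI?"))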
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ fastapi
+ uvicorn
+ transformers
+ torch
+ pydantic
+ bitsandbytes