Gokulavelan committed
Commit 4b8202a · 1 Parent(s): 089d8a1
Files changed (3)
  1. Dockerfile +23 -5
  2. main.py +29 -13
  3. requirements.txt +2 -1
Dockerfile CHANGED
@@ -1,10 +1,28 @@
-FROM python:3.10
+# Use the official Python image as a base
+FROM python:3.9-slim
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
 
+# Create and set the working directory
 WORKDIR /app
 
+# Copy requirements and install them
 COPY requirements.txt .
-RUN pip install -r requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
 
+# Copy the FastAPI application code
 COPY . .
 
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+# Expose the port FastAPI will run on
+EXPOSE 8000
+
+# Command to run the application
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
main.py CHANGED
@@ -1,20 +1,36 @@
-from fastapi import FastAPI
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
 app = FastAPI()
 
-model_name = "unsloth/Qwen2.5-7B-bnb-4bit"  # Change to your model
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+class TextGenerationRequest(BaseModel):
+    prompt: str
+    max_length: int = 100
+    temperature: float = 0.7
 
-@app.get("/")
-def read_root():
-    return {"message": "Chat API is running!"}
+# Load model and tokenizer (force CPU usage)
+model_name = "unsloth/Qwen2.5-7B-bnb-4bit"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    trust_remote_code=True,
+    torch_dtype=torch.float32,  # change to float32 for CPU
+    device_map="cpu"  # force CPU usage
+)
 
-@app.post("/chat")
-def chat(prompt: str):
-    inputs = tokenizer(prompt, return_tensors="pt")
-    outputs = model.generate(**inputs, max_new_tokens=100)
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return {"response": response}
+@app.post("/generate")
+async def generate_text(request: TextGenerationRequest):
+    try:
+        inputs = tokenizer(request.prompt, return_tensors="pt").to("cpu")  # move to CPU
+        outputs = model.generate(
+            inputs.input_ids,
+            max_length=request.max_length,
+            temperature=request.temperature,
+            do_sample=True,
+        )
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return {"generated_text": generated_text}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
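For reference, a minimal client sketch for the new /generate endpoint, assuming the container is running locally with port 8000 published and that the `requests` package is installed (it is not part of requirements.txt):

    import requests

    # Request body mirrors the TextGenerationRequest model in main.py
    payload = {"prompt": "Hello, world", "max_length": 100, "temperature": 0.7}
    resp = requests.post("http://localhost:8000/generate", json=payload, timeout=300)
    resp.raise_for_status()
    print(resp.json()["generated_text"])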
requirements.txt CHANGED
@@ -1,4 +1,5 @@
 fastapi
 uvicorn
-transformers
 torch
+transformers
+accelerate
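accelerate appears to be added because passing a device_map argument to from_pretrained requires it; without it, transformers raises an error at model load time. For a local run outside Docker, `pip install -r requirements.txt` installs the same set.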