Spaces · Runtime error
Commit 4b8202a · Parent: 089d8a1
change
Files changed:
- Dockerfile +23 -5
- main.py +29 -13
- requirements.txt +2 -1
Dockerfile
CHANGED
@@ -1,10 +1,28 @@
-
+# Use the official Python image as a base
+FROM python:3.9-slim
 
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create and set the working directory
 WORKDIR /app
 
-
-
+# Copy requirements and install them
+COPY app/requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the FastAPI application code
+COPY app /app
 
-
+# Expose the port FastAPI will run on
+EXPOSE 8000
 
-
+# Command to run the application
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
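A layout note on this Dockerfile: COPY app/requirements.txt . and COPY app /app assume the repository keeps its code under an app/ directory, and CMD ["uvicorn", "main:app", ...] then relies on main.py landing directly in /app (the WORKDIR). If main.py sits at the repository root instead, the build or startup will fail.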
main.py
CHANGED
@@ -1,20 +1,36 @@
-from fastapi import FastAPI
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
 app = FastAPI()
 
-
-
-
+class TextGenerationRequest(BaseModel):
+    prompt: str
+    max_length: int = 100
+    temperature: float = 0.7
 
-
-
-
+# Load model and tokenizer (force CPU usage)
+model_name = "unsloth/Qwen2.5-7B-bnb-4bit"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    trust_remote_code=True,
+    torch_dtype=torch.float32,  # Change to float32 for CPU
+    device_map="cpu"  # Force CPU usage
+)
 
-@app.post("/
-def
-
-
-
-
+@app.post("/generate")
+async def generate_text(request: TextGenerationRequest):
+    try:
+        inputs = tokenizer(request.prompt, return_tensors="pt").to("cpu")  # Move to CPU
+        outputs = model.generate(
+            inputs.input_ids,
+            max_length=request.max_length,
+            temperature=request.temperature,
+            do_sample=True,
+        )
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return {"generated_text": generated_text}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
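Once the container is up, the /generate endpoint can be exercised with a small client script. This is a sketch, not part of the commit: the localhost URL, example prompt, and timeout are assumptions based on EXPOSE 8000 and the uvicorn command in the Dockerfile.

import requests  # assumed available in the client environment

# Call the /generate endpoint defined above; the JSON fields mirror
# TextGenerationRequest (prompt, max_length, temperature).
resp = requests.post(
    "http://localhost:8000/generate",  # assumes the default port mapping
    json={
        "prompt": "Once upon a time",
        "max_length": 100,
        "temperature": 0.7,
    },
    timeout=600,  # CPU inference on a 7B model can take minutes
)
resp.raise_for_status()
print(resp.json()["generated_text"])

Note that the Space header still reports a runtime error. One plausible cause, not confirmed by the commit itself: unsloth/Qwen2.5-7B-bnb-4bit is a bitsandbytes 4-bit checkpoint, and loading it with torch_dtype=torch.float32 and device_map="cpu" may fail or exhaust memory, since bitsandbytes quantization generally expects a GPU.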
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
fastapi
|
2 |
uvicorn
|
3 |
-
transformers
|
4 |
torch
|
|
|
|
|
|
1 |
fastapi
|
2 |
uvicorn
|
|
|
3 |
torch
|
4 |
+
transformers
|
5 |
+
accelerate
|
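The accelerate addition is most likely tied to the device_map="cpu" argument in main.py: transformers rejects a device_map in from_pretrained unless accelerate is installed. Moving transformers after torch only changes install order and has no effect on the resolved environment.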