snsynth committed on
Commit
8980288
·
1 Parent(s): 10a9c4f

add all files

Browse files
Files changed (3) hide show
  1. Dockerfile +16 -0
  2. app.py +51 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Non-root runtime account. app.py downloads the model from the Hugging Face
# Hub when it is imported, so the server process needs a home/cache directory
# it is allowed to write to. UID 1000 is the user that Hugging Face Docker
# Spaces run containers as (see the Spaces Docker documentation).
RUN useradd --create-home --uid 1000 user
ENV HOME=/home/user \
    HF_HOME=/home/user/.cache/huggingface

WORKDIR /app

# Install Python deps (done before copying the app so this layer is reused
# until requirements.txt changes)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy app code
COPY app.py .

# Drop root privileges for everything that happens at runtime
USER user

# Expose port used by uvicorn
EXPOSE 7860

# Run FastAPI server
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from transformers import AutoTokenizer, AutoModel
4
+ import torch
5
+
6
# Application object that the route decorators in this module register on.
app = FastAPI(title="OpenAI-compatible Embedding API", version="1.0.0")

# Checkpoint to load from the Hugging Face Hub.
MODEL_NAME = "BAAI/bge-small-en-v1.5"

# Both objects are created once, at import time, and shared by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# eval() switches the module to evaluation mode and returns the module itself.
model = AutoModel.from_pretrained(MODEL_NAME).eval()
16
+
17
class EmbeddingRequest(BaseModel):
    """Request body accepted by ``POST /v1/embeddings``."""

    # Texts to embed; the response carries one embedding per entry.
    input: list[str]
    # Model name supplied by the client; it is echoed back in the response.
    model: str
20
+
21
@app.get("/")
def root():
    """Return a fixed status message showing that the service is reachable."""
    status = {"message": "API is working"}
    return status
24
+
25
+
26
@app.post("/v1/embeddings")
def create_embeddings(request: EmbeddingRequest):
    """Embed each input string and return an OpenAI-style embeddings response.

    The texts are tokenized as one batch (padded to a common length and
    truncated to the tokenizer's limit), run through the model without
    gradient tracking, and each text is represented by the L2-normalized
    hidden state at sequence position 0.

    Args:
        request: Parsed request body. ``request.input`` holds the texts;
            ``request.model`` is only echoed back in the response, the
            embeddings always come from the module-level ``model``.

    Returns:
        A dict with the keys ``object``, ``data`` (one entry per input text,
        in input order), ``model`` and ``usage``.
    """
    texts = request.input

    if not texts:
        # Nothing to encode: answer with an empty result instead of handing
        # an empty batch to the tokenizer and the model.
        return {
            "object": "list",
            "data": [],
            "model": request.model,
            "usage": {"prompt_tokens": 0, "total_tokens": 0},
        }

    with torch.no_grad():
        tokens = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
        output = model(**tokens)
        cls_embeddings = output.last_hidden_state[:, 0]
        norm_embeddings = torch.nn.functional.normalize(cls_embeddings, p=2, dim=1)

    # Count tokens once, from the batch that was actually encoded. The
    # attention mask is 1 for real tokens and 0 for padding, so its sum is the
    # number of tokens fed to the model (after truncation). This replaces
    # re-tokenizing every input twice just to fill the two usage fields.
    token_count = int(tokens["attention_mask"].sum())

    # Convert the whole batch to nested Python lists in one call.
    vectors = norm_embeddings.tolist()
    data = [
        {
            "object": "embedding",
            "embedding": vector,
            "index": i,
        }
        for i, vector in enumerate(vectors)
    ]

    return {
        "object": "list",
        "data": data,
        "model": request.model,
        "usage": {
            "prompt_tokens": token_count,
            "total_tokens": token_count,
        },
    }
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ torch