Tim Luka Horstmann committed on
Commit
cb8303f
·
1 Parent(s): 61a9825

Initial setup

Browse files
Files changed (4) hide show
  1. Dockerfile +28 -0
  2. app.py +67 -0
  3. cv_embeddings.json +0 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a base image
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies (e.g., for torch, sentence-transformers).
# --no-install-recommends avoids pulling in optional packages, and the apt
# lists are removed in the same layer so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    libffi-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements file first so the dependency layer is cached
# independently of application code changes
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY app.py .
COPY cv_embeddings.json .

# Expose the port FastAPI will run on
EXPOSE 7860

# Command to run the application
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import numpy as np
3
+ from sentence_transformers import SentenceTransformer
4
+ from transformers import pipeline, TextIteratorStreamer
5
+ from threading import Thread
6
+ import torch
7
+ import torch.nn.functional as F
8
+ from fastapi import FastAPI, HTTPException
9
+ from fastapi.responses import StreamingResponse
10
+ from pydantic import BaseModel
11
+
12
app = FastAPI()

# Load the precomputed CV data. Each entry in the JSON file is read for its
# "chunk" (text) and "embedding" (vector) keys.
with open("cv_embeddings.json", "r", encoding="utf-8") as f:
    cv_data = json.load(f)

cv_chunks = [item["chunk"] for item in cv_data]
cv_embeddings = np.array([item["embedding"] for item in cv_data])

# Floats parsed from JSON become float64 in NumPy. Cast explicitly to float32
# so the similarity computation does not depend on implicit dtype promotion
# against the query embedding (presumably float32, the usual output dtype of
# sentence-transformers models) and the tensor takes half the memory.
cv_embeddings_tensor = torch.tensor(cv_embeddings, dtype=torch.float32)

# Sentence embedding model used to embed incoming queries (CPU only).
embedder = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")

# Text-generation pipeline used to produce answers; device=-1 selects the CPU.
generator = pipeline(
    "text-generation",
    model="distilgpt2",
    device=-1,
)
29
+
30
def retrieve_context(query, top_k=3):
    """Return the CV chunks most similar to *query*, joined by newlines.

    The query is embedded with the module-level ``embedder`` and compared
    against every row of ``cv_embeddings_tensor`` using cosine similarity.

    Args:
        query: Text to embed and look up.
        top_k: Maximum number of chunks to return; capped at the number of
            available embeddings.

    Returns:
        The selected chunks from ``cv_chunks``, ordered from most to least
        similar, separated by ``"\\n"``.
    """
    # Add a leading batch dimension so the query broadcasts against all rows.
    query_vec = embedder.encode(query, convert_to_tensor=True).unsqueeze(0)
    scores = F.cosine_similarity(query_vec, cv_embeddings_tensor, dim=1)

    # Never ask topk for more entries than there are chunks.
    k = min(top_k, len(scores))
    best = torch.topk(scores, k=k).indices.cpu().numpy()

    return "\n".join(cv_chunks[idx] for idx in best)
36
+
37
def stream_response(query):
    """Generate an answer to *query* and yield it as Server-Sent Events.

    Retrieves CV context for the query, builds a prompt, runs the
    module-level ``generator`` pipeline in a background thread and forwards
    the text it streams back.

    Args:
        query: The user's question; interpolated into the prompt.

    Yields:
        SSE-formatted strings: one event per streamed text chunk, followed by
        a final ``data: [DONE]`` event.
    """
    context = retrieve_context(query)
    prompt = (
        f"I am Tim Luka Horstmann, a German Computer Scientist. Based on my CV:\n{context}\n\n"
        f"Question: {query}\nAnswer:"
    )

    streamer = TextIteratorStreamer(generator.tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {
        "text_inputs": prompt,
        "max_new_tokens": 200,
        "do_sample": False,
        "streamer": streamer,
    }

    # Generation blocks until finished, so it runs in a worker thread while
    # this generator consumes the streamer.
    thread = Thread(target=generator, kwargs=generation_kwargs)
    thread.start()

    for chunk in streamer:
        # In the SSE wire format a blank line terminates the event and every
        # payload line needs its own "data:" prefix. Writing generated text
        # containing newlines verbatim would truncate or corrupt the event,
        # so emit one "data:" line per payload line; the client joins them
        # back together with "\n". Chunks without newlines are unchanged.
        lines = chunk.replace("\r\n", "\n").replace("\r", "\n").split("\n")
        payload = "\n".join(f"data: {line}" for line in lines)
        yield f"{payload}\n\n"
    yield "data: [DONE]\n\n"
58
+
59
class QueryRequest(BaseModel):
    """Request body for the ``/api/predict`` endpoint."""

    # Positional payload; the endpoint reads the query from the first element.
    data: list
61
+
62
@app.post("/api/predict")
async def predict(request: QueryRequest):
    """Answer the query in ``request.data[0]`` as a Server-Sent Events stream.

    Args:
        request: Parsed request body; ``data`` must be a non-empty list whose
            first element is the query string.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream`` wrapping
        ``stream_response(query)``.

    Raises:
        HTTPException: 400 if ``data`` is empty or its first element is not a
            non-empty string.
    """
    # ``data`` is declared as a list on the model, so only emptiness needs
    # checking here.
    if not request.data:
        raise HTTPException(status_code=400, detail="Invalid input: 'data' must be a non-empty list")

    query = request.data[0]
    # Validate before streaming starts: once the response has begun, a failure
    # inside the generator can no longer be reported as an HTTP error status.
    if not isinstance(query, str) or not query.strip():
        raise HTTPException(
            status_code=400,
            detail="Invalid input: first element of 'data' must be a non-empty string",
        )

    return StreamingResponse(stream_response(query), media_type="text/event-stream")
cv_embeddings.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi==0.115.0
2
+ uvicorn==0.31.0
3
+ sentence-transformers==3.1.1
4
+ transformers==4.44.2
5
+ torch==2.4.1
6
+ numpy==1.26.4