Tim Luka Horstmann committed · Commit 48a65b5 · 1 Parent(s): e54e8f7
Cogito model
app.py CHANGED
@@ -25,14 +25,14 @@ login(token=hf_token)
 
 # Models
 sentence_transformer_model = "all-MiniLM-L6-v2"
-repo_id = "bartowski/
-filename = "
+repo_id = "bartowski/deepcogito_cogito-v1-preview-llama-3B-GGUF"
+filename = "deepcogito_cogito-v1-preview-llama-3B-Q4_K_M.gguf"  # Updated to Cogito Q4_K_M
 
-# Define FAQs
+# Define FAQs (unchanged)
 faqs = [
     {"question": "What is your name?", "answer": "My name is Tim Luka Horstmann."},
     {"question": "Where do you live?", "answer": "I live in Paris, France."},
-    {"question": "What is your education?", "answer": "I am
+    {"question": "What is your education?", "answer": "I am currently pursuing a MSc in Data and AI at Institut Polytechnique de Paris. I have an MPhil in Advanced Computer Science from the University of Cambridge, and a BSc in Business Informatics from RheinMain University of Applied Sciences."},
     {"question": "What are your skills?", "answer": "I am proficient in Python, Java, SQL, Cypher, SPARQL, VBA, JavaScript, HTML/CSS, and Ruby. I also use tools like PyTorch, Hugging Face, Scikit-Learn, NumPy, Pandas, Matplotlib, Jupyter, Git, Bash, IoT, Ansible, QuickSight, and Wordpress."},
     {"question": "How are you?", "answer": "I’m doing great, thanks for asking! I’m enjoying life in Paris and working on some exciting AI projects."},
     {"question": "What do you do?", "answer": "I’m a Computer Scientist and AI enthusiast, currently pursuing a MSc in Data and AI at Institut Polytechnique de Paris and interning as a Machine Learning Research Engineer at Hi! PARIS."},
@@ -40,7 +40,7 @@ faqs = [
 ]
 
 try:
-    # Load CV embeddings and build FAISS index
+    # Load CV embeddings and build FAISS index (unchanged)
     logger.info("Loading CV embeddings from cv_embeddings.json")
     with open("cv_embeddings.json", "r", encoding="utf-8") as f:
         cv_data = json.load(f)
@@ -51,17 +51,17 @@ try:
     faiss_index.add(cv_embeddings)
     logger.info("FAISS index built successfully")
 
-    # Load embedding model
+    # Load embedding model (unchanged)
     logger.info("Loading SentenceTransformer model")
     embedder = SentenceTransformer(sentence_transformer_model, device="cpu")
     logger.info("SentenceTransformer model loaded")
 
-    # Compute FAQ embeddings
+    # Compute FAQ embeddings (unchanged)
     faq_questions = [faq["question"] for faq in faqs]
     faq_embeddings = embedder.encode(faq_questions, convert_to_numpy=True).astype("float32")
     faiss.normalize_L2(faq_embeddings)
 
-    # Load
+    # Load Cogito model
     logger.info(f"Loading {filename} model")
     model_path = hf_hub_download(
         repo_id=repo_id,
@@ -71,10 +71,10 @@ try:
     )
     generator = Llama(
         model_path=model_path,
-        n_ctx=1024,
+        n_ctx=2048,  # Adjust if 128k is supported and memory allows; start with 1024
         n_threads=2,
         n_batch=512,
-        n_gpu_layers=0,
+        n_gpu_layers=0,  # No GPU on free tier
        verbose=True,
     )
     logger.info(f"{filename} model loaded")
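The new n_ctx=2048 doubles the KV-cache footprint relative to the old 1024, which is the main memory knob on a CPU-only free-tier Space. A quick way to sanity-check these settings before deploying is to load the same GGUF locally and run a one-off completion. A minimal sketch reusing the hf_hub_download/Llama calls from this hunk; the tiny n_ctx=256 is only for the smoke test:

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the quantized GGUF; hf_hub_download caches it locally across runs.
model_path = hf_hub_download(
    repo_id="bartowski/deepcogito_cogito-v1-preview-llama-3B-GGUF",
    filename="deepcogito_cogito-v1-preview-llama-3B-Q4_K_M.gguf",
)

# Deliberately small context window: just verifies the model loads and generates on CPU.
llm = Llama(model_path=model_path, n_ctx=256, n_threads=2, n_gpu_layers=0, verbose=False)
print(llm("Hello", max_tokens=8)["choices"][0]["text"])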
@@ -97,7 +97,7 @@ def retrieve_context(query, top_k=3):
 def stream_response(query):
     try:
         logger.info(f"Processing query: {query}")
-        # Check FAQ cache
+        # Check FAQ cache (unchanged)
         query_embedding = embedder.encode(query, convert_to_numpy=True).astype("float32")
         query_embedding = query_embedding.reshape(1, -1)
         faiss.normalize_L2(query_embedding)
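For reference, the FAQ cache this block feeds works because inner product over L2-normalized vectors equals cosine similarity, so a single IndexFlatIP search with a score threshold decides whether to short-circuit to a canned answer. A sketch of that lookup under the names defined in app.py; the 0.9 threshold is an assumption, not necessarily the value the app uses:

import faiss
import numpy as np

# Inner product over L2-normalized vectors == cosine similarity.
faq_index = faiss.IndexFlatIP(faq_embeddings.shape[1])
faq_index.add(faq_embeddings)  # normalized earlier with faiss.normalize_L2

def faq_lookup(query_embedding: np.ndarray, threshold: float = 0.9):
    scores, ids = faq_index.search(query_embedding, 1)  # top-1 FAQ match
    if scores[0][0] >= threshold:
        return faqs[ids[0][0]]["answer"]
    return None  # below threshold: fall through to retrieval + generation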
@@ -111,10 +111,12 @@ def stream_response(query):
 
         context = retrieve_context(query)
         prompt = (
-            f"<|
-            f"
-            f"
-            f"<|
+            f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
+            f"You are Tim Luka Horstmann, a Computer Scientist. Here is your CV:\n{context}\n"
+            f"A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. For questions about your CV, base your answer strictly on the provided CV information. For casual questions not covered by the CV, respond naturally but do not invent specific details beyond what’s generally true about you (e.g., your current location or field of work). Avoid meta-commentary or critiquing your own response.\n"
+            f"<|eot_id|><|start_header_id|>user<|end_header_id|>\n"
+            f"{query}\n"
+            f"<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
         )
 
         response_text = ""
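Hand-assembling the Llama 3 header tags is easy to get subtly wrong (each tag needs the closing |>, i.e. <|end_header_id|>, <|eot_id|>). llama-cpp-python can instead apply the chat template embedded in the GGUF metadata. A hedged alternative sketch, not what this commit ships:

# Let llama.cpp apply the GGUF's built-in chat template rather than
# concatenating <|start_header_id|> ... tags by hand.
for chunk in generator.create_chat_completion(
    messages=[
        {"role": "system",
         "content": f"You are Tim Luka Horstmann, a Computer Scientist. Here is your CV:\n{context}"},
        {"role": "user", "content": query},
    ],
    max_tokens=200,
    stream=True,
):
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        print(delta["content"], end="")  # or yield as an SSE chunk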
@@ -122,15 +124,15 @@ def stream_response(query):
             prompt,
             max_tokens=200,
             stream=True,
-            stop=["<|
-            temperature=0.5,
+            stop=["<|eot_id|>", "[DONE]"],  # Updated stop tokens
+            temperature=0.5,
             top_p=0.9,
-            repeat_penalty=1.2,
+            repeat_penalty=1.2,
         ):
             text = chunk['choices'][0]['text']
             response_text += text
             yield f"data: {text}\n\n"
-            if "<|
+            if "<|eot_id|>" in response_text or "[DONE]" in response_text:
                 break
         yield "data: [DONE]\n\n"
     except Exception as e:
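Two things worth noting about this loop: llama.cpp halts before emitting a stop sequence, so with stop=["<|eot_id|>"] the in-loop substring check is a safety net rather than the primary terminator; and each yield follows the SSE wire format (a data: field terminated by a blank line, with data: [DONE] as the end sentinel). A client can consume the stream as below; the /api/chat path is a placeholder, since the route that wires up stream_response is outside this diff:

import requests

# Placeholder URL: the actual route for stream_response is not shown in this diff.
with requests.get("http://localhost:7860/api/chat",
                  params={"query": "Where do you live?"}, stream=True) as r:
    for line in r.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip blank SSE separators
        payload = line[len("data: "):]
        if payload == "[DONE]":
            break
        print(payload, end="")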
@@ -155,8 +157,9 @@ async def health_check():
 @app.get("/model_info")
 async def model_info():
     return {
-        "model_name": "
+        "model_name": "deepcogito_cogito-v1-preview-llama-3B-GGUF",
         "model_size": "3B",
+        "quantization": "Q4_K_M",
         "embedding_model": sentence_transformer_model,
         "faiss_index_size": len(cv_chunks),
         "faiss_index_dim": cv_embeddings.shape[1],
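With the new quantization field it is easy to verify after deployment that the Space is actually serving the Q4_K_M build. A minimal check; the host is a placeholder:

import requests

info = requests.get("http://localhost:7860/model_info").json()  # placeholder host
assert info["model_name"] == "deepcogito_cogito-v1-preview-llama-3B-GGUF"
assert info["quantization"] == "Q4_K_M"
print(info["faiss_index_size"], "CV chunks,", info["faiss_index_dim"], "dims")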