Tim Luka Horstmann committed on
Commit
48a65b5
·
1 Parent(s): e54e8f7

Cogito model

Browse files
Files changed (1) hide show
  1. app.py +23 -20
app.py CHANGED
@@ -25,14 +25,14 @@ login(token=hf_token)
25
 
26
  # Models
27
  sentence_transformer_model = "all-MiniLM-L6-v2"
28
- repo_id = "bartowski/Llama-3.2-3B-Instruct-GGUF"
29
- filename = "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
30
 
31
- # Define FAQs
32
  faqs = [
33
  {"question": "What is your name?", "answer": "My name is Tim Luka Horstmann."},
34
  {"question": "Where do you live?", "answer": "I live in Paris, France."},
35
- {"question": "What is your education?", "answer": "I am currenlty pursuing a MSc in Data and AI at Institut Polytechnique de Paris. I have an MPhil in Advanced Computer Science from the University of Cambridge, and a BSc in Business Informatics from RheinMain University of Applied Sciences."},
36
  {"question": "What are your skills?", "answer": "I am proficient in Python, Java, SQL, Cypher, SPARQL, VBA, JavaScript, HTML/CSS, and Ruby. I also use tools like PyTorch, Hugging Face, Scikit-Learn, NumPy, Pandas, Matplotlib, Jupyter, Git, Bash, IoT, Ansible, QuickSight, and Wordpress."},
37
  {"question": "How are you?", "answer": "I’m doing great, thanks for asking! I’m enjoying life in Paris and working on some exciting AI projects."},
38
  {"question": "What do you do?", "answer": "I’m a Computer Scientist and AI enthusiast, currently pursuing a MSc in Data and AI at Institut Polytechnique de Paris and interning as a Machine Learning Research Engineer at Hi! PARIS."},
@@ -40,7 +40,7 @@ faqs = [
40
  ]
41
 
42
  try:
43
- # Load CV embeddings and build FAISS index
44
  logger.info("Loading CV embeddings from cv_embeddings.json")
45
  with open("cv_embeddings.json", "r", encoding="utf-8") as f:
46
  cv_data = json.load(f)
@@ -51,17 +51,17 @@ try:
51
  faiss_index.add(cv_embeddings)
52
  logger.info("FAISS index built successfully")
53
 
54
- # Load embedding model
55
  logger.info("Loading SentenceTransformer model")
56
  embedder = SentenceTransformer(sentence_transformer_model, device="cpu")
57
  logger.info("SentenceTransformer model loaded")
58
 
59
- # Compute FAQ embeddings
60
  faq_questions = [faq["question"] for faq in faqs]
61
  faq_embeddings = embedder.encode(faq_questions, convert_to_numpy=True).astype("float32")
62
  faiss.normalize_L2(faq_embeddings)
63
 
64
- # Load Llama model
65
  logger.info(f"Loading {filename} model")
66
  model_path = hf_hub_download(
67
  repo_id=repo_id,
@@ -71,10 +71,10 @@ try:
71
  )
72
  generator = Llama(
73
  model_path=model_path,
74
- n_ctx=1024,
75
  n_threads=2,
76
  n_batch=512,
77
- n_gpu_layers=0,
78
  verbose=True,
79
  )
80
  logger.info(f"{filename} model loaded")
@@ -97,7 +97,7 @@ def retrieve_context(query, top_k=3):
97
  def stream_response(query):
98
  try:
99
  logger.info(f"Processing query: {query}")
100
- # Check FAQ cache
101
  query_embedding = embedder.encode(query, convert_to_numpy=True).astype("float32")
102
  query_embedding = query_embedding.reshape(1, -1)
103
  faiss.normalize_L2(query_embedding)
@@ -111,10 +111,12 @@ def stream_response(query):
111
 
112
  context = retrieve_context(query)
113
  prompt = (
114
- f"<|im_start|>system\nYou are Tim Luka Horstmann, a Computer Scientist. Here is your CV:\n{context}\n"
115
- f"A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. For questions about your CV, base your answer strictly on the provided CV information. For casual questions not covered by the CV, respond naturally but do not invent specific details beyond what’s generally true about you (e.g., your current location or field of work). Avoid meta-commentary or critiquing your own response.\n<|im_end>\n"
116
- f"<|im_start|>user\n{query}\n<|im_end>\n"
117
- f"<|im_start|>assistant\n"
 
 
118
  )
119
 
120
  response_text = ""
@@ -122,15 +124,15 @@ def stream_response(query):
122
  prompt,
123
  max_tokens=200,
124
  stream=True,
125
- stop=["<|im_end|>", "[DONE]"],
126
- temperature=0.5, # Slightly higher for friendly tone
127
  top_p=0.9,
128
- repeat_penalty=1.2, # Maintain control
129
  ):
130
  text = chunk['choices'][0]['text']
131
  response_text += text
132
  yield f"data: {text}\n\n"
133
- if "<|im_end>" in response_text or "[DONE]" in response_text:
134
  break
135
  yield "data: [DONE]\n\n"
136
  except Exception as e:
@@ -155,8 +157,9 @@ async def health_check():
155
  @app.get("/model_info")
156
  async def model_info():
157
  return {
158
- "model_name": "Llama-3.2-3B-Instruct-GGUF",
159
  "model_size": "3B",
 
160
  "embedding_model": sentence_transformer_model,
161
  "faiss_index_size": len(cv_chunks),
162
  "faiss_index_dim": cv_embeddings.shape[1],
 
25
 
26
  # Models
27
  sentence_transformer_model = "all-MiniLM-L6-v2"
28
+ repo_id = "bartowski/deepcogito_cogito-v1-preview-llama-3B-GGUF"
29
+ filename = "deepcogito_cogito-v1-preview-llama-3B-Q4_K_M.gguf" # Updated to Cogito Q4_K_M
30
 
31
+ # Define FAQs (unchanged)
32
  faqs = [
33
  {"question": "What is your name?", "answer": "My name is Tim Luka Horstmann."},
34
  {"question": "Where do you live?", "answer": "I live in Paris, France."},
35
+ {"question": "What is your education?", "answer": "I am currently pursuing a MSc in Data and AI at Institut Polytechnique de Paris. I have an MPhil in Advanced Computer Science from the University of Cambridge, and a BSc in Business Informatics from RheinMain University of Applied Sciences."},
36
  {"question": "What are your skills?", "answer": "I am proficient in Python, Java, SQL, Cypher, SPARQL, VBA, JavaScript, HTML/CSS, and Ruby. I also use tools like PyTorch, Hugging Face, Scikit-Learn, NumPy, Pandas, Matplotlib, Jupyter, Git, Bash, IoT, Ansible, QuickSight, and Wordpress."},
37
  {"question": "How are you?", "answer": "I’m doing great, thanks for asking! I’m enjoying life in Paris and working on some exciting AI projects."},
38
  {"question": "What do you do?", "answer": "I’m a Computer Scientist and AI enthusiast, currently pursuing a MSc in Data and AI at Institut Polytechnique de Paris and interning as a Machine Learning Research Engineer at Hi! PARIS."},
 
40
  ]
41
 
42
  try:
43
+ # Load CV embeddings and build FAISS index (unchanged)
44
  logger.info("Loading CV embeddings from cv_embeddings.json")
45
  with open("cv_embeddings.json", "r", encoding="utf-8") as f:
46
  cv_data = json.load(f)
 
51
  faiss_index.add(cv_embeddings)
52
  logger.info("FAISS index built successfully")
53
 
54
+ # Load embedding model (unchanged)
55
  logger.info("Loading SentenceTransformer model")
56
  embedder = SentenceTransformer(sentence_transformer_model, device="cpu")
57
  logger.info("SentenceTransformer model loaded")
58
 
59
+ # Compute FAQ embeddings (unchanged)
60
  faq_questions = [faq["question"] for faq in faqs]
61
  faq_embeddings = embedder.encode(faq_questions, convert_to_numpy=True).astype("float32")
62
  faiss.normalize_L2(faq_embeddings)
63
 
64
+ # Load Cogito model
65
  logger.info(f"Loading {filename} model")
66
  model_path = hf_hub_download(
67
  repo_id=repo_id,
 
71
  )
72
  generator = Llama(
73
  model_path=model_path,
74
+ n_ctx=2048, # Raised from 1024; adjust further if 128k is supported and memory allows
75
  n_threads=2,
76
  n_batch=512,
77
+ n_gpu_layers=0, # No GPU on free tier
78
  verbose=True,
79
  )
80
  logger.info(f"{filename} model loaded")
 
97
  def stream_response(query):
98
  try:
99
  logger.info(f"Processing query: {query}")
100
+ # Check FAQ cache (unchanged)
101
  query_embedding = embedder.encode(query, convert_to_numpy=True).astype("float32")
102
  query_embedding = query_embedding.reshape(1, -1)
103
  faiss.normalize_L2(query_embedding)
 
111
 
112
  context = retrieve_context(query)
113
  prompt = (
114
+ f"<|begin_of_text|><|start_header_id|>system<|end_header_id>\n"
115
+ f"You are Tim Luka Horstmann, a Computer Scientist. Here is your CV:\n{context}\n"
116
+ f"A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. For questions about your CV, base your answer strictly on the provided CV information. For casual questions not covered by the CV, respond naturally but do not invent specific details beyond what’s generally true about you (e.g., your current location or field of work). Avoid meta-commentary or critiquing your own response.\n"
117
+ f"<|eot_id|><|start_header_id|>user<|end_header_id>\n"
118
+ f"{query}\n"
119
+ f"<|eot_id|><|start_header_id|>assistant<|end_header_id>\n"
120
  )
121
 
122
  response_text = ""
 
124
  prompt,
125
  max_tokens=200,
126
  stream=True,
127
+ stop=["<|eot_id|>", "[DONE]"], # Updated stop tokens
128
+ temperature=0.5,
129
  top_p=0.9,
130
+ repeat_penalty=1.2,
131
  ):
132
  text = chunk['choices'][0]['text']
133
  response_text += text
134
  yield f"data: {text}\n\n"
135
+ if "<|eot_id>" in response_text or "[DONE]" in response_text:
136
  break
137
  yield "data: [DONE]\n\n"
138
  except Exception as e:
 
157
  @app.get("/model_info")
158
  async def model_info():
159
  return {
160
+ "model_name": "deepcogito_cogito-v1-preview-llama-3B-GGUF",
161
  "model_size": "3B",
162
+ "quantization": "Q4_K_M",
163
  "embedding_model": sentence_transformer_model,
164
  "faiss_index_size": len(cv_chunks),
165
  "faiss_index_dim": cv_embeddings.shape[1],