Tim Luka Horstmann
committed on
Commit
·
dc475e9
1
Parent(s):
54039cd
Fixed path
Browse files
app.py
CHANGED
@@ -31,7 +31,7 @@ login(token=hf_token)
|
|
31 |
sentence_transformer_model = "all-MiniLM-L6-v2"
|
32 |
# Upgrade to the 8B model and choose Q4_0 quantization for a good balance of performance and resource usage.
|
33 |
repo_id = "bartowski/deepcogito_cogito-v1-preview-llama-8B-GGUF"
|
34 |
-
filename = "deepcogito_cogito-v1-preview-llama-8B-
|
35 |
|
36 |
# Define FAQs (unchanged)
|
37 |
faqs = [
|
@@ -175,7 +175,7 @@ async def model_info():
|
|
175 |
return {
|
176 |
"model_name": "deepcogito_cogito-v1-preview-llama-8B-GGUF",
|
177 |
"model_size": "8B",
|
178 |
-
"quantization": "
|
179 |
"embedding_model": sentence_transformer_model,
|
180 |
"faiss_index_size": len(cv_chunks),
|
181 |
"faiss_index_dim": cv_embeddings.shape[1],
|
|
|
31 |
sentence_transformer_model = "all-MiniLM-L6-v2"
|
32 |
# Upgrade to the 8B model and choose Q4_K_M quantization for a good balance of performance and resource usage.
|
33 |
repo_id = "bartowski/deepcogito_cogito-v1-preview-llama-8B-GGUF"
|
34 |
+
filename = "deepcogito_cogito-v1-preview-llama-8B-Q4_K_M.gguf" # New 8B model with Q4_K_M quantization
|
35 |
|
36 |
# Define FAQs (unchanged)
|
37 |
faqs = [
|
|
|
175 |
return {
|
176 |
"model_name": "deepcogito_cogito-v1-preview-llama-8B-GGUF",
|
177 |
"model_size": "8B",
|
178 |
+
"quantization": "Q4_K_M",
|
179 |
"embedding_model": sentence_transformer_model,
|
180 |
"faiss_index_size": len(cv_chunks),
|
181 |
"faiss_index_dim": cv_embeddings.shape[1],
|