Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -280,8 +280,8 @@ def split_into_chunks(texts, references, chunk_size, chunk_overlap):
|
|
| 280 |
print(f"Total number of chunks: {len(chunks)}")
|
| 281 |
return chunks
|
| 282 |
|
| 283 |
-
# Setup
|
| 284 |
-
def
|
| 285 |
embedding_model = HuggingFaceEmbeddings(model_name=model_name)
|
| 286 |
vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=persist_directory)
|
| 287 |
return vectorstore
|
|
@@ -291,7 +291,7 @@ def setup_llm(model_name, temperature, api_key):
|
|
| 291 |
llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
|
| 292 |
return llm
|
| 293 |
|
| 294 |
-
def
|
| 295 |
results = vectorstore.similarity_search(query, k=k)
|
| 296 |
chunks_with_references = [(result.page_content, result.metadata["source"]) for result in results]
|
| 297 |
# Print the chosen chunks and their sources to the console
|
|
@@ -302,14 +302,14 @@ def query_chroma(vectorstore, query, k):
|
|
| 302 |
return chunks_with_references
|
| 303 |
|
| 304 |
def rag_workflow(query):
|
| 305 |
-
retrieved_doc_chunks =
|
| 306 |
-
|
| 307 |
|
| 308 |
doc_context = "\n\n".join([doc_chunk for doc_chunk, _ in retrieved_doc_chunks])
|
| 309 |
-
|
| 310 |
|
| 311 |
-
|
| 312 |
-
|
| 313 |
|
| 314 |
print(f"Context for the query:\n{doc_context}\n")
|
| 315 |
|
|
@@ -332,7 +332,7 @@ def rag_workflow(query):
|
|
| 332 |
|
| 333 |
|
| 334 |
def initialize():
|
| 335 |
-
global docstore,
|
| 336 |
|
| 337 |
code_partial_paths = ['kadi_apy/lib/']
|
| 338 |
code_file_path = []
|
|
@@ -350,8 +350,9 @@ def initialize():
|
|
| 350 |
print(f"Total number of code_chunks: {len(code_chunks)}")
|
| 351 |
print(f"Total number of doc_chunks: {len(doc_chunks)}")
|
| 352 |
|
| 353 |
-
|
| 354 |
-
|
|
|
|
| 355 |
llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)
|
| 356 |
|
| 357 |
|
|
|
|
| 280 |
print(f"Total number of chunks: {len(chunks)}")
|
| 281 |
return chunks
|
| 282 |
|
| 283 |
+
# Setup Vectorstore
def setup_vectorstore(chunks, model_name, persist_directory, collection_name=None):
    """Build a persisted Chroma vector store over pre-split document chunks.

    Args:
        chunks: Sequence of LangChain ``Document`` chunks to embed and index.
        model_name: HuggingFace sentence-embedding model name passed to
            ``HuggingFaceEmbeddings``.
        persist_directory: Directory where Chroma persists its index.
        collection_name: Optional Chroma collection name. The surrounding code
            builds two stores (docs and code) into the same persist_directory;
            giving each call a distinct collection_name keeps their embeddings
            from colliding in one shared collection. Defaults to ``None``,
            which preserves the original behavior (Chroma's default
            collection).

    Returns:
        The constructed ``Chroma`` vector store.
    """
    embedding_model = HuggingFaceEmbeddings(model_name=model_name)
    if collection_name is None:
        # Backward-compatible path: byte-for-byte the original call.
        vectorstore = Chroma.from_documents(
            chunks, embedding=embedding_model, persist_directory=persist_directory
        )
    else:
        vectorstore = Chroma.from_documents(
            chunks,
            embedding=embedding_model,
            persist_directory=persist_directory,
            collection_name=collection_name,
        )
    return vectorstore
|
|
|
| 291 |
llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
|
| 292 |
return llm
|
| 293 |
|
| 294 |
+
def retrieve_from_vectorstore(vectorstore, query, k):
|
| 295 |
results = vectorstore.similarity_search(query, k=k)
|
| 296 |
chunks_with_references = [(result.page_content, result.metadata["source"]) for result in results]
|
| 297 |
# Print the chosen chunks and their sources to the console
|
|
|
|
| 302 |
return chunks_with_references
|
| 303 |
|
| 304 |
def rag_workflow(query):
|
| 305 |
+
retrieved_doc_chunks = retrieve_from_vectorstore(docstore, query, k=5)
|
| 306 |
+
retrieved_code_chunks = retrieve_from_vectorstore(codestore, query, k=5)
|
| 307 |
|
| 308 |
doc_context = "\n\n".join([doc_chunk for doc_chunk, _ in retrieved_doc_chunks])
|
| 309 |
+
code_context = "\n\n".join([code_chunk for code_chunk, _ in retrieved_code_chunks])
|
| 310 |
|
| 311 |
+
doc_references = "\n".join([f"[{i+1}] {ref}" for i, (_, ref) in enumerate(retrieved_doc_chunks)])
|
| 312 |
+
code_references = "\n".join([f"[{i+1}] {ref}" for i, (_, ref) in enumerate(retrieved_code_chunks)])
|
| 313 |
|
| 314 |
print(f"Context for the query:\n{doc_context}\n")
|
| 315 |
|
|
|
|
| 332 |
|
| 333 |
|
| 334 |
def initialize():
|
| 335 |
+
global docstore, codestore, chunks, llm
|
| 336 |
|
| 337 |
code_partial_paths = ['kadi_apy/lib/']
|
| 338 |
code_file_path = []
|
|
|
|
| 350 |
print(f"Total number of code_chunks: {len(code_chunks)}")
|
| 351 |
print(f"Total number of doc_chunks: {len(doc_chunks)}")
|
| 352 |
|
| 353 |
+
docstore = setup_vectorstore(doc_chunks, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY)
|
| 354 |
+
codestore = setup_vectorstore(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY)
|
| 355 |
+
|
| 356 |
llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)
|
| 357 |
|
| 358 |
|