Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -403,34 +403,29 @@ def initialize():
|
|
| 403 |
global vector_store, chunks, llm
|
| 404 |
|
| 405 |
download_gitlab_project_by_version()
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
doc_partial_paths = []
|
| 410 |
-
doc_partial_paths = ['docs/source/setup/']
|
| 411 |
-
doc_file_paths = ['docs/source/usage/lib.rst']
|
| 412 |
-
|
| 413 |
-
|
| 414 |
|
| 415 |
|
| 416 |
-
|
| 417 |
-
print("LEEEEEEEEEEEENGTH of code_texts: ", len(
|
| 418 |
|
| 419 |
|
| 420 |
-
|
| 421 |
-
print("LEEEEEEEEEEEENGTH of doc_files: ", len(
|
| 422 |
|
| 423 |
-
|
| 424 |
-
|
| 425 |
|
| 426 |
-
print(f"Total number of code_chunks: {len(
|
| 427 |
-
print(f"Total number of doc_chunks: {len(
|
| 428 |
|
| 429 |
#docstore = embed_documents_into_vectorstore(kadiAPY_code_chunks, EMBEDDING_MODEL_NAME, PERSIST_DOC_DIRECTORY)
|
| 430 |
#codestore = embed_documents_into_vectorstore(kadiAPY_doc_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
|
| 431 |
|
| 432 |
vector_store = embed_documents_into_vectorstore(
|
| 433 |
-
chunks=
|
| 434 |
model_name= EMBEDDING_MODEL_NAME,
|
| 435 |
persist_directory= PERSIST_DOC_DIRECTORY
|
| 436 |
)
|
|
|
|
| 403 |
global vector_store, chunks, llm
|
| 404 |
|
| 405 |
download_gitlab_project_by_version()
|
| 406 |
+
|
| 407 |
+
code_file_paths = ['kadi_apy']
|
| 408 |
+
doc_file_path = ['docs/source/']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
|
| 410 |
|
| 411 |
+
code_texts, code_references = process_directory(DATA_DIR, code_partial_paths, code_file_paths)
|
| 412 |
+
print("LEEEEEEEEEEEENGTH of code_texts: ", len(code_texts))
|
| 413 |
|
| 414 |
|
| 415 |
+
doc_texts, kadiAPY_doc_references = process_directory(DATA_DIR, doc_partial_paths, doc_file_paths)
|
| 416 |
+
print("LEEEEEEEEEEEENGTH of doc_files: ", len(doc_texts))
|
| 417 |
|
| 418 |
+
code_chunks = split_python_code_into_chunks(code_texts, code_references)
|
| 419 |
+
doc_chunks = split_into_chunks(doc_texts, kadiAPY_doc_references, CHUNK_SIZE, CHUNK_OVERLAP)
|
| 420 |
|
| 421 |
+
print(f"Total number of code_chunks: {len(code_chunks)}")
|
| 422 |
+
print(f"Total number of doc_chunks: {len(doc_chunks)}")
|
| 423 |
|
| 424 |
#docstore = embed_documents_into_vectorstore(kadiAPY_code_chunks, EMBEDDING_MODEL_NAME, PERSIST_DOC_DIRECTORY)
|
| 425 |
#codestore = embed_documents_into_vectorstore(kadiAPY_doc_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
|
| 426 |
|
| 427 |
vector_store = embed_documents_into_vectorstore(
|
| 428 |
+
chunks= doc_chunks + code_chunks,
|
| 429 |
model_name= EMBEDDING_MODEL_NAME,
|
| 430 |
persist_directory= PERSIST_DOC_DIRECTORY
|
| 431 |
)
|