Spaces:
Sleeping
Sleeping
Update chunk_python_code.py
Browse files
- chunk_python_code.py +5 -2
chunk_python_code.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
| 1 |
import ast
|
| 2 |
from langchain.schema import Document
|
| 3 |
|
|
|
|
|
|
|
| 4 |
def chunk_python_code_with_metadata(python_code, file_path):
|
| 5 |
"""
|
|
|
|
| 6 |
Entry point method to process the Python file.
|
| 7 |
It invokes the iterate_ast function.
|
| 8 |
"""
|
|
@@ -18,7 +21,7 @@ def chunk_python_code_with_metadata(python_code, file_path):
|
|
| 18 |
else:
|
| 19 |
usage = "undefined"
|
| 20 |
|
| 21 |
-
# Add metadata
|
| 22 |
for doc in documents:
|
| 23 |
doc.metadata["source"] = file_path
|
| 24 |
doc.metadata["usage"] = usage # Add the determined usage metadata
|
|
@@ -44,7 +47,7 @@ def _iterate_ast(python_code, documents, file_path):
|
|
| 44 |
all_imports = all(isinstance(node, (ast.Import, ast.ImportFrom)) for node in first_level_nodes)
|
| 45 |
if all_imports:
|
| 46 |
documents.extend(
|
| 47 |
-
_chunk_import_only_python_code(
|
| 48 |
|
| 49 |
# Iterate over first-level nodes
|
| 50 |
for first_level_node in ast.iter_child_nodes(tree):
|
|
|
|
| 1 |
import ast
|
| 2 |
from langchain.schema import Document
|
| 3 |
|
| 4 |
+
|
| 5 |
+
|
| 6 |
def chunk_python_code_with_metadata(python_code, file_path):
|
| 7 |
"""
|
| 8 |
+
Custom Python Code Splitter for KadiApy-Use Case
|
| 9 |
Entry point method to process the Python file.
|
| 10 |
It invokes the iterate_ast function.
|
| 11 |
"""
|
|
|
|
| 21 |
else:
|
| 22 |
usage = "undefined"
|
| 23 |
|
| 24 |
+
# Add metadata-type "usage" to all documents
|
| 25 |
for doc in documents:
|
| 26 |
doc.metadata["source"] = file_path
|
| 27 |
doc.metadata["usage"] = usage # Add the determined usage metadata
|
|
|
|
| 47 |
all_imports = all(isinstance(node, (ast.Import, ast.ImportFrom)) for node in first_level_nodes)
|
| 48 |
if all_imports:
|
| 49 |
documents.extend(
|
| 50 |
+
_chunk_import_only_python_code(python_code, file_path))
|
| 51 |
|
| 52 |
# Iterate over first-level nodes
|
| 53 |
for first_level_node in ast.iter_child_nodes(tree):
|