Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Update chunking.py
Browse files
- chunking.py +13 -8
    	
        chunking.py
    CHANGED
    
    | @@ -33,7 +33,7 @@ def chunk_text_and_add_metadata(texts, references, chunk_size, chunk_overlap): | |
| 33 | 
             
                            page_content=chunk,
         | 
| 34 | 
             
                            metadata={
         | 
| 35 | 
             
                                "source": reference,
         | 
| 36 | 
            -
                                " | 
| 37 | 
             
                            }
         | 
| 38 | 
             
                        ) 
         | 
| 39 | 
             
                        for chunk in text_splitter.split_text(text)
         | 
| @@ -58,16 +58,21 @@ def generate_code_chunks_with_metadata(code_file_content, code_file_path): | |
| 58 | 
             
                _iterate_ast(code_file_content, documents, code_file_path)
         | 
| 59 | 
             
                # Determine usage based on the file_path
         | 
| 60 | 
             
                if code_file_path.startswith("kadi_apy"):
         | 
| 61 | 
            -
                     | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 67 | 
             
                # Add metadata-type "usage" to all documents
         | 
| 68 | 
             
                for doc in documents:
         | 
| 69 | 
             
                    doc.metadata["source"] = code_file_path
         | 
| 70 | 
            -
                    doc.metadata[" | 
| 71 | 
             
                    doc.metadata["usage"] = usage  # Add the determined usage metadata
         | 
| 72 | 
             
                    #print(doc)
         | 
| 73 | 
             
                return documents
         | 
|  | |
| 33 | 
             
                            page_content=chunk,
         | 
| 34 | 
             
                            metadata={
         | 
| 35 | 
             
                                "source": reference,
         | 
| 36 | 
            +
                                "directory": "doc/"
         | 
| 37 | 
             
                            }
         | 
| 38 | 
             
                        ) 
         | 
| 39 | 
             
                        for chunk in text_splitter.split_text(text)
         | 
|  | |
| 58 | 
             
                _iterate_ast(code_file_content, documents, code_file_path)
         | 
| 59 | 
             
                # Determine usage based on the file_path
         | 
| 60 | 
             
                if code_file_path.startswith("kadi_apy"):
         | 
| 61 | 
            +
                    directory = "kadi_apy/"
         | 
| 62 | 
            +
                    if code_file_path.startswith("kadi_apy/lib/"):
         | 
| 63 | 
            +
                        usage = "kadi_apy/lib/"
         | 
| 64 | 
            +
                    elif code_file_path.startswith("kadi_apy/cli/"):
         | 
| 65 | 
            +
                        usage = "kadi_apy/cli/"
         | 
| 66 | 
            +
                    else:
         | 
| 67 | 
            +
                        usage = "kadi_apy/top_level_file.py"
         | 
| 68 | 
            +
                else:
         | 
| 69 | 
            +
                    directory = "undefined"
         | 
| 70 | 
            +
                    usage = "undefined"
         | 
| 71 | 
            +
                    
         | 
| 72 | 
             
                # Add metadata-type "usage" to all documents
         | 
| 73 | 
             
                for doc in documents:
         | 
| 74 | 
             
                    doc.metadata["source"] = code_file_path
         | 
| 75 | 
            +
                    doc.metadata["directory"] = directory
         | 
| 76 | 
             
                    doc.metadata["usage"] = usage  # Add the determined usage metadata
         | 
| 77 | 
             
                    #print(doc)
         | 
| 78 | 
             
                return documents
         |