Spaces:
Running
on
T4
Running
on
T4
Update auditqa/doc_process.py
Browse files
- auditqa/doc_process.py +2 -0
auditqa/doc_process.py
CHANGED
@@ -37,9 +37,11 @@ def process_pdf():
     all_documents = {}
     categories = list(files.keys())
     for category in categories:
+        print(category)
         all_documents[category] = []
         subtypes = list(files[category].keys())
         for subtype in subtypes:
+            print(subtype)
             for file in files[category][subtype]:
                 doc_processed = text_splitter.split_documents(docs[file])
                 for doc in doc_processed: